//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines common loop utility functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-utils"

static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   MemorySSAUpdater *MSSAU,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);

    if (!NewExitBB)
      LLVM_DEBUG(
          dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                 << *L << "\n");
    else
      LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                        << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}

/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference could
    // be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve LCSSA form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  // FIXME: When all loop passes preserve MemorySSA, it can be required and
  // preserved here instead of the individual handling in each pass.
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
}

/// Create MDNode for input string.
static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
  LLVMContext &Context = TheLoop->getHeader()->getContext();
  Metadata *MDs[] = {
      MDString::get(Context, Name),
      ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
  return MDNode::get(Context, MDs);
}
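
// For illustration, createStringMetadata(L, "llvm.loop.unroll.count", 4)
// yields a node of the form:
//   !0 = !{!"llvm.loop.unroll.count", i32 4}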

/// Set the string \p StringMD in the loop metadata, keeping other values
/// intact. If the string is already present, update its value if it differs.
void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
                                   unsigned V) {
  SmallVector<Metadata *, 4> MDs(1);
  // If the loop already has metadata, retain it.
  MDNode *LoopID = TheLoop->getLoopID();
  if (LoopID) {
    for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
      MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
      // If it is of the form key = value, try to parse it.
      if (Node->getNumOperands() == 2) {
        MDString *S = dyn_cast<MDString>(Node->getOperand(0));
        if (S && S->getString().equals(StringMD)) {
          ConstantInt *IntMD =
              mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
          if (IntMD && IntMD->getSExtValue() == V)
            // It is already in place. Do nothing.
            return;
          // We need to update the value, so just skip it here and it will
          // be added after copying the other existing nodes.
          continue;
        }
      }
      MDs.push_back(Node);
    }
  }
  // Add the new metadata.
  MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
  // Replace the current metadata node with the new one.
  LLVMContext &Context = TheLoop->getHeader()->getContext();
  MDNode *NewLoopID = MDNode::get(Context, MDs);
  // Set operand 0 to refer to the loop id itself.
  NewLoopID->replaceOperandWith(0, NewLoopID);
  TheLoop->setLoopID(NewLoopID);
}
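
// As a sketch: given loop metadata
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.unroll.count", i32 4}
// calling addStringMetadataToLoop(L, "llvm.loop.unroll.count", 8) skips !1,
// copies any remaining operands, and appends !{!"llvm.loop.unroll.count",
// i32 8} to a fresh self-referential loop ID.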

std::optional<ElementCount>
llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
  std::optional<int> Width =
      getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");

  if (Width) {
    std::optional<int> IsScalable = getOptionalIntLoopAttribute(
        TheLoop, "llvm.loop.vectorize.scalable.enable");
    return ElementCount::get(*Width, IsScalable.value_or(false));
  }

  return std::nullopt;
}
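
// E.g. the pair !{!"llvm.loop.vectorize.width", i32 4} and
// !{!"llvm.loop.vectorize.scalable.enable", i1 true} is read back as a
// scalable ElementCount of 4; without the scalable flag the width is
// treated as fixed.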

std::optional<MDNode *> llvm::makeFollowupLoopID(
    MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
    const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
  if (!OrigLoopID) {
    if (AlwaysNew)
      return nullptr;
    return std::nullopt;
  }

  assert(OrigLoopID->getOperand(0) == OrigLoopID);

  bool InheritAllAttrs = !InheritOptionsExceptPrefix;
  bool InheritSomeAttrs =
      InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
  SmallVector<Metadata *, 8> MDs;
  MDs.push_back(nullptr);

  bool Changed = false;
  if (InheritAllAttrs || InheritSomeAttrs) {
    for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
      MDNode *Op = cast<MDNode>(Existing.get());

      auto InheritThisAttribute = [InheritSomeAttrs,
                                   InheritOptionsExceptPrefix](MDNode *Op) {
        if (!InheritSomeAttrs)
          return false;

        // Skip malformed attribute metadata nodes.
        if (Op->getNumOperands() == 0)
          return true;
        Metadata *NameMD = Op->getOperand(0).get();
        if (!isa<MDString>(NameMD))
          return true;
        StringRef AttrName = cast<MDString>(NameMD)->getString();

        // Do not inherit excluded attributes.
        return !AttrName.starts_with(InheritOptionsExceptPrefix);
      };

      if (InheritThisAttribute(Op))
        MDs.push_back(Op);
      else
        Changed = true;
    }
  } else {
    // Modified if we dropped at least one attribute.
    Changed = OrigLoopID->getNumOperands() > 1;
  }

  bool HasAnyFollowup = false;
  for (StringRef OptionName : FollowupOptions) {
    MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
    if (!FollowupNode)
      continue;

    HasAnyFollowup = true;
    for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
      MDs.push_back(Option.get());
      Changed = true;
    }
  }

  // Attributes of the followup loop were not specified explicitly, so signal
  // to the transformation pass to add suitable attributes.
  if (!AlwaysNew && !HasAnyFollowup)
    return std::nullopt;

  // If no attributes were added or removed, the previous loop ID can be
  // reused.
  if (!AlwaysNew && !Changed)
    return OrigLoopID;

  // No attributes is equivalent to having no !llvm.loop metadata at all.
  if (MDs.size() == 1)
    return nullptr;

  // Build the new loop ID.
  MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
  FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
  return FollowupLoopID;
}
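
// A hypothetical example: if OrigLoopID carries
//   !{!"llvm.loop.unroll.followup_unrolled", !{!"llvm.loop.unroll.disable"}}
// and FollowupOptions is {"llvm.loop.unroll.followup_unrolled"}, the
// returned loop ID contains !"llvm.loop.unroll.disable" plus any inherited
// attributes, ready to be attached to the unrolled loop.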

bool llvm::hasDisableAllTransformsHint(const Loop *L) {
  return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
}

bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
  return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
}

TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
    return TM_SuppressedByUser;

  std::optional<int> Count =
      getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
  if (Count)
    return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
    return TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}
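
// For instance, a loop annotated with !{!"llvm.loop.unroll.count", i32 1}
// reports TM_SuppressedByUser, while a count of 8 reports TM_ForcedByUser.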

TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
    return TM_SuppressedByUser;

  std::optional<int> Count =
      getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
  if (Count)
    return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
  std::optional<bool> Enable =
      getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");

  if (Enable == false)
    return TM_SuppressedByUser;

  std::optional<ElementCount> VectorizeWidth =
      getOptionalElementCountLoopAttribute(L);
  std::optional<int> InterleaveCount =
      getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");

  // 'Forcing' vector width and interleave count to one effectively disables
  // this transformation.
  if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
      InterleaveCount == 1)
    return TM_SuppressedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return TM_Disable;

  if (Enable == true)
    return TM_ForcedByUser;

  if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
    return TM_Disable;

  if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
    return TM_Enable;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
    return TM_SuppressedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16>
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  SmallVector<DomTreeNode *, 16> Worklist;
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
    // Only include subregions in the top level loop.
    BasicBlock *BB = DTN->getBlock();
    if (CurLoop->contains(BB))
      Worklist.push_back(DTN);
  };

  AddRegionToWorklist(N);

  for (size_t I = 0; I < Worklist.size(); I++) {
    for (DomTreeNode *Child : Worklist[I]->children())
      AddRegionToWorklist(Child);
  }

  return Worklist;
}

bool llvm::isAlmostDeadIV(PHINode *PN, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
  assert(LatchIdx != -1 && "LatchBlock is not a case in this PHINode");
  Value *IncV = PN->getIncomingValue(LatchIdx);

  for (User *U : PN->users())
    if (U != Cond && U != IncV)
      return false;

  for (User *U : IncV->users())
    if (U != Cond && U != PN)
      return false;
  return true;
}
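
// Illustrative IR for an "almost dead" IV: %iv and %iv.next feed only each
// other and the latch condition.
//   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add nuw i32 %iv, 1
//   %cond = icmp ult i32 %iv.next, %n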

void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
                          LoopInfo *LI, MemorySSA *MSSA) {
  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
  auto *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader should exist!");

  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  //
  // Because we're deleting a large chunk of code at once, the sequence in
  // which we remove things is very important to avoid invalidation issues.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  if (SE) {
    SE->forgetLoop(L);
    SE->forgetBlockAndLoopDispositions();
  }

  Instruction *OldTerm = Preheader->getTerminator();
  assert(!OldTerm->mayHaveSideEffects() &&
         "Preheader must end with a side-effect-free terminator");
  assert(OldTerm->getNumSuccessors() == 1 &&
         "Preheader must have a single successor");
  // Connect the preheader to the exit block. Keep the old edge to the header
  // around to perform the dominator tree update in two separate steps
  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
  // preheader -> header.
  //
  // 0. Preheader          1. Preheader           2. Preheader
  //        |                    |                      |
  //        V                    |                      |
  //      Header <--\            | Header <--\          | Header <--\
  //        |       |            |   |       |          |   |       |
  //        |       V            |   |       V          |   |       V
  //        |      Body --/      |   |      Body --/    |   |      Body --/
  //        V                    V   V                  V   V
  //      Exit                   Exit                   Exit
  //
  // By doing this in two separate steps we can perform the dominator tree
  // update without using the batch update API.
  //
  // Even when the loop is never executed, we cannot remove the edge from the
  // source block to the exit block. Consider the case where the unexecuted
  // loop branches back to an outer loop. If we deleted the loop and removed
  // the edge coming to this inner loop, this would break the outer loop
  // structure (by deleting the backedge of the outer loop). If the outer loop
  // is indeed a non-loop, it will be deleted in a future iteration of the
  // loop deletion pass.
  IRBuilder<> Builder(OldTerm);

  auto *ExitBlock = L->getUniqueExitBlock();
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  if (ExitBlock) {
    assert(ExitBlock && "Should have a unique exit block!");
    assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");

    Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
    // Remove the old branch. The conditional branch becomes a new terminator.
    OldTerm->eraseFromParent();

    // Rewrite phis in the exit block to get their inputs from the Preheader
    // instead of the exiting block.
    for (PHINode &P : ExitBlock->phis()) {
      // Set the zero'th element of Phi to be from the preheader and remove all
      // other incoming values. Given the loop has dedicated exits, all other
      // incoming values must be from the exiting blocks.
      int PredIndex = 0;
      P.setIncomingBlock(PredIndex, Preheader);
      // Removes all incoming values from all other exiting blocks (including
      // duplicate values from an exiting block).
      // Nuke all entries except the zero'th entry, which is the preheader
      // entry.
      P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
                              /* DeletePHIIfEmpty */ false);

      assert((P.getNumIncomingValues() == 1 &&
              P.getIncomingBlock(PredIndex) == Preheader) &&
             "Should have exactly one value and that's from the preheader!");
    }

    if (DT) {
      DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
      if (MSSA) {
        MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
                            *DT);
        if (VerifyMemorySSA)
          MSSA->verifyMemorySSA();
      }
    }

    // Disconnect the loop body by branching directly to its exit.
    Builder.SetInsertPoint(Preheader->getTerminator());
    Builder.CreateBr(ExitBlock);
    // Remove the old branch.
    Preheader->getTerminator()->eraseFromParent();
  } else {
    assert(L->hasNoExitBlocks() &&
           "Loop should have either zero or one exit blocks.");

    Builder.SetInsertPoint(OldTerm);
    Builder.CreateUnreachable();
    Preheader->getTerminator()->eraseFromParent();
  }

  if (DT) {
    DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
    if (MSSA) {
      MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
                          *DT);
      SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
                                                   L->block_end());
      MSSAU->removeBlocks(DeadBlockSet);
      if (VerifyMemorySSA)
        MSSA->verifyMemorySSA();
    }
  }

  // Use a set to unique and a vector to guarantee deterministic ordering.
  llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet;
  llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
  llvm::SmallVector<DbgVariableRecord *, 4> DeadDbgVariableRecords;

  if (ExitBlock) {
    // Given LCSSA form is satisfied, we should not have users of instructions
    // within the dead loop outside of the loop. However, LCSSA doesn't take
    // unreachable uses into account. We handle them here.
    // We could do it after dropping all references (in which case all users
    // in the loop would already be eliminated and we would have less work to
    // do), but according to the API doc of User::dropAllReferences the only
    // valid operation after dropping references is deletion. So let's
    // substitute all usages of instructions from the loop with a poison value
    // of the corresponding type first.
    for (auto *Block : L->blocks())
      for (Instruction &I : *Block) {
        auto *Poison = PoisonValue::get(I.getType());
        for (Use &U : llvm::make_early_inc_range(I.uses())) {
          if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
            if (L->contains(Usr->getParent()))
              continue;
          // If we have a DT then we can check that uses outside the loop only
          // occur in unreachable blocks.
          if (DT)
            assert(!DT->isReachableFromEntry(U) &&
                   "Unexpected user in reachable block");
          U.set(Poison);
        }

        // RemoveDIs: do the same as below for DbgVariableRecords.
        if (Block->IsNewDbgInfoFormat) {
          for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
                   filterDbgVars(I.getDbgRecordRange()))) {
            DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
                              DVR.getDebugLoc().get());
            if (!DeadDebugSet.insert(Key).second)
              continue;
            // Unlinks the DVR from its container, for later insertion.
            DVR.removeFromParent();
            DeadDbgVariableRecords.push_back(&DVR);
          }
        }

        // For one of each variable encountered, preserve a debug intrinsic
        // (set to Poison) and transfer it to the loop exit. This terminates
        // any variable locations that were set during the loop.
        auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
        if (!DVI)
          continue;
        if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
          continue;
        DeadDebugInst.push_back(DVI);
      }

    // After the loop has been deleted all the values defined and modified
    // inside the loop are going to be unavailable. Values computed in the
    // loop will have been deleted, automatically causing their debug uses
    // to be replaced with undef. Loop invariant values will still be
    // available. Move dbg.values out of the loop so that earlier location
    // ranges are still terminated and loop invariant assignments are
    // preserved.
    DIBuilder DIB(*ExitBlock->getModule());
    BasicBlock::iterator InsertDbgValueBefore =
        ExitBlock->getFirstInsertionPt();
    assert(InsertDbgValueBefore != ExitBlock->end() &&
           "There should be a non-PHI instruction in exit block, else these "
           "instructions will have no parent.");

    for (auto *DVI : DeadDebugInst)
      DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);

    // Due to the "head" bit in BasicBlock::iterator, we're going to insert
    // each DbgVariableRecord right at the start of the block, whereas
    // dbg.values would be repeatedly inserted before the first instruction.
    // To replicate this behaviour, do it backwards.
    for (DbgVariableRecord *DVR : llvm::reverse(DeadDbgVariableRecords))
      ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore);
  }

  // Remove the block from the reference counting scheme, so that we can
  // delete it freely later.
  for (auto *Block : L->blocks())
    Block->dropAllReferences();

  if (MSSA && VerifyMemorySSA)
    MSSA->verifyMemorySSA();

  if (LI) {
    // Erase the instructions and the blocks without having to worry
    // about ordering because we already dropped the references.
    // NOTE: This iteration is safe because erasing the block does not remove
    // its entry from the loop's block list. We do that in the next section.
    for (BasicBlock *BB : L->blocks())
      BB->eraseFromParent();

    // Finally, remove the blocks from LoopInfo. This has to happen late
    // because otherwise our loop iterators won't work.

    SmallPtrSet<BasicBlock *, 8> blocks;
    blocks.insert(L->block_begin(), L->block_end());
    for (BasicBlock *BB : blocks)
      LI->removeBlock(BB);

    // The last step is to update LoopInfo now that we've eliminated this
    // loop. Note: LoopInfo::erase removes the given loop and relinks its
    // subloops with its parent, while removeLoop/removeChildLoop remove the
    // given loop but do not relink its subloops, which is what we want.
    if (Loop *ParentLoop = L->getParentLoop()) {
      Loop::iterator I = find(*ParentLoop, L);
      assert(I != ParentLoop->end() && "Couldn't find loop");
      ParentLoop->removeChildLoop(I);
    } else {
      Loop::iterator I = find(*LI, L);
      assert(I != LI->end() && "Couldn't find loop");
      LI->removeLoop(I);
    }
    LI->destroy(L);
  }
}
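
// Typical use: once a pass has proven the loop dead, it calls
//   deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
// after which L must not be referenced again, since LoopInfo::destroy frees
// the Loop object.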

void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, MemorySSA *MSSA) {
  auto *Latch = L->getLoopLatch();
  assert(Latch && "multiple latches not yet supported");
  auto *Header = L->getHeader();
  Loop *OutermostLoop = L->getOutermostLoop();

  SE.forgetLoop(L);
  SE.forgetBlockAndLoopDispositions();

  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Update the CFG and domtree. We chose to special case a couple of
  // common cases for code quality and test readability reasons.
  [&]() -> void {
    if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
      if (!BI->isConditional()) {
        DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
        (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
                                  MSSAU.get());
        return;
      }

      // Conditional latch/exit - note that the latch can be shared by inner
      // and outer loop, so the other target doesn't need to be an exit.
      if (L->isLoopExiting(Latch)) {
        // TODO: Generalize ConstantFoldTerminator so that it can be used
        // here without invalidating LCSSA or MemorySSA. (Tricky case for
        // LCSSA: header is an exit block of a preceding sibling loop w/o
        // dedicated exits.)
        const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
        BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);

        DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
        Header->removePredecessor(Latch, true);

        IRBuilder<> Builder(BI);
        auto *NewBI = Builder.CreateBr(ExitBB);
        // Transfer the metadata to the new branch instruction (minus the
        // loop info, since this is no longer a loop).
        NewBI->copyMetadata(*BI,
                            {LLVMContext::MD_dbg, LLVMContext::MD_annotation});

        BI->eraseFromParent();
        DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
        if (MSSA)
          MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
        return;
      }
    }

    // General case. By splitting the backedge, and then explicitly making it
    // unreachable, we gracefully handle corner cases such as switch and
    // invoke terminators.
    auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());

    DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
    (void)changeToUnreachable(BackedgeBB->getTerminator(),
                              /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
  }();

  // Erase (and destroy) this loop instance. Handles relinking sub-loops
  // and blocks within the loop as needed.
  LI.erase(L);

  // If the loop we broke had a parent, then changeToUnreachable might have
  // caused a block to be removed from the parent loop (see loop_nest_lcssa
  // test case in zero-btc.ll for an example), thus changing the parent's
  // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
  // loop which might have had a block removed.
  if (OutermostLoop != L)
    formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
}

/// Checks if \p L has an exiting latch branch. There may also be other
/// exiting blocks. Returns the branch instruction terminating the loop latch
/// if the above check succeeds, nullptr otherwise.
static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return nullptr;

  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
    return nullptr;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  return LatchBR;
}

/// Return the estimated trip count for any exiting branch which dominates
/// the loop latch.
static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
                                                     Loop *L,
                                                     uint64_t &OrigExitWeight) {
  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t LoopWeight, ExitWeight;
  if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
    return std::nullopt;

  if (L->contains(ExitingBranch->getSuccessor(1)))
    std::swap(LoopWeight, ExitWeight);

  if (!ExitWeight)
    // Don't have a way to return predicated infinite.
    return std::nullopt;

  OrigExitWeight = ExitWeight;

  // The estimated exit count is the ratio of the loop weight to the weight of
  // the edge exiting the loop, rounded to nearest.
  uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
  // The estimated trip count is one plus the estimated exit count.
  return ExitCount + 1;
}
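
// Worked example: branch weights !{!"branch_weights", i32 99, i32 1} on a
// latch that loops on the true edge give LoopWeight = 99 and ExitWeight = 1,
// so the estimated exit count is divideNearest(99, 1) = 99 and the estimated
// trip count is 100.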

std::optional<unsigned>
llvm::getLoopEstimatedTripCount(Loop *L,
                                unsigned *EstimatedLoopInvocationWeight) {
  // Currently we take the estimated exit count only from the loop latch,
  // ignoring other exiting blocks. This can overestimate the trip count
  // if we exit through another exit, but can never underestimate it.
  // TODO: incorporate information from other exits.
  if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
    uint64_t ExitWeight;
    if (std::optional<uint64_t> EstTripCount =
            getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
      if (EstimatedLoopInvocationWeight)
        *EstimatedLoopInvocationWeight = ExitWeight;
      return *EstTripCount;
    }
  }
  return std::nullopt;
}

bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                                     unsigned EstimatedloopInvocationWeight) {
  // At the moment, we support changing the estimated trip count of the latch
  // branch only. We could extend this API to manipulate estimated trip counts
  // for any exit.
  BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
  if (!LatchBranch)
    return false;

  // Calculate taken and exit weights.
  unsigned LatchExitWeight = 0;
  unsigned BackedgeTakenWeight = 0;

  if (EstimatedTripCount > 0) {
    LatchExitWeight = EstimatedloopInvocationWeight;
    BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
  }

  // Swap if the backedge is taken when the condition is "false".
  if (LatchBranch->getSuccessor(0) != L->getHeader())
    std::swap(BackedgeTakenWeight, LatchExitWeight);

  MDBuilder MDB(LatchBranch->getContext());

  // Set/Update profile metadata.
  LatchBranch->setMetadata(
      LLVMContext::MD_prof,
      MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));

  return true;
}
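
// E.g. setLoopEstimatedTripCount(L, 100, 1) computes a backedge-taken weight
// of (100 - 1) * 1 = 99 and emits (with the weights swapped if the backedge
// is the false edge):
//   !{!"branch_weights", i32 99, i32 1}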

bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                              ScalarEvolution &SE) {
  Loop *OuterL = InnerLoop->getParentLoop();
  if (!OuterL)
    return true;

  // Get the backedge taken count for the inner loop.
  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
  if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
      !InnerLoopBECountSC->getType()->isIntegerTy())
    return false;

  // Get whether the count is invariant to the outer loop.
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  return true;
}

unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) {
  switch (RdxID) {
  case Intrinsic::vector_reduce_fadd:
    return Instruction::FAdd;
  case Intrinsic::vector_reduce_fmul:
    return Instruction::FMul;
  case Intrinsic::vector_reduce_add:
    return Instruction::Add;
  case Intrinsic::vector_reduce_mul:
    return Instruction::Mul;
  case Intrinsic::vector_reduce_and:
    return Instruction::And;
  case Intrinsic::vector_reduce_or:
    return Instruction::Or;
  case Intrinsic::vector_reduce_xor:
    return Instruction::Xor;
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_umax:
  case Intrinsic::vector_reduce_umin:
    return Instruction::ICmp;
  case Intrinsic::vector_reduce_fmax:
  case Intrinsic::vector_reduce_fmin:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unexpected ID");
  }
}

Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID) {
  switch (RdxID) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case Intrinsic::vector_reduce_umin:
    return Intrinsic::umin;
  case Intrinsic::vector_reduce_umax:
    return Intrinsic::umax;
  case Intrinsic::vector_reduce_smin:
    return Intrinsic::smin;
  case Intrinsic::vector_reduce_smax:
    return Intrinsic::smax;
  case Intrinsic::vector_reduce_fmin:
    return Intrinsic::minnum;
  case Intrinsic::vector_reduce_fmax:
    return Intrinsic::maxnum;
  case Intrinsic::vector_reduce_fminimum:
    return Intrinsic::minimum;
  case Intrinsic::vector_reduce_fmaximum:
    return Intrinsic::maximum;
  }
}

Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case RecurKind::UMin:
    return Intrinsic::umin;
  case RecurKind::UMax:
    return Intrinsic::umax;
  case RecurKind::SMin:
    return Intrinsic::smin;
  case RecurKind::SMax:
    return Intrinsic::smax;
  case RecurKind::FMin:
    return Intrinsic::minnum;
  case RecurKind::FMax:
    return Intrinsic::maxnum;
  case RecurKind::FMinimum:
    return Intrinsic::minimum;
  case RecurKind::FMaximum:
    return Intrinsic::maximum;
  }
}

RecurKind llvm::getMinMaxReductionRecurKind(Intrinsic::ID RdxID) {
  switch (RdxID) {
  case Intrinsic::vector_reduce_smax:
    return RecurKind::SMax;
  case Intrinsic::vector_reduce_smin:
    return RecurKind::SMin;
  case Intrinsic::vector_reduce_umax:
    return RecurKind::UMax;
  case Intrinsic::vector_reduce_umin:
    return RecurKind::UMin;
  case Intrinsic::vector_reduce_fmax:
    return RecurKind::FMax;
  case Intrinsic::vector_reduce_fmin:
    return RecurKind::FMin;
  default:
    return RecurKind::None;
  }
}

CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case RecurKind::UMin:
    return CmpInst::ICMP_ULT;
  case RecurKind::UMax:
    return CmpInst::ICMP_UGT;
  case RecurKind::SMin:
    return CmpInst::ICMP_SLT;
  case RecurKind::SMax:
    return CmpInst::ICMP_SGT;
  case RecurKind::FMin:
    return CmpInst::FCMP_OLT;
  case RecurKind::FMax:
    return CmpInst::FCMP_OGT;
  // We do not add FMinimum/FMaximum recurrence kind here since there is no
  // equivalent predicate which compares signed zeroes according to the
  // semantics of the intrinsics (llvm.minimum/maximum).
  }
}

Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
                           RecurKind RK, Value *Left, Value *Right) {
  if (auto VTy = dyn_cast<VectorType>(Left->getType()))
    StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
  Value *Cmp =
      Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
  return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
}

Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
                            Value *Right) {
  Type *Ty = Left->getType();
  if (Ty->isIntOrIntVectorTy() ||
      (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
    // TODO: Add float minnum/maxnum support when FMF nnan is set.
    Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
    return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
                                   "rdx.minmax");
  }
  CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
  Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}
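
// Sketch of the cmp/select path for RecurKind::FMax on scalar floats
// (fast-math flags depend on the builder's configuration):
//   %rdx.minmax.cmp = fcmp ogt float %left, %right
//   %rdx.minmax.select = select i1 %rdx.minmax.cmp, float %left, float %right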

// Helper to generate an ordered reduction.
Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
                                 unsigned Op, RecurKind RdxKind) {
  unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();

  // Extract and apply reduction ops in ascending order:
  // e.g. (((Acc + Scl[0]) + Scl[1]) + ...) + Scl[VF-1]
  Value *Result = Acc;
  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
    Value *Ext =
        Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
                                   "bin.rdx");
    } else {
      assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
             "Invalid min/max");
      Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
    }
  }

  return Result;
}
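
// Sketch for VF = 2 and Op = FAdd, preserving evaluation order:
//   %e0 = extractelement <2 x float> %src, i32 0
//   %r0 = fadd float %acc, %e0
//   %e1 = extractelement <2 x float> %src, i32 1
//   %r1 = fadd float %r0, %e1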

// Helper to generate a log2 shuffle reduction.
Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
                                 unsigned Op, RecurKind RdxKind) {
  unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  // Note: fast-math-flags are controlled by the builder configuration and are
  // assumed to apply to all generated arithmetic instructions. Other
  // poison-generating flags (nsw/nuw/inbounds/inrange/exact) are not part
  // of the builder configuration, and since they're not passed explicitly,
  // will never be relevant here. Note that it would be generally unsound to
  // propagate these from an intrinsic call to the expansion anyways as we
  // change the order of operations.
  Value *TmpVec = Src;
  SmallVector<int, 32> ShuffleMask(VF);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = i / 2 + j;

    // Fill the rest of the mask with undef.
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);

    Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
                                   "bin.rdx");
    } else {
      assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
             "Invalid min/max");
      TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
    }
  }
  // The result is in the first element of the vector.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
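
// Sketch for VF = 4 and Op = Add: two shuffle/add rounds halve the live lanes
// each time, after which lane 0 holds the final sum.
//   %rdx.shuf = shufflevector <4 x i32> %src, <4 x i32> poison,
//                             <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
//   %bin.rdx = add <4 x i32> %src, %rdx.shuf
//   %rdx.shuf1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> poison,
//                              <4 x i32> <i32 1, i32 undef, i32 undef,
//                                         i32 undef>
//   %bin.rdx2 = add <4 x i32> %bin.rdx, %rdx.shuf1
//   %res = extractelement <4 x i32> %bin.rdx2, i32 0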

Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
                                        const RecurrenceDescriptor &Desc,
                                        PHINode *OrigPhi) {
  assert(
      RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) &&
      "Unexpected reduction kind");
  Value *InitVal = Desc.getRecurrenceStartValue();
  Value *NewVal = nullptr;

  // First use the original phi to determine the new value we're trying to
  // select from in the loop.
  SelectInst *SI = nullptr;
  for (auto *U : OrigPhi->users()) {
    if ((SI = dyn_cast<SelectInst>(U)))
      break;
  }
  assert(SI && "One user of the original phi should be a select");

  if (SI->getTrueValue() == OrigPhi)
    NewVal = SI->getFalseValue();
  else {
    assert(SI->getFalseValue() == OrigPhi &&
           "At least one input to the select should be the original Phi");
    NewVal = SI->getTrueValue();
  }

  // Create a splat vector with the new value and compare this to the vector
  // we want to reduce.
  ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
  Value *Right = Builder.CreateVectorSplat(EC, InitVal);
  Value *Cmp =
      Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");

  // If any predicate is true it means that we want to select the new value.
  Cmp = Builder.CreateOrReduce(Cmp);
  return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
}
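
// Illustrative expansion for a 4-wide any-of reduction with start value
// %init and selected value %new:
//   %init.splat = ... splat of %init ...
//   %rdx.select.cmp = icmp ne <4 x i32> %src, %init.splat
//   %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %rdx.select.cmp)
//   %rdx.select = select i1 %any, i32 %new, i32 %init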

Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
                                         RecurKind RdxKind) {
  auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
  switch (RdxKind) {
  case RecurKind::Add:
    return Builder.CreateAddReduce(Src);
  case RecurKind::Mul:
    return Builder.CreateMulReduce(Src);
  case RecurKind::And:
    return Builder.CreateAndReduce(Src);
  case RecurKind::Or:
    return Builder.CreateOrReduce(Src);
  case RecurKind::Xor:
    return Builder.CreateXorReduce(Src);
  case RecurKind::FMulAdd:
  case RecurKind::FAdd:
    return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
                                    Src);
  case RecurKind::FMul:
    return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
  case RecurKind::SMax:
    return Builder.CreateIntMaxReduce(Src, true);
  case RecurKind::SMin:
    return Builder.CreateIntMinReduce(Src, true);
  case RecurKind::UMax:
    return Builder.CreateIntMaxReduce(Src, false);
  case RecurKind::UMin:
    return Builder.CreateIntMinReduce(Src, false);
  case RecurKind::FMax:
    return Builder.CreateFPMaxReduce(Src);
  case RecurKind::FMin:
    return Builder.CreateFPMinReduce(Src);
  case RecurKind::FMinimum:
    return Builder.CreateFPMinimumReduce(Src);
  case RecurKind::FMaximum:
    return Builder.CreateFPMaximumReduce(Src);
  default:
    llvm_unreachable("Unhandled opcode");
  }
}

Value *llvm::createTargetReduction(IRBuilderBase &B,
                                   const RecurrenceDescriptor &Desc, Value *Src,
                                   PHINode *OrigPhi) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
  // All ops in the reduction inherit fast-math-flags from the recurrence
  // descriptor.
  IRBuilderBase::FastMathFlagGuard FMFGuard(B);
  B.setFastMathFlags(Desc.getFastMathFlags());

  RecurKind RK = Desc.getRecurrenceKind();
  if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
    return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);

  return createSimpleTargetReduction(B, Src, RK);
}

Value *llvm::createOrderedReduction(IRBuilderBase &B,
                                    const RecurrenceDescriptor &Desc,
                                    Value *Src, Value *Start) {
  assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
          Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
         "Unexpected reduction kind");
  assert(Src->getType()->isVectorTy() && "Expected a vector type");
  assert(!Start->getType()->isVectorTy() && "Expected a scalar type");

  return B.CreateFAddReduce(Start, Src);
}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
                            bool IncludeWrapFlags) {
  auto *VecOp = dyn_cast<Instruction>(I);
  if (!VecOp)
    return;
  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
                                            : dyn_cast<Instruction>(OpValue);
  if (!Intersection)
    return;
  const unsigned Opcode = Intersection->getOpcode();
  VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
  for (auto *V : VL) {
    auto *Instr = dyn_cast<Instruction>(V);
    if (!Instr)
      continue;
    if (OpValue == nullptr || Opcode == Instr->getOpcode())
      VecOp->andIRFlags(V);
  }
}

bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
                                 ScalarEvolution &SE) {
  const SCEV *Zero = SE.getZero(S->getType());
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
}

bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
                                    ScalarEvolution &SE) {
  const SCEV *Zero = SE.getZero(S->getType());
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
}

bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
                                 ScalarEvolution &SE) {
  const SCEV *Zero = SE.getZero(S->getType());
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
}

bool llvm::isKnownNonPositiveInLoop(const SCEV *S, const Loop *L,
                                    ScalarEvolution &SE) {
  const SCEV *Zero = SE.getZero(S->getType());
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
}

bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             bool Signed) {
  unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
  APInt Min = Signed ? APInt::getSignedMinValue(BitWidth)
                     : APInt::getMinValue(BitWidth);
  auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, Predicate, S, SE.getConstant(Min));
}

bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             bool Signed) {
  unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
  APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth)
                     : APInt::getMaxValue(BitWidth);
  auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, Predicate, S, SE.getConstant(Max));
}
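
// E.g. cannotBeMaxInLoop with Signed = true and a 32-bit S asks whether loop
// entry is guarded by S < INT32_MAX (so S + 1 cannot signed-wrap).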

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to the worklist.
    for (const auto *U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;               // For which PHI node is this replacement?
  unsigned Ith;              // For which incoming value?
  const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are
                             // rewriting.
  Instruction *ExpansionPoint; // Where we'd like to expand that SCEV.
  bool HighCost;               // Is this expansion high-cost?

  RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
             bool H)
      : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
        HighCost(H) {}
};

// Check whether it is possible to delete the loop after rewriting the exit
// value. If it is possible, ignore ReplaceExitValue and do rewriting
// aggressively.
static bool canLoopBeDeleted(Loop *L,
                             SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We restrict ourselves to the single
  // ExitingBlock case for simplicity.
  // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in the transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB,
                     [](Instruction &I) { return I.mayHaveSideEffects(); }))
      return false;

  return true;
}

/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p
/// Phi, and returns true if this Phi is an induction phi in the loop. When
/// isInductionPHI returns true, \p ID will also be set by isInductionPHI.
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
                          InductionDescriptor &ID) {
  if (!Phi)
    return false;
  if (!L->getLoopPreheader())
    return false;
  if (Phi->getParent() != L->getHeader())
    return false;
  return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
}
1415
int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
                                ScalarEvolution *SE,
                                const TargetTransformInfo *TTI,
                                SCEVExpander &Rewriter, DominatorTree *DT,
                                ReplaceExitVal ReplaceExitValue,
                                SmallVector<WeakTrackingVH, 16> &DeadInsts) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction or is not
        // defined in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Find exit values which are induction variables in the loop, and are
        // unused in the loop, with the only use being the exit block PhiNode,
        // and the induction variable update binary operator.
        // The exit value can be replaced with the final value when it is cheap
        // to do so.
        if (ReplaceExitValue == UnusedIndVarInLoop) {
          InductionDescriptor ID;
          PHINode *IndPhi = dyn_cast<PHINode>(Inst);
          if (IndPhi) {
            if (!checkIsIndPhi(IndPhi, L, SE, ID))
              continue;
            // This is an induction PHI. Check that the only users are PHI
            // nodes, and induction variable update binary operators.
            if (llvm::any_of(Inst->users(), [&](User *U) {
                  if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
                    return true;
                  BinaryOperator *B = dyn_cast<BinaryOperator>(U);
                  if (B && B != ID.getInductionBinOp())
                    return true;
                  return false;
                }))
              continue;
          } else {
            // If it is not an induction phi, it must be an induction update
            // binary operator with an induction phi user.
            BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
            if (!B)
              continue;
            if (llvm::any_of(Inst->users(), [&](User *U) {
                  PHINode *Phi = dyn_cast<PHINode>(U);
                  if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
                    return true;
                  return false;
                }))
              continue;
            if (B != ID.getInductionBinOp())
              continue;
          }
        }

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible. We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !Rewriter.isSafeToExpand(ExitValue)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences). Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !Rewriter.isSafeToExpand(ExitValue))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
            !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
          continue;

        // Check if expansions of this SCEV would count as being high cost.
        bool HighCost = Rewriter.isHighCostExpansion(
            ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);

        // Note that we must not perform expansions until after
        // we query *all* the costs, because if we perform a temporary
        // expansion in between, one that we might not intend to keep, said
        // expansion *may* affect the cost calculation of the next SCEVs we
        // query, and the next SCEV may erroneously get a smaller cost.

        // Collect all the candidate PHINodes to be rewritten.
        Instruction *InsertPt =
          (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
          &*Inst->getParent()->getFirstInsertionPt() : Inst;
        RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
      }
    }
  }

  // TODO: evaluate whether it is beneficial to change how we calculate
  // high-cost: if we have SCEV 'A' which we know we will expand, should we
  // calculate the cost of other SCEVs after expanding SCEV 'A', thus
  // potentially giving cost bonus to those other SCEVs?

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
  int NumReplaced = 0;

  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if ((ReplaceExitValue == OnlyCheapRepl ||
         ReplaceExitValue == UnusedIndVarInLoop) &&
        !LoopCanBeDel && Phi.HighCost)
      continue;

    Value *ExitVal = Rewriter.expandCodeFor(
        Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);

    LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
                      << '\n'
                      << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");

#ifndef NDEBUG
    // If we reuse an instruction from a loop which is neither L nor one of
    // its containing loops, we end up breaking LCSSA form for this loop by
    // creating a new use of its instruction.
    if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
      if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
        if (EVL != L)
          assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

    NumReplaced++;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);
    // It's necessary to tell ScalarEvolution about this explicitly so that
    // it can walk the def-use list and forget all SCEVs, as it may not be
    // watching the PHI itself. Once the new exit value is in place, there
    // may not be a def-use connection between the loop and every instruction
    // which got a SCEVAddRecExpr for that loop.
    SE->forgetValue(PN);

    // If this instruction is dead now, schedule it for deletion. We do not
    // delete it immediately to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return NumReplaced;
}

/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
/// \p OrigLoop.
void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
                                        Loop *RemainderLoop, uint64_t UF) {
  assert(UF > 0 && "Zero unrolled factor is not supported");
  assert(UnrolledLoop != RemainderLoop &&
         "Unrolled and Remainder loops are expected to be distinct");

  // Get number of iterations in the original scalar loop.
  unsigned OrigLoopInvocationWeight = 0;
  std::optional<unsigned> OrigAverageTripCount =
      getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
  if (!OrigAverageTripCount)
    return;

  // Calculate number of iterations in unrolled loop.
  unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
  // Calculate number of iterations for remainder loop.
  unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
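  // For example (a sketch): with an estimated trip count of 10 and UF == 4,
  // the unrolled loop is estimated to run 10 / 4 == 2 iterations and the
  // remainder loop 10 % 4 == 2 iterations per invocation of the original
  // loop.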

  setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
                            OrigLoopInvocationWeight);
  setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
                            OrigLoopInvocationWeight);
}

/// Utility that implements appending of loops onto a worklist.
/// Loops are added in preorder (analogous to reverse postorder for trees),
/// and the worklist is processed LIFO.
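///
/// For example (a sketch): for a root loop A containing subloops A1 and A2,
/// the preorder sequence [A, A2, A1] is inserted for that root, so LIFO
/// processing of the worklist visits A1, A2, and finally A, i.e. subloops
/// before their parent.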
template <typename RangeT>
void llvm::appendReversedLoopsToWorklist(
    RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
  // We use an internal worklist to build up the preorder traversal without
  // recursion.
  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;

  // We walk the initial sequence of loops in reverse because we generally want
  // to visit defs before uses and the worklist is LIFO.
  for (Loop *RootL : Loops) {
    assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
    assert(PreOrderWorklist.empty() &&
           "Must start with an empty preorder walk worklist.");
    PreOrderWorklist.push_back(RootL);
    do {
      Loop *L = PreOrderWorklist.pop_back_val();
      PreOrderWorklist.append(L->begin(), L->end());
      PreOrderLoops.push_back(L);
    } while (!PreOrderWorklist.empty());

    Worklist.insert(std::move(PreOrderLoops));
    PreOrderLoops.clear();
  }
}

template <typename RangeT>
void llvm::appendLoopsToWorklist(RangeT &&Loops,
                                 SmallPriorityWorklist<Loop *, 4> &Worklist) {
  appendReversedLoopsToWorklist(reverse(Loops), Worklist);
}

template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
    ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist);

template void
llvm::appendLoopsToWorklist<Loop &>(Loop &L,
                                    SmallPriorityWorklist<Loop *, 4> &Worklist);

void llvm::appendLoopsToWorklist(LoopInfo &LI,
                                 SmallPriorityWorklist<Loop *, 4> &Worklist) {
  appendReversedLoopsToWorklist(LI, Worklist);
}

Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
                      LoopInfo *LI, LPPassManager *LPM) {
  Loop &New = *LI->AllocateLoop();
  if (PL)
    PL->addChildLoop(&New);
  else
    LI->addTopLevelLoop(&New);

  if (LPM)
    LPM->addLoop(New);

  // Add all of the blocks in L to the new loop.
  for (BasicBlock *BB : L->blocks())
    if (LI->getLoopFor(BB) == L)
      New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);

  // Add all of the subloops to the new loop.
  for (Loop *I : *L)
    cloneLoop(I, &New, VM, LI, LPM);

  return &New;
}

/// IR values for the lower and upper bounds of a pointer evolution. We
/// need to use value-handles because SCEV expansion can invalidate previously
/// expanded values. Thus expansion of a pointer can invalidate the bounds for
/// a previous one.
struct PointerBounds {
  TrackingVH<Value> Start;
  TrackingVH<Value> End;
  Value *StrideToCheck;
};

/// Expand code for the lower and upper bound of the pointer group \p CG
/// in \p TheLoop. \return the values for the bounds.
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
                                  Loop *TheLoop, Instruction *Loc,
                                  SCEVExpander &Exp, bool HoistRuntimeChecks) {
  LLVMContext &Ctx = Loc->getContext();
  Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);

  Value *Start = nullptr, *End = nullptr;
  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
  const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;

  // If the Low and High values are themselves loop-variant, then we may want
  // to expand the range to include those covered by the outer loop as well.
  // There is a trade-off here with the advantage being that creating checks
  // using the expanded range permits the runtime memory checks to be hoisted
  // out of the outer loop. This reduces the cost of entering the inner loop,
  // which can be significant for low trip counts. The disadvantage is that
  // there is a chance we may now never enter the vectorized inner loop,
  // whereas using a restricted range check could have allowed us to enter at
  // least once. This is why the behaviour is not currently the default and is
  // controlled by the parameter 'HoistRuntimeChecks'.
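  //
  // For example (a sketch): for an inner-loop access of A[i][j] whose
  // per-iteration bounds are [A + i*N, A + i*N + N), the check range depends
  // on the outer induction variable i. Expanding it to [A, A + M*N), where M
  // is the number of outer-loop iterations, yields bounds that are invariant
  // in the outer loop and therefore hoistable out of it.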
  if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
      isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
    auto *HighAR = cast<SCEVAddRecExpr>(High);
    auto *LowAR = cast<SCEVAddRecExpr>(Low);
    const Loop *OuterLoop = TheLoop->getParentLoop();
    const SCEV *Recur = LowAR->getStepRecurrence(*Exp.getSE());
    if (Recur == HighAR->getStepRecurrence(*Exp.getSE()) &&
        HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
      BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
      const SCEV *OuterExitCount =
          Exp.getSE()->getExitCount(OuterLoop, OuterLoopLatch);
      if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
          OuterExitCount->getType()->isIntegerTy()) {
        const SCEV *NewHigh = cast<SCEVAddRecExpr>(High)->evaluateAtIteration(
            OuterExitCount, *Exp.getSE());
        if (!isa<SCEVCouldNotCompute>(NewHigh)) {
          LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
                               "outer loop in order to permit hoisting\n");
          High = NewHigh;
          Low = cast<SCEVAddRecExpr>(Low)->getStart();
          // If there is a possibility that the stride is negative then we have
          // to generate extra checks to ensure the stride is positive.
          if (!Exp.getSE()->isKnownNonNegative(Recur)) {
            Stride = Recur;
            LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
                                 "positive: "
                              << *Stride << '\n');
          }
        }
      }
    }
  }

  Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
  End = Exp.expandCodeFor(High, PtrArithTy, Loc);
  if (CG->NeedsFreeze) {
    IRBuilder<> Builder(Loc);
    Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
    End = Builder.CreateFreeze(End, End->getName() + ".fr");
  }
  Value *StrideVal =
      Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
  LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
  return {Start, End, StrideVal};
}

/// Turns a collection of checks into a collection of expanded upper and
/// lower bounds for both pointers in the check.
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
             Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;

  // Here we're relying on the SCEV Expander's cache to only emit code for the
  // same bounds once.
  transform(PointerChecks, std::back_inserter(ChecksWithBounds),
            [&](const RuntimePointerCheck &Check) {
              PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
                                                 HoistRuntimeChecks),
                            Second = expandBounds(Check.second, L, Loc, Exp,
                                                  HoistRuntimeChecks);
              return std::make_pair(First, Second);
            });

  return ChecksWithBounds;
}

Value *llvm::addRuntimeChecks(
    Instruction *Loc, Loop *TheLoop,
    const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
    SCEVExpander &Exp, bool HoistRuntimeChecks) {
  // TODO: Move noalias annotation code from LoopVersioning here and share
  // with LV if possible.
  // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if
  // possible.
  auto ExpandedChecks =
      expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);

  LLVMContext &Ctx = Loc->getContext();
  IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
                                           Loc->getModule()->getDataLayout());
  ChkBuilder.SetInsertPoint(Loc);
  // Our instructions might fold to a constant.
  Value *MemoryRuntimeCheck = nullptr;

  for (const auto &Check : ExpandedChecks) {
    const PointerBounds &A = Check.first, &B = Check.second;
    // Check if two pointers (A and B) conflict where conflict is computed as:
    // start(A) <= end(B) && start(B) <= end(A)

    assert((A.Start->getType()->getPointerAddressSpace() ==
            B.End->getType()->getPointerAddressSpace()) &&
           (B.Start->getType()->getPointerAddressSpace() ==
            A.End->getType()->getPointerAddressSpace()) &&
           "Trying to bounds check pointers with different address spaces");

    // [A|B].Start points to the first accessed byte under base [A|B].
    // [A|B].End points to the last accessed byte, plus one.
    // There is no conflict when the intervals are disjoint:
    // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
    //
    // bound0 = (B.Start < A.End)
    // bound1 = (A.Start < B.End)
    // IsConflict = bound0 & bound1
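    //
    // For example (a sketch): with A = [0x100, 0x120) and B = [0x110, 0x130),
    // both bounds hold (0x110 < 0x120 and 0x100 < 0x130), so the intervals
    // overlap and IsConflict is true. With B = [0x120, 0x130) instead, bound0
    // fails and the intervals are disjoint.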
    Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
    Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
    Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
    if (A.StrideToCheck) {
      Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
          A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
          "stride.check");
      IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
    }
    if (B.StrideToCheck) {
      Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
          B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
          "stride.check");
      IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
    }
    if (MemoryRuntimeCheck) {
      IsConflict =
          ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
    }
    MemoryRuntimeCheck = IsConflict;
  }

  return MemoryRuntimeCheck;
}

Value *llvm::addDiffRuntimeChecks(
    Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {

  LLVMContext &Ctx = Loc->getContext();
  IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
                                           Loc->getModule()->getDataLayout());
  ChkBuilder.SetInsertPoint(Loc);
  // Our instructions might fold to a constant.
  Value *MemoryRuntimeCheck = nullptr;

  auto &SE = *Expander.getSE();
  // Map to keep track of created compares. The key is the pair of operands
  // for the compare, to allow detecting and re-using redundant compares.
  DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
  for (const auto &C : Checks) {
    Type *Ty = C.SinkStart->getType();
    // Compute VF * IC * AccessSize.
    auto *VFTimesUFTimesSize =
        ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
                             ConstantInt::get(Ty, IC * C.AccessSize));
    Value *Diff = Expander.expandCodeFor(
        SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
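
    // For example (a sketch): with SinkStart - SrcStart == 48 bytes, VF == 4,
    // IC == 2 and AccessSize == 4, the threshold is 4 * 2 * 4 == 32; since 48
    // is not below 32, the dependence distance is large enough and the
    // "diff.check" below flags no conflict for this pair.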

    // Check if the same compare has already been created earlier. In that
    // case, there is no need to check it again.
    Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
    if (IsConflict)
      continue;

    IsConflict =
        ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
    SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
    if (C.NeedsFreeze)
      IsConflict =
          ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
    if (MemoryRuntimeCheck) {
      IsConflict =
          ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
    }
    MemoryRuntimeCheck = IsConflict;
  }

  return MemoryRuntimeCheck;
}

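// A sketch of the pattern hasPartialIVCondition looks for: a loop whose
// header branches on a compare fed only by loads and GEPs inside the loop,
// e.g.
//   %flag = load i32, ptr %p
//   %c = icmp eq i32 %flag, 0
// where %p is defined outside the loop. If no MemoryDef on the path of
// interest may clobber the loaded location, the load and compare can be
// duplicated before the loop, making the condition invariant there and
// enabling partial unswitching.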
std::optional<IVConditionInfo>
llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
                            const MemorySSA &MSSA, AAResults &AA) {
  auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
  if (!TI || !TI->isConditional())
    return {};

  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
  // The case with the condition outside the loop should already be handled
  // earlier.
  if (!CondI || !L.contains(CondI))
    return {};

  SmallVector<Instruction *> InstToDuplicate;
  InstToDuplicate.push_back(CondI);

  SmallVector<Value *, 4> WorkList;
  WorkList.append(CondI->op_begin(), CondI->op_end());

  SmallVector<MemoryAccess *, 4> AccessesToCheck;
  SmallVector<MemoryLocation, 4> AccessedLocs;
  while (!WorkList.empty()) {
    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
    if (!I || !L.contains(I))
      continue;

    // TODO: support additional instructions.
    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
      return {};

    // Do not duplicate volatile and atomic loads.
    if (auto *LI = dyn_cast<LoadInst>(I))
      if (LI->isVolatile() || LI->isAtomic())
        return {};

    InstToDuplicate.push_back(I);
    if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
        // Queue the defining access for the alias checks below.
        AccessesToCheck.push_back(MemUse->getDefiningAccess());
        AccessedLocs.push_back(MemoryLocation::get(I));
      } else {
        // MemoryDefs may clobber the location or may be atomic memory
        // operations. Bail out.
        return {};
      }
    }
    WorkList.append(I->op_begin(), I->op_end());
  }

  if (InstToDuplicate.empty())
    return {};

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L.getExitingBlocks(ExitingBlocks);
  auto HasNoClobbersOnPath =
      [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
       MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
                      SmallVector<MemoryAccess *, 4> AccessesToCheck)
      -> std::optional<IVConditionInfo> {
    IVConditionInfo Info;
    // First, collect all blocks in the loop that are on a path from Succ
    // to the header.
    SmallVector<BasicBlock *, 4> WorkList;
    WorkList.push_back(Succ);
    WorkList.push_back(Header);
    SmallPtrSet<BasicBlock *, 4> Seen;
    Seen.insert(Header);
    Info.PathIsNoop &=
        all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });

    while (!WorkList.empty()) {
      BasicBlock *Current = WorkList.pop_back_val();
      if (!L.contains(Current))
        continue;
      const auto &SeenIns = Seen.insert(Current);
      if (!SeenIns.second)
        continue;

      Info.PathIsNoop &= all_of(
          *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
      WorkList.append(succ_begin(Current), succ_end(Current));
    }

    // Require at least 2 blocks on a path through the loop. This skips
    // paths that directly exit the loop.
    if (Seen.size() < 2)
      return {};

    // Next, check if there are any MemoryDefs that are on the path through
    // the loop (in the Seen set) and may-alias any of the locations in
    // AccessedLocs. If that is the case, they may modify the condition and
    // partial unswitching is not possible.
    SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
    while (!AccessesToCheck.empty()) {
      MemoryAccess *Current = AccessesToCheck.pop_back_val();
      auto SeenI = SeenAccesses.insert(Current);
      if (!SeenI.second || !Seen.contains(Current->getBlock()))
        continue;

      // Bail out if we exceeded the threshold.
      if (SeenAccesses.size() >= MSSAThreshold)
        return {};

      // MemoryUses are read-only accesses.
      if (isa<MemoryUse>(Current))
        continue;

      // For a MemoryDef, check if it aliases any of the locations feeding
      // the original condition.
      if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
        if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
              return isModSet(
                  AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
            }))
          return {};
      }

      for (Use &U : Current->uses())
        AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
    }

    // We could also allow loops with known trip counts without mustprogress,
    // but ScalarEvolution may not be available.
    Info.PathIsNoop &= isMustProgress(&L);

    // If the path is considered a no-op so far, check if it reaches a
    // single exit block without any phis. This ensures no values from the
    // loop are used outside of the loop.
    if (Info.PathIsNoop) {
      for (auto *Exiting : ExitingBlocks) {
        if (!Seen.contains(Exiting))
          continue;
        for (auto *Succ : successors(Exiting)) {
          if (L.contains(Succ))
            continue;

          Info.PathIsNoop &= Succ->phis().empty() &&
                             (!Info.ExitForPath || Info.ExitForPath == Succ);
          if (!Info.PathIsNoop)
            break;
          assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
                 "cannot have multiple exit blocks");
          Info.ExitForPath = Succ;
        }
      }
    }
    if (!Info.ExitForPath)
      Info.PathIsNoop = false;

    Info.InstToDuplicate = InstToDuplicate;
    return Info;
  };

  // If we branch to the same successor, partial unswitching will not be
  // beneficial.
  if (TI->getSuccessor(0) == TI->getSuccessor(1))
    return {};

  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
                                      AccessesToCheck)) {
    Info->KnownValue = ConstantInt::getTrue(TI->getContext());
    return Info;
  }
  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
                                      AccessesToCheck)) {
    Info->KnownValue = ConstantInt::getFalse(TI->getContext());
    return Info;
  }

  return {};
}
