LoopUtils.cpp source code [llvm/lib/Transforms/Utils/LoopUtils.cpp]

1	//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines common loop utility functions.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Transforms/Utils/LoopUtils.h"
14	#include "llvm/ADT/DenseSet.h"
15	#include "llvm/ADT/PriorityWorklist.h"
16	#include "llvm/ADT/ScopeExit.h"
17	#include "llvm/ADT/SetVector.h"
18	#include "llvm/ADT/SmallPtrSet.h"
19	#include "llvm/ADT/SmallVector.h"
20	#include "llvm/Analysis/AliasAnalysis.h"
21	#include "llvm/Analysis/BasicAliasAnalysis.h"
22	#include "llvm/Analysis/DomTreeUpdater.h"
23	#include "llvm/Analysis/GlobalsModRef.h"
24	#include "llvm/Analysis/InstSimplifyFolder.h"
25	#include "llvm/Analysis/LoopAccessAnalysis.h"
26	#include "llvm/Analysis/LoopInfo.h"
27	#include "llvm/Analysis/LoopPass.h"
28	#include "llvm/Analysis/MemorySSA.h"
29	#include "llvm/Analysis/MemorySSAUpdater.h"
30	#include "llvm/Analysis/ScalarEvolution.h"
31	#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
32	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
33	#include "llvm/IR/DIBuilder.h"
34	#include "llvm/IR/Dominators.h"
35	#include "llvm/IR/Instructions.h"
36	#include "llvm/IR/IntrinsicInst.h"
37	#include "llvm/IR/MDBuilder.h"
38	#include "llvm/IR/Module.h"
39	#include "llvm/IR/PatternMatch.h"
40	#include "llvm/IR/ProfDataUtils.h"
41	#include "llvm/IR/ValueHandle.h"
42	#include "llvm/InitializePasses.h"
43	#include "llvm/Pass.h"
44	#include "llvm/Support/Debug.h"
45	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46	#include "llvm/Transforms/Utils/Local.h"
47	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
48
49	using namespace llvm;
50	using namespace llvm::PatternMatch;
51
52	#define DEBUG_TYPE "loop-utils"
53
54	static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
55	static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
56
57	bool llvm::formDedicatedExitBlocks(Loop L, DominatorTree DT, LoopInfo *LI,
58	MemorySSAUpdater *MSSAU,
59	bool PreserveLCSSA) {
60	bool Changed = false;
61
62	// We re-use a vector for the in-loop predecesosrs.
63	SmallVector<BasicBlock *, `4`> InLoopPredecessors;
64
65	auto RewriteExit = [&](BasicBlock *BB) {
66	assert(InLoopPredecessors.empty() &&
67	"Must start with an empty predecessors list!");
68	auto Cleanup = make_scope_exit(F: [&] { InLoopPredecessors.clear(); });
69
70	// See if there are any non-loop predecessors of this exit block and
71	// keep track of the in-loop predecessors.
72	bool IsDedicatedExit = true;
73	for (auto *PredBB : predecessors(BB))
74	if (L->contains(BB: PredBB)) {
75	if (isa<IndirectBrInst>(Val: PredBB->getTerminator()))
76	// We cannot rewrite exiting edges from an indirectbr.
77	return false;
78
79	InLoopPredecessors.push_back(Elt: PredBB);
80	} else {
81	IsDedicatedExit = false;
82	}
83
84	assert(!InLoopPredecessors.empty() && "Must have some loop predecessor!");
85
86	// Nothing to do if this is already a dedicated exit.
87	if (IsDedicatedExit)
88	return false;
89
90	auto *NewExitBB = SplitBlockPredecessors(
91	BB, Preds: InLoopPredecessors, Suffix: ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
92
93	if (!NewExitBB)
94	LLVM_DEBUG(
95	dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
96	<< *L << "\n");
97	else
98	LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
99	<< NewExitBB->getName() << "\n");
100	return true;
101	};
102
103	// Walk the exit blocks directly rather than building up a data structure for
104	// them, but only visit each one once.
105	SmallPtrSet<BasicBlock *, `4`> Visited;
106	for (auto *BB : L->blocks())
107	for (auto *SuccBB : successors(BB)) {
108	// We're looking for exit blocks so skip in-loop successors.
109	if (L->contains(BB: SuccBB))
110	continue;
111
112	// Visit each exit block exactly once.
113	if (!Visited.insert(Ptr: SuccBB).second)
114	continue;
115
116	Changed \|= RewriteExit (SuccBB);
117	}
118
119	return Changed;
120	}
121
122	/// Returns the instructions that use values defined in the loop.
123	SmallVector<Instruction , `8`> llvm::findDefsUsedOutsideOfLoop(Loop L) {
124	SmallVector<Instruction *, `8`> UsedOutside;
125
126	for (auto *Block : L->getBlocks())
127	// FIXME: I believe that this could use copy_if if the Inst reference could
128	// be adapted into a pointer.
129	for (auto &Inst : *Block) {
130	auto Users = Inst.users();
131	if (any_of(Range&: Users, P: [&](User *U) {
132	auto *Use = cast<Instruction>(Val: U);
133	return !L->contains(BB: Use->getParent());
134	}))
135	UsedOutside.push_back(Elt: &Inst);
136	}
137
138	return UsedOutside;
139	}
140
141	void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
142	// By definition, all loop passes need the LoopInfo analysis and the
143	// Dominator tree it depends on. Because they all participate in the loop
144	// pass manager, they must also preserve these.
145	AU.addRequired<DominatorTreeWrapperPass>();
146	AU.addPreserved<DominatorTreeWrapperPass>();
147	AU.addRequired<LoopInfoWrapperPass>();
148	AU.addPreserved<LoopInfoWrapperPass>();
149
150	// We must also preserve LoopSimplify and LCSSA. We locally access their IDs
151	// here because users shouldn't directly get them from this header.
152	extern char &LoopSimplifyID;
153	extern char &LCSSAID;
154	AU.addRequiredID(ID&: LoopSimplifyID);
155	AU.addPreservedID(ID&: LoopSimplifyID);
156	AU.addRequiredID(ID&: LCSSAID);
157	AU.addPreservedID(ID&: LCSSAID);
158	// This is used in the LPPassManager to perform LCSSA verification on passes
159	// which preserve lcssa form
160	AU.addRequired<LCSSAVerificationPass>();
161	AU.addPreserved<LCSSAVerificationPass>();
162
163	// Loop passes are designed to run inside of a loop pass manager which means
164	// that any function analyses they require must be required by the first loop
165	// pass in the manager (so that it is computed before the loop pass manager
166	// runs) and preserved by all loop pasess in the manager. To make this
167	// reasonably robust, the set needed for most loop passes is maintained here.
168	// If your loop pass requires an analysis not listed here, you will need to
169	// carefully audit the loop pass manager nesting structure that results.
170	AU.addRequired<AAResultsWrapperPass>();
171	AU.addPreserved<AAResultsWrapperPass>();
172	AU.addPreserved<BasicAAWrapperPass>();
173	AU.addPreserved<GlobalsAAWrapperPass>();
174	AU.addPreserved<SCEVAAWrapperPass>();
175	AU.addRequired<ScalarEvolutionWrapperPass>();
176	AU.addPreserved<ScalarEvolutionWrapperPass>();
177	// FIXME: When all loop passes preserve MemorySSA, it can be required and
178	// preserved here instead of the individual handling in each pass.
179	}
180
181	/// Manually defined generic "LoopPass" dependency initialization. This is used
182	/// to initialize the exact set of passes from above in \c
183	/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
184	/// with:
185	///
186	/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
187	///
188	/// As-if "LoopPass" were a pass.
189	void llvm::initializeLoopPassPass(PassRegistry &Registry) {
190	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
191	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
192	INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
193	INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
194	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
195	INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
196	INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
197	INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
198	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
199	INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
200	}
201
202	/// Create MDNode for input string.
203	static MDNode createStringMetadata(Loop TheLoop, StringRef Name, unsigned V) {
204	LLVMContext &Context = TheLoop->getHeader()->getContext();
205	Metadata *MDs[] = {
206	MDString::get(Context, Str: Name),
207	ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V))};
208	return MDNode::get(Context, MDs);
209	}
210
211	/// Set input string into loop metadata by keeping other values intact.
212	/// If the string is already in loop metadata update value if it is
213	/// different.
214	void llvm::addStringMetadataToLoop(Loop TheLoop, const* char *StringMD,
215	unsigned V) {
216	SmallVector<Metadata *, `4`> MDs(`1`);
217	// If the loop already has metadata, retain it.
218	MDNode *LoopID = TheLoop->getLoopID();
219	if (LoopID) {
220	for (unsigned i = `1`, ie = LoopID->getNumOperands(); i < ie; ++i) {
221	MDNode *Node = cast<MDNode>(Val: LoopID->getOperand(I: i));
222	// If it is of form key = value, try to parse it.
223	if (Node->getNumOperands() == `2`) {
224	MDString *S = dyn_cast<MDString>(Val: Node->getOperand(I: `0`));
225	if (S && S->getString().equals(RHS: StringMD)) {
226	ConstantInt *IntMD =
227	mdconst::extract_or_null<ConstantInt>(MD: Node->getOperand(I: `1`));
228	if (IntMD && IntMD->getSExtValue() == V)
229	// It is already in place. Do nothing.
230	return;
231	// We need to update the value, so just skip it here and it will
232	// be added after copying other existed nodes.
233	continue;
234	}
235	}
236	MDs.push_back(Elt: Node);
237	}
238	}
239	// Add new metadata.
240	MDs.push_back(Elt: createStringMetadata(TheLoop, Name: StringMD, V));
241	// Replace current metadata node with new one.
242	LLVMContext &Context = TheLoop->getHeader()->getContext();
243	MDNode *NewLoopID = MDNode::get(Context, MDs);
244	// Set operand 0 to refer to the loop id itself.
245	NewLoopID->replaceOperandWith(I: `0`, New: NewLoopID);
246	TheLoop->setLoopID(NewLoopID);
247	}
248
249	std::optional<ElementCount>
250	llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
251	std::optional<int> Width =
252	getOptionalIntLoopAttribute(TheLoop, Name: "llvm.loop.vectorize.width");
253
254	if (Width) {
255	std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256	TheLoop, Name: "llvm.loop.vectorize.scalable.enable");
257	return ElementCount::get(MinVal: Width, Scalable: IsScalable.value_or(u: false*));
258	}
259
260	return std::nullopt;
261	}
262
263	std::optional<MDNode *> llvm::makeFollowupLoopID(
264	MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
265	const char InheritOptionsExceptPrefix, bool* AlwaysNew) {
266	if (!OrigLoopID) {
267	if (AlwaysNew)
268	return nullptr;
269	return std::nullopt;
270	}
271
272	assert(OrigLoopID->getOperand(`0`) == OrigLoopID);
273
274	bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275	bool InheritSomeAttrs =
276	InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[`0`] != `'\0'`;
277	SmallVector<Metadata *, `8`> MDs;
278	MDs.push_back(Elt: nullptr);
279
280	bool Changed = false;
281	if (InheritAllAttrs \|\| InheritSomeAttrs) {
282	for (const MDOperand &Existing : drop_begin(RangeOrContainer: OrigLoopID->operands())) {
283	MDNode *Op = cast<MDNode>(Val: Existing.get());
284
285	auto InheritThisAttribute = [InheritSomeAttrs,
286	InheritOptionsExceptPrefix](MDNode *Op) {
287	if (!InheritSomeAttrs)
288	return false;
289
290	// Skip malformatted attribute metadata nodes.
291	if (Op->getNumOperands() == `0`)
292	return true;
293	Metadata *NameMD = Op->getOperand(I: `0`).get();
294	if (!isa<MDString>(Val: NameMD))
295	return true;
296	StringRef AttrName = cast<MDString>(Val: NameMD)->getString();
297
298	// Do not inherit excluded attributes.
299	return !AttrName.starts_with(Prefix: InheritOptionsExceptPrefix);
300	};
301
302	if (InheritThisAttribute (Op))
303	MDs.push_back(Elt: Op);
304	else
305	Changed = true;
306	}
307	} else {
308	// Modified if we dropped at least one attribute.
309	Changed = OrigLoopID->getNumOperands() > `1`;
310	}
311
312	bool HasAnyFollowup = false;
313	for (StringRef OptionName : FollowupOptions) {
314	MDNode *FollowupNode = findOptionMDForLoopID(LoopID: OrigLoopID, Name: OptionName);
315	if (!FollowupNode)
316	continue;
317
318	HasAnyFollowup = true;
319	for (const MDOperand &Option : drop_begin(RangeOrContainer: FollowupNode->operands())) {
320	MDs.push_back(Elt: Option.get());
321	Changed = true;
322	}
323	}
324
325	// Attributes of the followup loop not specified explicity, so signal to the
326	// transformation pass to add suitable attributes.
327	if (!AlwaysNew && !HasAnyFollowup)
328	return std::nullopt;
329
330	// If no attributes were added or remove, the previous loop Id can be reused.
331	if (!AlwaysNew && !Changed)
332	return OrigLoopID;
333
334	// No attributes is equivalent to having no !llvm.loop metadata at all.
335	if (MDs.size() == `1`)
336	return nullptr;
337
338	// Build the new loop ID.
339	MDTuple *FollowupLoopID = MDNode::get(Context&: OrigLoopID->getContext(), MDs);
340	FollowupLoopID->replaceOperandWith(I: `0`, New: FollowupLoopID);
341	return FollowupLoopID;
342	}
343
344	bool llvm::hasDisableAllTransformsHint(const Loop *L) {
345	return getBooleanLoopAttribute(TheLoop: L, Name: LLVMLoopDisableNonforced);
346	}
347
348	bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
349	return getBooleanLoopAttribute(TheLoop: L, Name: LLVMLoopDisableLICM);
350	}
351
352	TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
353	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.disable"))
354	return TM_SuppressedByUser;
355
356	std::optional<int> Count =
357	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.count");
358	if (Count)
359	return *Count == `1` ? TM_SuppressedByUser : TM_ForcedByUser;
360
361	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.enable"))
362	return TM_ForcedByUser;
363
364	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll.full"))
365	return TM_ForcedByUser;
366
367	if (hasDisableAllTransformsHint(L))
368	return TM_Disable;
369
370	return TM_Unspecified;
371	}
372
373	TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
374	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.disable"))
375	return TM_SuppressedByUser;
376
377	std::optional<int> Count =
378	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.count");
379	if (Count)
380	return *Count == `1` ? TM_SuppressedByUser : TM_ForcedByUser;
381
382	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.unroll_and_jam.enable"))
383	return TM_ForcedByUser;
384
385	if (hasDisableAllTransformsHint(L))
386	return TM_Disable;
387
388	return TM_Unspecified;
389	}
390
391	TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
392	std::optional<bool> Enable =
393	getOptionalBoolLoopAttribute(TheLoop: L, Name: "llvm.loop.vectorize.enable");
394
395	if (Enable == false)
396	return TM_SuppressedByUser;
397
398	std::optional<ElementCount> VectorizeWidth =
399	getOptionalElementCountLoopAttribute(TheLoop: L);
400	std::optional<int> InterleaveCount =
401	getOptionalIntLoopAttribute(TheLoop: L, Name: "llvm.loop.interleave.count");
402
403	// 'Forcing' vector width and interleave count to one effectively disables
404	// this tranformation.
405	if (Enable == true && VectorizeWidth && VectorizeWidth ->isScalar() &&
406	InterleaveCount == `1`)
407	return TM_SuppressedByUser;
408
409	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.isvectorized"))
410	return TM_Disable;
411
412	if (Enable == true)
413	return TM_ForcedByUser;
414
415	if ((VectorizeWidth && VectorizeWidth ->isScalar()) && InterleaveCount == `1`)
416	return TM_Disable;
417
418	if ((VectorizeWidth && VectorizeWidth ->isVector()) \|\| InterleaveCount > `1`)
419	return TM_Enable;
420
421	if (hasDisableAllTransformsHint(L))
422	return TM_Disable;
423
424	return TM_Unspecified;
425	}
426
427	TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
428	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.distribute.enable"))
429	return TM_ForcedByUser;
430
431	if (hasDisableAllTransformsHint(L))
432	return TM_Disable;
433
434	return TM_Unspecified;
435	}
436
437	TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
438	if (getBooleanLoopAttribute(TheLoop: L, Name: "llvm.loop.licm_versioning.disable"))
439	return TM_SuppressedByUser;
440
441	if (hasDisableAllTransformsHint(L))
442	return TM_Disable;
443
444	return TM_Unspecified;
445	}
446
447	/// Does a BFS from a given node to all of its children inside a given loop.
448	/// The returned vector of nodes includes the starting point.
449	SmallVector<DomTreeNode *, `16`>
450	llvm::collectChildrenInLoop(DomTreeNode N, const* Loop *CurLoop) {
451	SmallVector<DomTreeNode *, `16`> Worklist;
452	auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
453	// Only include subregions in the top level loop.
454	BasicBlock *BB = DTN->getBlock();
455	if (CurLoop->contains(BB))
456	Worklist.push_back(Elt: DTN);
457	};
458
459	AddRegionToWorklist (N);
460
461	for (size_t I = `0`; I < Worklist.size(); I++) {
462	for (DomTreeNode *Child : Worklist [I]->children())
463	AddRegionToWorklist (Child);
464	}
465
466	return Worklist;
467	}
468
469	bool llvm::isAlmostDeadIV(PHINode PN, BasicBlock LatchBlock, Value *Cond) {
470	int LatchIdx = PN->getBasicBlockIndex(BB: LatchBlock);
471	assert(LatchIdx != -`1` && "LatchBlock is not a case in this PHINode");
472	Value *IncV = PN->getIncomingValue(i: LatchIdx);
473
474	for (User *U : PN->users())
475	if (U != Cond && U != IncV) return false;
476
477	for (User *U : IncV->users())
478	if (U != Cond && U != PN) return false;
479	return true;
480	}
481
482
483	void llvm::deleteDeadLoop(Loop L, DominatorTree DT, ScalarEvolution *SE,
484	LoopInfo LI, MemorySSA MSSA) {
485	assert((!DT \|\| L->isLCSSAForm(*DT)) && "Expected LCSSA!");
486	auto *Preheader = L->getLoopPreheader();
487	assert(Preheader && "Preheader should exist!");
488
489	std::unique_ptr<MemorySSAUpdater> MSSAU;
490	if (MSSA)
491	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
492
493	// Now that we know the removal is safe, remove the loop by changing the
494	// branch from the preheader to go to the single exit block.
495	//
496	// Because we're deleting a large chunk of code at once, the sequence in which
497	// we remove things is very important to avoid invalidation issues.
498
499	// Tell ScalarEvolution that the loop is deleted. Do this before
500	// deleting the loop so that ScalarEvolution can look at the loop
501	// to determine what it needs to clean up.
502	if (SE) {
503	SE->forgetLoop(L);
504	SE->forgetBlockAndLoopDispositions();
505	}
506
507	Instruction *OldTerm = Preheader->getTerminator();
508	assert(!OldTerm->mayHaveSideEffects() &&
509	"Preheader must end with a side-effect-free terminator");
510	assert(OldTerm->getNumSuccessors() == `1` &&
511	"Preheader must have a single successor");
512	// Connect the preheader to the exit block. Keep the old edge to the header
513	// around to perform the dominator tree update in two separate steps
514	// -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
515	// preheader -> header.
516	//
517	//
518	// 0. Preheader 1. Preheader 2. Preheader
519	// \| \| \| \|
520	// V \| V \|
521	// Header <--\ \| Header <--\ \| Header <--\
522	// \| \| \| \| \| \| \| \| \| \| \|
523	// \| V \| \| \| V \| \| \| V \|
524	// \| Body --/ \| \| Body --/ \| \| Body --/
525	// V V V V V
526	// Exit Exit Exit
527	//
528	// By doing this is two separate steps we can perform the dominator tree
529	// update without using the batch update API.
530	//
531	// Even when the loop is never executed, we cannot remove the edge from the
532	// source block to the exit block. Consider the case where the unexecuted loop
533	// branches back to an outer loop. If we deleted the loop and removed the edge
534	// coming to this inner loop, this will break the outer loop structure (by
535	// deleting the backedge of the outer loop). If the outer loop is indeed a
536	// non-loop, it will be deleted in a future iteration of loop deletion pass.
537	IRBuilder<> Builder(OldTerm);
538
539	auto *ExitBlock = L->getUniqueExitBlock();
540	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
541	if (ExitBlock) {
542	assert(ExitBlock && "Should have a unique exit block!");
543	assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
544
545	Builder.CreateCondBr(Cond: Builder.getFalse(), True: L->getHeader(), False: ExitBlock);
546	// Remove the old branch. The conditional branch becomes a new terminator.
547	OldTerm->eraseFromParent();
548
549	// Rewrite phis in the exit block to get their inputs from the Preheader
550	// instead of the exiting block.
551	for (PHINode &P : ExitBlock->phis()) {
552	// Set the zero'th element of Phi to be from the preheader and remove all
553	// other incoming values. Given the loop has dedicated exits, all other
554	// incoming values must be from the exiting blocks.
555	int PredIndex = `0`;
556	P.setIncomingBlock(i: PredIndex, BB: Preheader);
557	// Removes all incoming values from all other exiting blocks (including
558	// duplicate values from an exiting block).
559	// Nuke all entries except the zero'th entry which is the preheader entry.
560	P.removeIncomingValueIf(Predicate: [](unsigned Idx) { return Idx != `0`; },
561	/ DeletePHIIfEmpty / false);
562
563	assert((P.getNumIncomingValues() == `1` &&
564	P.getIncomingBlock(PredIndex) == Preheader) &&
565	"Should have exactly one value and that's from the preheader!");
566	}
567
568	if (DT) {
569	DTU.applyUpdates(Updates: {{DominatorTree::Insert, Preheader, ExitBlock}});
570	if (MSSA) {
571	MSSAU ->applyUpdates(Updates: {{DominatorTree::Insert, Preheader, ExitBlock}},
572	DT&: *DT);
573	if (VerifyMemorySSA)
574	MSSA->verifyMemorySSA();
575	}
576	}
577
578	// Disconnect the loop body by branching directly to its exit.
579	Builder.SetInsertPoint(Preheader->getTerminator());
580	Builder.CreateBr(Dest: ExitBlock);
581	// Remove the old branch.
582	Preheader->getTerminator()->eraseFromParent();
583	} else {
584	assert(L->hasNoExitBlocks() &&
585	"Loop should have either zero or one exit blocks.");
586
587	Builder.SetInsertPoint(OldTerm);
588	Builder.CreateUnreachable();
589	Preheader->getTerminator()->eraseFromParent();
590	}
591
592	if (DT) {
593	DTU.applyUpdates(Updates: {{DominatorTree::Delete, Preheader, L->getHeader()}});
594	if (MSSA) {
595	MSSAU ->applyUpdates(Updates: {{DominatorTree::Delete, Preheader, L->getHeader()}},
596	DT&: *DT);
597	SmallSetVector<BasicBlock *, `8`> DeadBlockSet(L->block_begin(),
598	L->block_end());
599	MSSAU ->removeBlocks(DeadBlocks: DeadBlockSet);
600	if (VerifyMemorySSA)
601	MSSA->verifyMemorySSA();
602	}
603	}
604
605	// Use a map to unique and a vector to guarantee deterministic ordering.
606	llvm::SmallDenseSet<DebugVariable, `4`> DeadDebugSet;
607	llvm::SmallVector<DbgVariableIntrinsic *, `4`> DeadDebugInst;
608	llvm::SmallVector<DbgVariableRecord *, `4`> DeadDbgVariableRecords;
609
610	if (ExitBlock) {
611	// Given LCSSA form is satisfied, we should not have users of instructions
612	// within the dead loop outside of the loop. However, LCSSA doesn't take
613	// unreachable uses into account. We handle them here.
614	// We could do it after drop all references (in this case all users in the
615	// loop will be already eliminated and we have less work to do but according
616	// to API doc of User::dropAllReferences only valid operation after dropping
617	// references, is deletion. So let's substitute all usages of
618	// instruction from the loop with poison value of corresponding type first.
619	for (auto *Block : L->blocks())
620	for (Instruction &I : *Block) {
621	auto *Poison = PoisonValue::get(T: I.getType());
622	for (Use &U : llvm::make_early_inc_range(Range: I.uses())) {
623	if (auto *Usr = dyn_cast<Instruction>(Val: U.getUser()))
624	if (L->contains(BB: Usr->getParent()))
625	continue;
626	// If we have a DT then we can check that uses outside a loop only in
627	// unreachable block.
628	if (DT)
629	assert(!DT->isReachableFromEntry(U) &&
630	"Unexpected user in reachable block");
631	U.set(Poison);
632	}
633
634	// RemoveDIs: do the same as below for DbgVariableRecords.
635	if (Block->IsNewDbgInfoFormat) {
636	for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
637	Range: filterDbgVars(R: I.getDbgRecordRange()))) {
638	DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
639	DVR.getDebugLoc().get());
640	if (!DeadDebugSet.insert(V: Key).second)
641	continue;
642	// Unlinks the DVR from it's container, for later insertion.
643	DVR.removeFromParent();
644	DeadDbgVariableRecords.push_back(Elt: &DVR);
645	}
646	}
647
648	// For one of each variable encountered, preserve a debug intrinsic (set
649	// to Poison) and transfer it to the loop exit. This terminates any
650	// variable locations that were set during the loop.
651	auto *DVI = dyn_cast<DbgVariableIntrinsic>(Val: &I);
652	if (!DVI)
653	continue;
654	if (!DeadDebugSet.insert(V: DebugVariable (DVI)).second)
655	continue;
656	DeadDebugInst.push_back(Elt: DVI);
657	}
658
659	// After the loop has been deleted all the values defined and modified
660	// inside the loop are going to be unavailable. Values computed in the
661	// loop will have been deleted, automatically causing their debug uses
662	// be be replaced with undef. Loop invariant values will still be available.
663	// Move dbg.values out the loop so that earlier location ranges are still
664	// terminated and loop invariant assignments are preserved.
665	DIBuilder DIB(*ExitBlock->getModule());
666	BasicBlock::iterator InsertDbgValueBefore =
667	ExitBlock->getFirstInsertionPt();
668	assert(InsertDbgValueBefore != ExitBlock->end() &&
669	"There should be a non-PHI instruction in exit block, else these "
670	"instructions will have no parent.");
671
672	for (auto *DVI : DeadDebugInst)
673	DVI->moveBefore(BB&: *ExitBlock, I: InsertDbgValueBefore);
674
675	// Due to the "head" bit in BasicBlock::iterator, we're going to insert
676	// each DbgVariableRecord right at the start of the block, wheras dbg.values
677	// would be repeatedly inserted before the first instruction. To replicate
678	// this behaviour, do it backwards.
679	for (DbgVariableRecord *DVR : llvm::reverse(C&: DeadDbgVariableRecords))
680	ExitBlock->insertDbgRecordBefore(DR: DVR, Here: InsertDbgValueBefore);
681	}
682
683	// Remove the block from the reference counting scheme, so that we can
684	// delete it freely later.
685	for (auto *Block : L->blocks())
686	Block->dropAllReferences();
687
688	if (MSSA && VerifyMemorySSA)
689	MSSA->verifyMemorySSA();
690
691	if (LI) {
692	// Erase the instructions and the blocks without having to worry
693	// about ordering because we already dropped the references.
694	// NOTE: This iteration is safe because erasing the block does not remove
695	// its entry from the loop's block list. We do that in the next section.
696	for (BasicBlock *BB : L->blocks())
697	BB->eraseFromParent();
698
699	// Finally, the blocks from loopinfo. This has to happen late because
700	// otherwise our loop iterators won't work.
701
702	SmallPtrSet<BasicBlock *, `8`> blocks;
703	blocks.insert(I: L->block_begin(), E: L->block_end());
704	for (BasicBlock *BB : blocks)
705	LI->removeBlock(BB);
706
707	// The last step is to update LoopInfo now that we've eliminated this loop.
708	// Note: LoopInfo::erase remove the given loop and relink its subloops with
709	// its parent. While removeLoop/removeChildLoop remove the given loop but
710	// not relink its subloops, which is what we want.
711	if (Loop *ParentLoop = L->getParentLoop()) {
712	Loop::iterator I = find(Range&: *ParentLoop, Val: L);
713	assert(I != ParentLoop->end() && "Couldn't find loop");
714	ParentLoop->removeChildLoop(I);
715	} else {
716	Loop::iterator I = find(Range&: *LI, Val: L);
717	assert(I != LI->end() && "Couldn't find loop");
718	LI->removeLoop(I);
719	}
720	LI->destroy(L);
721	}
722	}
723
724	void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
725	LoopInfo &LI, MemorySSA *MSSA) {
726	auto *Latch = L->getLoopLatch();
727	assert(Latch && "multiple latches not yet supported");
728	auto *Header = L->getHeader();
729	Loop *OutermostLoop = L->getOutermostLoop();
730
731	SE.forgetLoop(L);
732	SE.forgetBlockAndLoopDispositions();
733
734	std::unique_ptr<MemorySSAUpdater> MSSAU;
735	if (MSSA)
736	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
737
738	// Update the CFG and domtree. We chose to special case a couple of
739	// of common cases for code quality and test readability reasons.
740	[&]() -> void {
741	if (auto *BI = dyn_cast<BranchInst>(Val: Latch->getTerminator())) {
742	if (!BI->isConditional()) {
743	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
744	(void)changeToUnreachable(I: BI, /PreserveLCSSA/ true, DTU: &DTU,
745	MSSAU: MSSAU.get());
746	return;
747	}
748
749	// Conditional latch/exit - note that latch can be shared by inner
750	// and outer loop so the other target doesn't need to an exit
751	if (L->isLoopExiting(BB: Latch)) {
752	// TODO: Generalize ConstantFoldTerminator so that it can be used
753	// here without invalidating LCSSA or MemorySSA. (Tricky case for
754	// LCSSA: header is an exit block of a preceeding sibling loop w/o
755	// dedicated exits.)
756	const unsigned ExitIdx = L->contains(BB: BI->getSuccessor(i: `0`)) ? `1` : `0`;
757	BasicBlock *ExitBB = BI->getSuccessor(i: ExitIdx);
758
759	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
760	Header->removePredecessor(Pred: Latch, KeepOneInputPHIs: true);
761
762	IRBuilder<> Builder(BI);
763	auto *NewBI = Builder.CreateBr(Dest: ExitBB);
764	// Transfer the metadata to the new branch instruction (minus the
765	// loop info since this is no longer a loop)
766	NewBI->copyMetadata(SrcInst: *BI, WL: {LLVMContext::MD_dbg,
767	LLVMContext::MD_annotation});
768
769	BI->eraseFromParent();
770	DTU.applyUpdates(Updates: {{DominatorTree::Delete, Latch, Header}});
771	if (MSSA)
772	MSSAU ->applyUpdates(Updates: {{DominatorTree::Delete, Latch, Header}}, DT);
773	return;
774	}
775	}
776
777	// General case. By splitting the backedge, and then explicitly making it
778	// unreachable we gracefully handle corner cases such as switch and invoke
779	// termiantors.
780	auto *BackedgeBB = SplitEdge(From: Latch, To: Header, DT: &DT, LI: &LI, MSSAU: MSSAU.get());
781
782	DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
783	(void)changeToUnreachable(I: BackedgeBB->getTerminator(),
784	/PreserveLCSSA/ true, DTU: &DTU, MSSAU: MSSAU.get());
785	}();
786
787	// Erase (and destroy) this loop instance. Handles relinking sub-loops
788	// and blocks within the loop as needed.
789	LI.erase(L);
790
791	// If the loop we broke had a parent, then changeToUnreachable might have
792	// caused a block to be removed from the parent loop (see loop_nest_lcssa
793	// test case in zero-btc.ll for an example), thus changing the parent's
794	// exit blocks. If that happened, we need to rebuild LCSSA on the outermost
795	// loop which might have a had a block removed.
796	if (OutermostLoop != L)
797	formLCSSARecursively(L&: *OutermostLoop, DT, LI: &LI, SE: &SE);
798	}
799
800
801	/// Checks if \p L has an exiting latch branch. There may also be other
802	/// exiting blocks. Returns branch instruction terminating the loop
803	/// latch if above check is successful, nullptr otherwise.
804	static BranchInst getExpectedExitLoopLatchBranch(Loop L) {
805	BasicBlock *Latch = L->getLoopLatch();
806	if (!Latch)
807	return nullptr;
808
809	BranchInst *LatchBR = dyn_cast<BranchInst>(Val: Latch->getTerminator());
810	if (!LatchBR \|\| LatchBR->getNumSuccessors() != `2` \|\| !L->isLoopExiting(BB: Latch))
811	return nullptr;
812
813	assert((LatchBR->getSuccessor(`0`) == L->getHeader() \|\|
814	LatchBR->getSuccessor(`1`) == L->getHeader()) &&
815	"At least one edge out of the latch must go to the header");
816
817	return LatchBR;
818	}
819
820	/// Return the estimated trip count for any exiting branch which dominates
821	/// the loop latch.
822	static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
823	Loop *L,
824	uint64_t &OrigExitWeight) {
825	// To estimate the number of times the loop body was executed, we want to
826	// know the number of times the backedge was taken, vs. the number of times
827	// we exited the loop.
828	uint64_t LoopWeight, ExitWeight;
829	if (!extractBranchWeights(I: *ExitingBranch, TrueVal&: LoopWeight, FalseVal&: ExitWeight))
830	return std::nullopt;
831
832	if (L->contains(BB: ExitingBranch->getSuccessor(i: `1`)))
833	std::swap(a&: LoopWeight, b&: ExitWeight);
834
835	if (!ExitWeight)
836	// Don't have a way to return predicated infinite
837	return std::nullopt;
838
839	OrigExitWeight = ExitWeight;
840
841	// Estimated exit count is a ratio of the loop weight by the weight of the
842	// edge exiting the loop, rounded to nearest.
843	uint64_t ExitCount = llvm::divideNearest(Numerator: LoopWeight, Denominator: ExitWeight);
844	// Estimated trip count is one plus estimated exit count.
845	return ExitCount + `1`;
846	}
847
848	std::optional<unsigned>
849	llvm::getLoopEstimatedTripCount(Loop *L,
850	unsigned *EstimatedLoopInvocationWeight) {
851	// Currently we take the estimate exit count only from the loop latch,
852	// ignoring other exiting blocks. This can overestimate the trip count
853	// if we exit through another exit, but can never underestimate it.
854	// TODO: incorporate information from other exits
855	if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
856	uint64_t ExitWeight;
857	if (std::optional<uint64_t> EstTripCount =
858	getEstimatedTripCount(ExitingBranch: LatchBranch, L, OrigExitWeight&: ExitWeight)) {
859	if (EstimatedLoopInvocationWeight)
860	*EstimatedLoopInvocationWeight = ExitWeight;
861	return *EstTripCount;
862	}
863	}
864	return std::nullopt;
865	}
866
867	bool llvm::setLoopEstimatedTripCount(Loop L, unsigned* EstimatedTripCount,
868	unsigned EstimatedloopInvocationWeight) {
869	// At the moment, we currently support changing the estimate trip count of
870	// the latch branch only. We could extend this API to manipulate estimated
871	// trip counts for any exit.
872	BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
873	if (!LatchBranch)
874	return false;
875
876	// Calculate taken and exit weights.
877	unsigned LatchExitWeight = `0`;
878	unsigned BackedgeTakenWeight = `0`;
879
880	if (EstimatedTripCount > `0`) {
881	LatchExitWeight = EstimatedloopInvocationWeight;
882	BackedgeTakenWeight = (EstimatedTripCount - `1`) * LatchExitWeight;
883	}
884
885	// Make a swap if back edge is taken when condition is "false".
886	if (LatchBranch->getSuccessor(i: `0`) != L->getHeader())
887	std::swap(a&: BackedgeTakenWeight, b&: LatchExitWeight);
888
889	MDBuilder MDB(LatchBranch->getContext());
890
891	// Set/Update profile metadata.
892	LatchBranch->setMetadata(
893	KindID: LLVMContext::MD_prof,
894	Node: MDB.createBranchWeights(TrueWeight: BackedgeTakenWeight, FalseWeight: LatchExitWeight));
895
896	return true;
897	}
898
899	bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
900	ScalarEvolution &SE) {
901	Loop *OuterL = InnerLoop->getParentLoop();
902	if (!OuterL)
903	return true;
904
905	// Get the backedge taken count for the inner loop
906	BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
907	const SCEV *InnerLoopBECountSC = SE.getExitCount(L: InnerLoop, ExitingBlock: InnerLoopLatch);
908	if (isa<SCEVCouldNotCompute>(Val: InnerLoopBECountSC) \|\|
909	!InnerLoopBECountSC->getType()->isIntegerTy())
910	return false;
911
912	// Get whether count is invariant to the outer loop
913	ScalarEvolution::LoopDisposition LD =
914	SE.getLoopDisposition(S: InnerLoopBECountSC, L: OuterL);
915	if (LD != ScalarEvolution::LoopInvariant)
916	return false;
917
918	return true;
919	}
920
921	unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) {
922	switch (RdxID) {
923	case Intrinsic::vector_reduce_fadd:
924	return Instruction::FAdd;
925	case Intrinsic::vector_reduce_fmul:
926	return Instruction::FMul;
927	case Intrinsic::vector_reduce_add:
928	return Instruction::Add;
929	case Intrinsic::vector_reduce_mul:
930	return Instruction::Mul;
931	case Intrinsic::vector_reduce_and:
932	return Instruction::And;
933	case Intrinsic::vector_reduce_or:
934	return Instruction::Or;
935	case Intrinsic::vector_reduce_xor:
936	return Instruction::Xor;
937	case Intrinsic::vector_reduce_smax:
938	case Intrinsic::vector_reduce_smin:
939	case Intrinsic::vector_reduce_umax:
940	case Intrinsic::vector_reduce_umin:
941	return Instruction::ICmp;
942	case Intrinsic::vector_reduce_fmax:
943	case Intrinsic::vector_reduce_fmin:
944	return Instruction::FCmp;
945	default:
946	llvm_unreachable("Unexpected ID");
947	}
948	}
949
950	Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID) {
951	switch (RdxID) {
952	default:
953	llvm_unreachable("Unknown min/max recurrence kind");
954	case Intrinsic::vector_reduce_umin:
955	return Intrinsic::umin;
956	case Intrinsic::vector_reduce_umax:
957	return Intrinsic::umax;
958	case Intrinsic::vector_reduce_smin:
959	return Intrinsic::smin;
960	case Intrinsic::vector_reduce_smax:
961	return Intrinsic::smax;
962	case Intrinsic::vector_reduce_fmin:
963	return Intrinsic::minnum;
964	case Intrinsic::vector_reduce_fmax:
965	return Intrinsic::maxnum;
966	case Intrinsic::vector_reduce_fminimum:
967	return Intrinsic::minimum;
968	case Intrinsic::vector_reduce_fmaximum:
969	return Intrinsic::maximum;
970	}
971	}
972
973	Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
974	switch (RK) {
975	default:
976	llvm_unreachable("Unknown min/max recurrence kind");
977	case RecurKind::UMin:
978	return Intrinsic::umin;
979	case RecurKind::UMax:
980	return Intrinsic::umax;
981	case RecurKind::SMin:
982	return Intrinsic::smin;
983	case RecurKind::SMax:
984	return Intrinsic::smax;
985	case RecurKind::FMin:
986	return Intrinsic::minnum;
987	case RecurKind::FMax:
988	return Intrinsic::maxnum;
989	case RecurKind::FMinimum:
990	return Intrinsic::minimum;
991	case RecurKind::FMaximum:
992	return Intrinsic::maximum;
993	}
994	}
995
996	RecurKind llvm::getMinMaxReductionRecurKind(Intrinsic::ID RdxID) {
997	switch (RdxID) {
998	case Intrinsic::vector_reduce_smax:
999	return RecurKind::SMax;
1000	case Intrinsic::vector_reduce_smin:
1001	return RecurKind::SMin;
1002	case Intrinsic::vector_reduce_umax:
1003	return RecurKind::UMax;
1004	case Intrinsic::vector_reduce_umin:
1005	return RecurKind::UMin;
1006	case Intrinsic::vector_reduce_fmax:
1007	return RecurKind::FMax;
1008	case Intrinsic::vector_reduce_fmin:
1009	return RecurKind::FMin;
1010	default:
1011	return RecurKind::None;
1012	}
1013	}
1014
1015	CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
1016	switch (RK) {
1017	default:
1018	llvm_unreachable("Unknown min/max recurrence kind");
1019	case RecurKind::UMin:
1020	return CmpInst::ICMP_ULT;
1021	case RecurKind::UMax:
1022	return CmpInst::ICMP_UGT;
1023	case RecurKind::SMin:
1024	return CmpInst::ICMP_SLT;
1025	case RecurKind::SMax:
1026	return CmpInst::ICMP_SGT;
1027	case RecurKind::FMin:
1028	return CmpInst::FCMP_OLT;
1029	case RecurKind::FMax:
1030	return CmpInst::FCMP_OGT;
1031	// We do not add FMinimum/FMaximum recurrence kind here since there is no
1032	// equivalent predicate which compares signed zeroes according to the
1033	// semantics of the intrinsics (llvm.minimum/maximum).
1034	}
1035	}
1036
1037	Value llvm::createAnyOfOp(IRBuilderBase &Builder, Value StartVal,
1038	RecurKind RK, Value Left, Value Right) {
1039	if (auto VTy = dyn_cast<VectorType>(Val: Left->getType()))
1040	StartVal = Builder.CreateVectorSplat(EC: VTy->getElementCount(), V: StartVal);
1041	Value *Cmp =
1042	Builder.CreateCmp(Pred: CmpInst::ICMP_NE, LHS: Left, RHS: StartVal, Name: "rdx.select.cmp");
1043	return Builder.CreateSelect(C: Cmp, True: Left, False: Right, Name: "rdx.select");
1044	}
1045
1046	Value llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value Left,
1047	Value *Right) {
1048	Type *Ty = Left->getType();
1049	if (Ty->isIntOrIntVectorTy() \|\|
1050	(RK == RecurKind::FMinimum \|\| RK == RecurKind::FMaximum)) {
1051	// TODO: Add float minnum/maxnum support when FMF nnan is set.
1052	Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
1053	return Builder.CreateIntrinsic(RetTy: Ty, ID: Id, Args: {Left, Right}, FMFSource: nullptr,
1054	Name: "rdx.minmax");
1055	}
1056	CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
1057	Value *Cmp = Builder.CreateCmp(Pred, LHS: Left, RHS: Right, Name: "rdx.minmax.cmp");
1058	Value *Select = Builder.CreateSelect(C: Cmp, True: Left, False: Right, Name: "rdx.minmax.select");
1059	return Select;
1060	}
1061
1062	// Helper to generate an ordered reduction.
1063	Value llvm::getOrderedReduction(IRBuilderBase &Builder, Value Acc, Value *Src,
1064	unsigned Op, RecurKind RdxKind) {
1065	unsigned VF = cast<FixedVectorType>(Val: Src->getType())->getNumElements();
1066
1067	// Extract and apply reduction ops in ascending order:
1068	// e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
1069	Value *Result = Acc;
1070	for (unsigned ExtractIdx = `0`; ExtractIdx != VF; ++ExtractIdx) {
1071	Value *Ext =
1072	Builder.CreateExtractElement(Vec: Src, Idx: Builder.getInt32(C: ExtractIdx));
1073
1074	if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1075	Result = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)Op, LHS: Result, RHS: Ext,
1076	Name: "bin.rdx");
1077	} else {
1078	assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
1079	"Invalid min/max");
1080	Result = createMinMaxOp(Builder, RK: RdxKind, Left: Result, Right: Ext);
1081	}
1082	}
1083
1084	return Result;
1085	}
1086
1087	// Helper to generate a log2 shuffle reduction.
1088	Value llvm::getShuffleReduction(IRBuilderBase &Builder, Value Src,
1089	unsigned Op, RecurKind RdxKind) {
1090	unsigned VF = cast<FixedVectorType>(Val: Src->getType())->getNumElements();
1091	// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1092	// and vector ops, reducing the set of values being computed by half each
1093	// round.
1094	assert(isPowerOf2_32(VF) &&
1095	"Reduction emission only supported for pow2 vectors!");
1096	// Note: fast-math-flags flags are controlled by the builder configuration
1097	// and are assumed to apply to all generated arithmetic instructions. Other
1098	// poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
1099	// of the builder configuration, and since they're not passed explicitly,
1100	// will never be relevant here. Note that it would be generally unsound to
1101	// propagate these from an intrinsic call to the expansion anyways as we/
1102	// change the order of operations.
1103	Value *TmpVec = Src;
1104	SmallVector<int, `32`> ShuffleMask(VF);
1105	for (unsigned i = VF; i != `1`; i >>= `1`) {
1106	// Move the upper half of the vector to the lower half.
1107	for (unsigned j = `0`; j != i / `2`; ++j)
1108	ShuffleMask [j] = i / `2` + j;
1109
1110	// Fill the rest of the mask with undef.
1111	std::fill(first: &ShuffleMask [i / `2`], last: ShuffleMask.end(), value: -`1`);
1112
1113	Value *Shuf = Builder.CreateShuffleVector(V: TmpVec, Mask: ShuffleMask, Name: "rdx.shuf");
1114
1115	if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1116	TmpVec = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)Op, LHS: TmpVec, RHS: Shuf,
1117	Name: "bin.rdx");
1118	} else {
1119	assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
1120	"Invalid min/max");
1121	TmpVec = createMinMaxOp(Builder, RK: RdxKind, Left: TmpVec, Right: Shuf);
1122	}
1123	}
1124	// The result is in the first element of the vector.
1125	return Builder.CreateExtractElement(Vec: TmpVec, Idx: Builder.getInt32(C: `0`));
1126	}
1127
1128	Value llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value Src,
1129	const RecurrenceDescriptor &Desc,
1130	PHINode *OrigPhi) {
1131	assert(
1132	RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) &&
1133	"Unexpected reduction kind");
1134	Value *InitVal = Desc.getRecurrenceStartValue();
1135	Value NewVal = nullptr*;
1136
1137	// First use the original phi to determine the new value we're trying to
1138	// select from in the loop.
1139	SelectInst SI = nullptr*;
1140	for (auto *U : OrigPhi->users()) {
1141	if ((SI = dyn_cast<SelectInst>(Val: U)))
1142	break;
1143	}
1144	assert(SI && "One user of the original phi should be a select");
1145
1146	if (SI->getTrueValue() == OrigPhi)
1147	NewVal = SI->getFalseValue();
1148	else {
1149	assert(SI->getFalseValue() == OrigPhi &&
1150	"At least one input to the select should be the original Phi");
1151	NewVal = SI->getTrueValue();
1152	}
1153
1154	// Create a splat vector with the new value and compare this to the vector
1155	// we want to reduce.
1156	ElementCount EC = cast<VectorType>(Val: Src->getType())->getElementCount();
1157	Value *Right = Builder.CreateVectorSplat(EC, V: InitVal);
1158	Value *Cmp =
1159	Builder.CreateCmp(Pred: CmpInst::ICMP_NE, LHS: Src, RHS: Right, Name: "rdx.select.cmp");
1160
1161	// If any predicate is true it means that we want to select the new value.
1162	Cmp = Builder.CreateOrReduce(Src: Cmp);
1163	return Builder.CreateSelect(C: Cmp, True: NewVal, False: InitVal, Name: "rdx.select");
1164	}
1165
1166	Value llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value Src,
1167	RecurKind RdxKind) {
1168	auto *SrcVecEltTy = cast<VectorType>(Val: Src->getType())->getElementType();
1169	switch (RdxKind) {
1170	case RecurKind::Add:
1171	return Builder.CreateAddReduce(Src);
1172	case RecurKind::Mul:
1173	return Builder.CreateMulReduce(Src);
1174	case RecurKind::And:
1175	return Builder.CreateAndReduce(Src);
1176	case RecurKind::Or:
1177	return Builder.CreateOrReduce(Src);
1178	case RecurKind::Xor:
1179	return Builder.CreateXorReduce(Src);
1180	case RecurKind::FMulAdd:
1181	case RecurKind::FAdd:
1182	return Builder.CreateFAddReduce(Acc: ConstantFP::getNegativeZero(Ty: SrcVecEltTy),
1183	Src);
1184	case RecurKind::FMul:
1185	return Builder.CreateFMulReduce(Acc: ConstantFP::get(Ty: SrcVecEltTy, V: `1.0`), Src);
1186	case RecurKind::SMax:
1187	return Builder.CreateIntMaxReduce(Src, IsSigned: true);
1188	case RecurKind::SMin:
1189	return Builder.CreateIntMinReduce(Src, IsSigned: true);
1190	case RecurKind::UMax:
1191	return Builder.CreateIntMaxReduce(Src, IsSigned: false);
1192	case RecurKind::UMin:
1193	return Builder.CreateIntMinReduce(Src, IsSigned: false);
1194	case RecurKind::FMax:
1195	return Builder.CreateFPMaxReduce(Src);
1196	case RecurKind::FMin:
1197	return Builder.CreateFPMinReduce(Src);
1198	case RecurKind::FMinimum:
1199	return Builder.CreateFPMinimumReduce(Src);
1200	case RecurKind::FMaximum:
1201	return Builder.CreateFPMaximumReduce(Src);
1202	default:
1203	llvm_unreachable("Unhandled opcode");
1204	}
1205	}
1206
1207	Value *llvm::createTargetReduction(IRBuilderBase &B,
1208	const RecurrenceDescriptor &Desc, Value *Src,
1209	PHINode *OrigPhi) {
1210	// TODO: Support in-order reductions based on the recurrence descriptor.
1211	// All ops in the reduction inherit fast-math-flags from the recurrence
1212	// descriptor.
1213	IRBuilderBase::FastMathFlagGuard FMFGuard(B);
1214	B.setFastMathFlags(Desc.getFastMathFlags());
1215
1216	RecurKind RK = Desc.getRecurrenceKind();
1217	if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind: RK))
1218	return createAnyOfTargetReduction(Builder&: B, Src, Desc, OrigPhi);
1219
1220	return createSimpleTargetReduction(Builder&: B, Src, RdxKind: RK);
1221	}
1222
1223	Value *llvm::createOrderedReduction(IRBuilderBase &B,
1224	const RecurrenceDescriptor &Desc,
1225	Value Src, Value Start) {
1226	assert((Desc.getRecurrenceKind() == RecurKind::FAdd \|\|
1227	Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1228	"Unexpected reduction kind");
1229	assert(Src->getType()->isVectorTy() && "Expected a vector type");
1230	assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1231
1232	return B.CreateFAddReduce(Acc: Start, Src);
1233	}
1234
1235	void llvm::propagateIRFlags(Value I, ArrayRef<Value > VL, Value *OpValue,
1236	bool IncludeWrapFlags) {
1237	auto *VecOp = dyn_cast<Instruction>(Val: I);
1238	if (!VecOp)
1239	return;
1240	auto Intersection = (OpValue == nullptr*) ? dyn_cast<Instruction>(Val: VL [`0`])
1241	: dyn_cast<Instruction>(Val: OpValue);
1242	if (!Intersection)
1243	return;
1244	const unsigned Opcode = Intersection->getOpcode();
1245	VecOp->copyIRFlags(V: Intersection, IncludeWrapFlags);
1246	for (auto *V : VL) {
1247	auto *Instr = dyn_cast<Instruction>(Val: V);
1248	if (!Instr)
1249	continue;
1250	if (OpValue == nullptr \|\| Opcode == Instr->getOpcode())
1251	VecOp->andIRFlags(V);
1252	}
1253	}
1254
1255	bool llvm::isKnownNegativeInLoop(const SCEV S, const* Loop *L,
1256	ScalarEvolution &SE) {
1257	const SCEV *Zero = SE.getZero(Ty: S->getType());
1258	return SE.isAvailableAtLoopEntry(S, L) &&
1259	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SLT, LHS: S, RHS: Zero);
1260	}
1261
1262	bool llvm::isKnownNonNegativeInLoop(const SCEV S, const* Loop *L,
1263	ScalarEvolution &SE) {
1264	const SCEV *Zero = SE.getZero(Ty: S->getType());
1265	return SE.isAvailableAtLoopEntry(S, L) &&
1266	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SGE, LHS: S, RHS: Zero);
1267	}
1268
1269	bool llvm::isKnownPositiveInLoop(const SCEV S, const* Loop *L,
1270	ScalarEvolution &SE) {
1271	const SCEV *Zero = SE.getZero(Ty: S->getType());
1272	return SE.isAvailableAtLoopEntry(S, L) &&
1273	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SGT, LHS: S, RHS: Zero);
1274	}
1275
1276	bool llvm::isKnownNonPositiveInLoop(const SCEV S, const* Loop *L,
1277	ScalarEvolution &SE) {
1278	const SCEV *Zero = SE.getZero(Ty: S->getType());
1279	return SE.isAvailableAtLoopEntry(S, L) &&
1280	SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_SLE, LHS: S, RHS: Zero);
1281	}
1282
1283	bool llvm::cannotBeMinInLoop(const SCEV S, const* Loop *L, ScalarEvolution &SE,
1284	bool Signed) {
1285	unsigned BitWidth = cast<IntegerType>(Val: S->getType())->getBitWidth();
1286	APInt Min = Signed ? APInt::getSignedMinValue(numBits: BitWidth) :
1287	APInt::getMinValue(numBits: BitWidth);
1288	auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1289	return SE.isAvailableAtLoopEntry(S, L) &&
1290	SE.isLoopEntryGuardedByCond(L, Pred: Predicate, LHS: S,
1291	RHS: SE.getConstant(Val: Min));
1292	}
1293
1294	bool llvm::cannotBeMaxInLoop(const SCEV S, const* Loop *L, ScalarEvolution &SE,
1295	bool Signed) {
1296	unsigned BitWidth = cast<IntegerType>(Val: S->getType())->getBitWidth();
1297	APInt Max = Signed ? APInt::getSignedMaxValue(numBits: BitWidth) :
1298	APInt::getMaxValue(numBits: BitWidth);
1299	auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1300	return SE.isAvailableAtLoopEntry(S, L) &&
1301	SE.isLoopEntryGuardedByCond(L, Pred: Predicate, LHS: S,
1302	RHS: SE.getConstant(Val: Max));
1303	}
1304
1305	//===----------------------------------------------------------------------===//
1306	// rewriteLoopExitValues - Optimize IV users outside the loop.
1307	// As a side effect, reduces the amount of IV processing within the loop.
1308	//===----------------------------------------------------------------------===//
1309
1310	static bool hasHardUserWithinLoop(const Loop L, const* Instruction *I) {
1311	SmallPtrSet<const Instruction *, `8`> Visited;
1312	SmallVector<const Instruction *, `8`> WorkList;
1313	Visited.insert(Ptr: I);
1314	WorkList.push_back(Elt: I);
1315	while (!WorkList.empty()) {
1316	const Instruction *Curr = WorkList.pop_back_val();
1317	// This use is outside the loop, nothing to do.
1318	if (!L->contains(Inst: Curr))
1319	continue;
1320	// Do we assume it is a "hard" use which will not be eliminated easily?
1321	if (Curr->mayHaveSideEffects())
1322	return true;
1323	// Otherwise, add all its users to worklist.
1324	for (const auto *U : Curr->users()) {
1325	auto *UI = cast<Instruction>(Val: U);
1326	if (Visited.insert(Ptr: UI).second)
1327	WorkList.push_back(Elt: UI);
1328	}
1329	}
1330	return false;
1331	}
1332
1333	// Collect information about PHI nodes which can be transformed in
1334	// rewriteLoopExitValues.
1335	struct RewritePhi {
1336	PHINode PN; // For which PHI node is this replacement?*
1337	unsigned Ith; // For which incoming value?
1338	const SCEV ExpansionSCEV; // The SCEV of the incoming value we are rewriting.*
1339	Instruction ExpansionPoint; // Where we'd like to expand that SCEV?*
1340	bool HighCost; // Is this expansion a high-cost?
1341
1342	RewritePhi(PHINode P, unsigned* I, const SCEV Val, Instruction ExpansionPt,
1343	bool H)
1344	: PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1345	HighCost(H) {}
1346	};
1347
1348	// Check whether it is possible to delete the loop after rewriting exit
1349	// value. If it is possible, ignore ReplaceExitValue and do rewriting
1350	// aggressively.
1351	static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, `8`> &RewritePhiSet) {
1352	BasicBlock *Preheader = L->getLoopPreheader();
1353	// If there is no preheader, the loop will not be deleted.
1354	if (!Preheader)
1355	return false;
1356
1357	// In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1358	// We obviate multiple ExitingBlocks case for simplicity.
1359	// TODO: If we see testcase with multiple ExitingBlocks can be deleted
1360	// after exit value rewriting, we can enhance the logic here.
1361	SmallVector<BasicBlock *, `4`> ExitingBlocks;
1362	L->getExitingBlocks(ExitingBlocks);
1363	SmallVector<BasicBlock *, `8`> ExitBlocks;
1364	L->getUniqueExitBlocks(ExitBlocks);
1365	if (ExitBlocks.size() != `1` \|\| ExitingBlocks.size() != `1`)
1366	return false;
1367
1368	BasicBlock *ExitBlock = ExitBlocks [`0`];
1369	BasicBlock::iterator BI = ExitBlock->begin();
1370	while (PHINode *P = dyn_cast<PHINode>(Val&: BI)) {
1371	Value *Incoming = P->getIncomingValueForBlock(BB: ExitingBlocks [`0`]);
1372
1373	// If the Incoming value of P is found in RewritePhiSet, we know it
1374	// could be rewritten to use a loop invariant value in transformation
1375	// phase later. Skip it in the loop invariant check below.
1376	bool found = false;
1377	for (const RewritePhi &Phi : RewritePhiSet) {
1378	unsigned i = Phi.Ith;
1379	if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1380	found = true;
1381	break;
1382	}
1383	}
1384
1385	Instruction *I;
1386	if (!found && (I = dyn_cast<Instruction>(Val: Incoming)))
1387	if (!L->hasLoopInvariantOperands(I))
1388	return false;
1389
1390	++BI;
1391	}
1392
1393	for (auto *BB : L->blocks())
1394	if (llvm::any_of(Range&: *BB, P: [](Instruction &I) {
1395	return I.mayHaveSideEffects();
1396	}))
1397	return false;
1398
1399	return true;
1400	}
1401
1402	/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1403	/// and returns true if this Phi is an induction phi in the loop. When
1404	/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1405	static bool checkIsIndPhi(PHINode Phi, Loop L, ScalarEvolution *SE,
1406	InductionDescriptor &ID) {
1407	if (!Phi)
1408	return false;
1409	if (!L->getLoopPreheader())
1410	return false;
1411	if (Phi->getParent() != L->getHeader())
1412	return false;
1413	return InductionDescriptor::isInductionPHI(Phi, L, SE, D&: ID);
1414	}
1415
1416	int llvm::rewriteLoopExitValues(Loop L, LoopInfo LI, TargetLibraryInfo *TLI,
1417	ScalarEvolution *SE,
1418	const TargetTransformInfo *TTI,
1419	SCEVExpander &Rewriter, DominatorTree *DT,
1420	ReplaceExitVal ReplaceExitValue,
1421	SmallVector<WeakTrackingVH, `16`> &DeadInsts) {
1422	// Check a pre-condition.
1423	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
1424	"Indvars did not preserve LCSSA!");
1425
1426	SmallVector<BasicBlock*, `8`> ExitBlocks;
1427	L->getUniqueExitBlocks(ExitBlocks);
1428
1429	SmallVector<RewritePhi, `8`> RewritePhiSet;
1430	// Find all values that are computed inside the loop, but used outside of it.
1431	// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1432	// the exit blocks of the loop to find them.
1433	for (BasicBlock *ExitBB : ExitBlocks) {
1434	// If there are no PHI nodes in this exit block, then no values defined
1435	// inside the loop are used on this path, skip it.
1436	PHINode *PN = dyn_cast<PHINode>(Val: ExitBB->begin());
1437	if (!PN) continue;
1438
1439	unsigned NumPreds = PN->getNumIncomingValues();
1440
1441	// Iterate over all of the PHI nodes.
1442	BasicBlock::iterator BBI = ExitBB->begin();
1443	while ((PN = dyn_cast<PHINode>(Val: BBI ++))) {
1444	if (PN->use_empty())
1445	continue; // dead use, don't replace it
1446
1447	if (!SE->isSCEVable(Ty: PN->getType()))
1448	continue;
1449
1450	// Iterate over all of the values in all the PHI nodes.
1451	for (unsigned i = `0`; i != NumPreds; ++i) {
1452	// If the value being merged in is not integer or is not defined
1453	// in the loop, skip it.
1454	Value *InVal = PN->getIncomingValue(i);
1455	if (!isa<Instruction>(Val: InVal))
1456	continue;
1457
1458	// If this pred is for a subloop, not L itself, skip it.
1459	if (LI->getLoopFor(BB: PN->getIncomingBlock(i)) != L)
1460	continue; // The Block is in a subloop, skip it.
1461
1462	// Check that InVal is defined in the loop.
1463	Instruction *Inst = cast<Instruction>(Val: InVal);
1464	if (!L->contains(Inst))
1465	continue;
1466
1467	// Find exit values which are induction variables in the loop, and are
1468	// unused in the loop, with the only use being the exit block PhiNode,
1469	// and the induction variable update binary operator.
1470	// The exit value can be replaced with the final value when it is cheap
1471	// to do so.
1472	if (ReplaceExitValue == UnusedIndVarInLoop) {
1473	InductionDescriptor ID;
1474	PHINode *IndPhi = dyn_cast<PHINode>(Val: Inst);
1475	if (IndPhi) {
1476	if (!checkIsIndPhi(Phi: IndPhi, L, SE, ID))
1477	continue;
1478	// This is an induction PHI. Check that the only users are PHI
1479	// nodes, and induction variable update binary operators.
1480	if (llvm::any_of(Range: Inst->users(), P: [&](User *U) {
1481	if (!isa<PHINode>(Val: U) && !isa<BinaryOperator>(Val: U))
1482	return true;
1483	BinaryOperator *B = dyn_cast<BinaryOperator>(Val: U);
1484	if (B && B != ID.getInductionBinOp())
1485	return true;
1486	return false;
1487	}))
1488	continue;
1489	} else {
1490	// If it is not an induction phi, it must be an induction update
1491	// binary operator with an induction phi user.
1492	BinaryOperator *B = dyn_cast<BinaryOperator>(Val: Inst);
1493	if (!B)
1494	continue;
1495	if (llvm::any_of(Range: Inst->users(), P: [&](User *U) {
1496	PHINode *Phi = dyn_cast<PHINode>(Val: U);
1497	if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1498	return true;
1499	return false;
1500	}))
1501	continue;
1502	if (B != ID.getInductionBinOp())
1503	continue;
1504	}
1505	}
1506
1507	// Okay, this instruction has a user outside of the current loop
1508	// and varies predictably inside* the loop. Evaluate the value it*
1509	// contains when the loop exits, if possible. We prefer to start with
1510	// expressions which are true for all exits (so as to maximize
1511	// expression reuse by the SCEVExpander), but resort to per-exit
1512	// evaluation if that fails.
1513	const SCEV *ExitValue = SE->getSCEVAtScope(V: Inst, L: L->getParentLoop());
1514	if (isa<SCEVCouldNotCompute>(Val: ExitValue) \|\|
1515	!SE->isLoopInvariant(S: ExitValue, L) \|\|
1516	!Rewriter.isSafeToExpand(S: ExitValue)) {
1517	// TODO: This should probably be sunk into SCEV in some way; maybe a
1518	// getSCEVForExit(SCEV, L, ExitingBB)? It can be generalized for*
1519	// most SCEV expressions and other recurrence types (e.g. shift
1520	// recurrences). Is there existing code we can reuse?
1521	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: PN->getIncomingBlock(i));
1522	if (isa<SCEVCouldNotCompute>(Val: ExitCount))
1523	continue;
1524	if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Inst)))
1525	if (AddRec->getLoop() == L)
1526	ExitValue = AddRec->evaluateAtIteration(It: ExitCount, SE&: *SE);
1527	if (isa<SCEVCouldNotCompute>(Val: ExitValue) \|\|
1528	!SE->isLoopInvariant(S: ExitValue, L) \|\|
1529	!Rewriter.isSafeToExpand(S: ExitValue))
1530	continue;
1531	}
1532
1533	// Computing the value outside of the loop brings no benefit if it is
1534	// definitely used inside the loop in a way which can not be optimized
1535	// away. Avoid doing so unless we know we have a value which computes
1536	// the ExitValue already. TODO: This should be merged into SCEV
1537	// expander to leverage its knowledge of existing expressions.
1538	if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(Val: ExitValue) &&
1539	!isa<SCEVUnknown>(Val: ExitValue) && hasHardUserWithinLoop(L, I: Inst))
1540	continue;
1541
1542	// Check if expansions of this SCEV would count as being high cost.
1543	bool HighCost = Rewriter.isHighCostExpansion(
1544	Exprs: ExitValue, L, Budget: SCEVCheapExpansionBudget, TTI, At: Inst);
1545
1546	// Note that we must not perform expansions until after
1547	// we query all* the costs, because if we perform temporary expansion*
1548	// inbetween, one that we might not intend to keep, said expansion
1549	// may* affect cost calculation of the next SCEV's we'll query,*
1550	// and next SCEV may errneously get smaller cost.
1551
1552	// Collect all the candidate PHINodes to be rewritten.
1553	Instruction *InsertPt =
1554	(isa<PHINode>(Val: Inst) \|\| isa<LandingPadInst>(Val: Inst)) ?
1555	&*Inst->getParent()->getFirstInsertionPt() : Inst;
1556	RewritePhiSet.emplace_back(Args&: PN, Args&: i, Args&: ExitValue, Args&: InsertPt, Args&: HighCost);
1557	}
1558	}
1559	}
1560
1561	// TODO: evaluate whether it is beneficial to change how we calculate
1562	// high-cost: if we have SCEV 'A' which we know we will expand, should we
1563	// calculate the cost of other SCEV's after expanding SCEV 'A', thus
1564	// potentially giving cost bonus to those other SCEV's?
1565
1566	bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1567	int NumReplaced = `0`;
1568
1569	// Transformation.
1570	for (const RewritePhi &Phi : RewritePhiSet) {
1571	PHINode *PN = Phi.PN;
1572
1573	// Only do the rewrite when the ExitValue can be expanded cheaply.
1574	// If LoopCanBeDel is true, rewrite exit value aggressively.
1575	if ((ReplaceExitValue == OnlyCheapRepl \|\|
1576	ReplaceExitValue == UnusedIndVarInLoop) &&
1577	!LoopCanBeDel && Phi.HighCost)
1578	continue;
1579
1580	Value *ExitVal = Rewriter.expandCodeFor(
1581	SH: Phi.ExpansionSCEV, Ty: Phi.PN->getType(), I: Phi.ExpansionPoint);
1582
1583	LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1584	<< `'\n'`
1585	<< " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1586
1587	#ifndef NDEBUG
1588	// If we reuse an instruction from a loop which is neither L nor one of
1589	// its containing loops, we end up breaking LCSSA form for this loop by
1590	// creating a new use of its instruction.
1591	if (auto *ExitInsn = dyn_cast<Instruction>(Val: ExitVal))
1592	if (auto *EVL = LI->getLoopFor(BB: ExitInsn->getParent()))
1593	if (EVL != L)
1594	assert(EVL->contains(L) && "LCSSA breach detected!");
1595	#endif
1596
1597	NumReplaced++;
1598	Instruction *Inst = cast<Instruction>(Val: PN->getIncomingValue(i: Phi.Ith));
1599	PN->setIncomingValue(i: Phi.Ith, V: ExitVal);
1600	// It's necessary to tell ScalarEvolution about this explicitly so that
1601	// it can walk the def-use list and forget all SCEVs, as it may not be
1602	// watching the PHI itself. Once the new exit value is in place, there
1603	// may not be a def-use connection between the loop and every instruction
1604	// which got a SCEVAddRecExpr for that loop.
1605	SE->forgetValue(V: PN);
1606
1607	// If this instruction is dead now, delete it. Don't do it now to avoid
1608	// invalidating iterators.
1609	if (isInstructionTriviallyDead(I: Inst, TLI))
1610	DeadInsts.push_back(Elt: Inst);
1611
1612	// Replace PN with ExitVal if that is legal and does not break LCSSA.
1613	if (PN->getNumIncomingValues() == `1` &&
1614	LI->replacementPreservesLCSSAForm(From: PN, To: ExitVal)) {
1615	PN->replaceAllUsesWith(V: ExitVal);
1616	PN->eraseFromParent();
1617	}
1618	}
1619
1620	// The insertion point instruction may have been deleted; clear it out
1621	// so that the rewriter doesn't trip over it later.
1622	Rewriter.clearInsertPoint();
1623	return NumReplaced;
1624	}
1625
1626	/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1627	/// \p OrigLoop.
1628	void llvm::setProfileInfoAfterUnrolling(Loop OrigLoop, Loop UnrolledLoop,
1629	Loop *RemainderLoop, uint64_t UF) {
1630	assert(UF > `0` && "Zero unrolled factor is not supported");
1631	assert(UnrolledLoop != RemainderLoop &&
1632	"Unrolled and Remainder loops are expected to distinct");
1633
1634	// Get number of iterations in the original scalar loop.
1635	unsigned OrigLoopInvocationWeight = `0`;
1636	std::optional<unsigned> OrigAverageTripCount =
1637	getLoopEstimatedTripCount(L: OrigLoop, EstimatedLoopInvocationWeight: &OrigLoopInvocationWeight);
1638	if (!OrigAverageTripCount)
1639	return;
1640
1641	// Calculate number of iterations in unrolled loop.
1642	unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1643	// Calculate number of iterations for remainder loop.
1644	unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1645
1646	setLoopEstimatedTripCount(L: UnrolledLoop, EstimatedTripCount: UnrolledAverageTripCount,
1647	EstimatedloopInvocationWeight: OrigLoopInvocationWeight);
1648	setLoopEstimatedTripCount(L: RemainderLoop, EstimatedTripCount: RemainderAverageTripCount,
1649	EstimatedloopInvocationWeight: OrigLoopInvocationWeight);
1650	}
1651
1652	/// Utility that implements appending of loops onto a worklist.
1653	/// Loops are added in preorder (analogous for reverse postorder for trees),
1654	/// and the worklist is processed LIFO.
1655	template <typename RangeT>
1656	void llvm::appendReversedLoopsToWorklist(
1657	RangeT &&Loops, SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1658	// We use an internal worklist to build up the preorder traversal without
1659	// recursion.
1660	SmallVector<Loop *, `4`> PreOrderLoops, PreOrderWorklist;
1661
1662	// We walk the initial sequence of loops in reverse because we generally want
1663	// to visit defs before uses and the worklist is LIFO.
1664	for (Loop *RootL : Loops) {
1665	assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1666	assert(PreOrderWorklist.empty() &&
1667	"Must start with an empty preorder walk worklist.");
1668	PreOrderWorklist.push_back(Elt: RootL);
1669	do {
1670	Loop *L = PreOrderWorklist.pop_back_val();
1671	PreOrderWorklist.append(in_start: L->begin(), in_end: L->end());
1672	PreOrderLoops.push_back(Elt: L);
1673	} while (!PreOrderWorklist.empty());
1674
1675	Worklist.insert(Input: std::move(PreOrderLoops));
1676	PreOrderLoops.clear();
1677	}
1678	}
1679
1680	template <typename RangeT>
1681	void llvm::appendLoopsToWorklist(RangeT &&Loops,
1682	SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1683	appendReversedLoopsToWorklist(reverse(Loops), Worklist);
1684	}
1685
1686	template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1687	ArrayRef<Loop > &Loops, SmallPriorityWorklist<Loop , `4`> &Worklist);
1688
1689	template void
1690	llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1691	SmallPriorityWorklist<Loop *, `4`> &Worklist);
1692
1693	void llvm::appendLoopsToWorklist(LoopInfo &LI,
1694	SmallPriorityWorklist<Loop *, `4`> &Worklist) {
1695	appendReversedLoopsToWorklist(Loops&: LI, Worklist);
1696	}
1697
1698	Loop llvm::cloneLoop(Loop L, Loop *PL, ValueToValueMapTy &VM,
1699	LoopInfo LI, LPPassManager LPM) {
1700	Loop &New = *LI->AllocateLoop();
1701	if (PL)
1702	PL->addChildLoop(NewChild: &New);
1703	else
1704	LI->addTopLevelLoop(New: &New);
1705
1706	if (LPM)
1707	LPM->addLoop(L&: New);
1708
1709	// Add all of the blocks in L to the new loop.
1710	for (BasicBlock *BB : L->blocks())
1711	if (LI->getLoopFor(BB) == L)
1712	New.addBasicBlockToLoop(NewBB: cast<BasicBlock>(Val&: VM [BB]), LI&: *LI);
1713
1714	// Add all of the subloops to the new loop.
1715	for (Loop I : L)
1716	cloneLoop(L: I, PL: &New, VM, LI, LPM);
1717
1718	return &New;
1719	}
1720
1721	/// IR Values for the lower and upper bounds of a pointer evolution. We
1722	/// need to use value-handles because SCEV expansion can invalidate previously
1723	/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1724	/// a previous one.
1725	struct PointerBounds {
1726	TrackingVH<Value> Start;
1727	TrackingVH<Value> End;
1728	Value *StrideToCheck;
1729	};
1730
1731	/// Expand code for the lower and upper bound of the pointer group \p CG
1732	/// in \p TheLoop. \return the values for the bounds.
1733	static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
1734	Loop TheLoop, Instruction Loc,
1735	SCEVExpander &Exp, bool HoistRuntimeChecks) {
1736	LLVMContext &Ctx = Loc->getContext();
1737	Type *PtrArithTy = PointerType::get(C&: Ctx, AddressSpace: CG->AddressSpace);
1738
1739	Value Start = nullptr, End = nullptr;
1740	LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1741	const SCEV Low = CG->Low, High = CG->High, Stride = nullptr*;
1742
1743	// If the Low and High values are themselves loop-variant, then we may want
1744	// to expand the range to include those covered by the outer loop as well.
1745	// There is a trade-off here with the advantage being that creating checks
1746	// using the expanded range permits the runtime memory checks to be hoisted
1747	// out of the outer loop. This reduces the cost of entering the inner loop,
1748	// which can be significant for low trip counts. The disadvantage is that
1749	// there is a chance we may now never enter the vectorized inner loop,
1750	// whereas using a restricted range check could have allowed us to enter at
1751	// least once. This is why the behaviour is not currently the default and is
1752	// controlled by the parameter 'HoistRuntimeChecks'.
1753	if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1754	isa<SCEVAddRecExpr>(Val: High) && isa<SCEVAddRecExpr>(Val: Low)) {
1755	auto *HighAR = cast<SCEVAddRecExpr>(Val: High);
1756	auto *LowAR = cast<SCEVAddRecExpr>(Val: Low);
1757	const Loop *OuterLoop = TheLoop->getParentLoop();
1758	const SCEV Recur = LowAR->getStepRecurrence(SE&: Exp.getSE());
1759	if (Recur == HighAR->getStepRecurrence(SE&: *Exp.getSE()) &&
1760	HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1761	BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1762	const SCEV *OuterExitCount =
1763	Exp.getSE()->getExitCount(L: OuterLoop, ExitingBlock: OuterLoopLatch);
1764	if (!isa<SCEVCouldNotCompute>(Val: OuterExitCount) &&
1765	OuterExitCount->getType()->isIntegerTy()) {
1766	const SCEV *NewHigh = cast<SCEVAddRecExpr>(Val: High)->evaluateAtIteration(
1767	It: OuterExitCount, SE&: *Exp.getSE());
1768	if (!isa<SCEVCouldNotCompute>(Val: NewHigh)) {
1769	LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1770	"outer loop in order to permit hoisting\n");
1771	High = NewHigh;
1772	Low = cast<SCEVAddRecExpr>(Val: Low)->getStart();
1773	// If there is a possibility that the stride is negative then we have
1774	// to generate extra checks to ensure the stride is positive.
1775	if (!Exp.getSE()->isKnownNonNegative(S: Recur)) {
1776	Stride = Recur;
1777	LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1778	"positive: "
1779	<< *Stride << `'\n'`);
1780	}
1781	}
1782	}
1783	}
1784	}
1785
1786	Start = Exp.expandCodeFor(SH: Low, Ty: PtrArithTy, I: Loc);
1787	End = Exp.expandCodeFor(SH: High, Ty: PtrArithTy, I: Loc);
1788	if (CG->NeedsFreeze) {
1789	IRBuilder<> Builder(Loc);
1790	Start = Builder.CreateFreeze(V: Start, Name: Start->getName() + ".fr");
1791	End = Builder.CreateFreeze(V: End, Name: End->getName() + ".fr");
1792	}
1793	Value *StrideVal =
1794	Stride ? Exp.expandCodeFor(SH: Stride, Ty: Stride->getType(), I: Loc) : nullptr;
1795	LLVM_DEBUG(dbgs() << "Start: " << Low << " End: " << High << "\n");
1796	return {.Start: Start, .End: End, .StrideToCheck: StrideVal};
1797	}
1798
1799	/// Turns a collection of checks into a collection of expanded upper and
1800	/// lower bounds for both pointers in the check.
1801	static SmallVector<std::pair<PointerBounds, PointerBounds>, `4`>
1802	expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
1803	Instruction Loc, SCEVExpander &Exp, bool* HoistRuntimeChecks) {
1804	SmallVector<std::pair<PointerBounds, PointerBounds>, `4`> ChecksWithBounds;
1805
1806	// Here we're relying on the SCEV Expander's cache to only emit code for the
1807	// same bounds once.
1808	transform(Range: PointerChecks, d_first: std::back_inserter(x&: ChecksWithBounds),
1809	F: [&](const RuntimePointerCheck &Check) {
1810	PointerBounds First = expandBounds(CG: Check.first, TheLoop: L, Loc, Exp,
1811	HoistRuntimeChecks),
1812	Second = expandBounds(CG: Check.second, TheLoop: L, Loc, Exp,
1813	HoistRuntimeChecks);
1814	return std::make_pair(x&: First, y&: Second);
1815	});
1816
1817	return ChecksWithBounds;
1818	}
1819
1820	Value *llvm::addRuntimeChecks(
1821	Instruction Loc, Loop TheLoop,
1822	const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1823	SCEVExpander &Exp, bool HoistRuntimeChecks) {
1824	// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1825	// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1826	auto ExpandedChecks =
1827	expandBounds(PointerChecks, L: TheLoop, Loc, Exp, HoistRuntimeChecks);
1828
1829	LLVMContext &Ctx = Loc->getContext();
1830	IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1831	Loc->getModule()->getDataLayout());
1832	ChkBuilder.SetInsertPoint(Loc);
1833	// Our instructions might fold to a constant.
1834	Value MemoryRuntimeCheck = nullptr*;
1835
1836	for (const auto &Check : ExpandedChecks) {
1837	const PointerBounds &A = Check.first, &B = Check.second;
1838	// Check if two pointers (A and B) conflict where conflict is computed as:
1839	// start(A) <= end(B) && start(B) <= end(A)
1840
1841	assert((A.Start ->getType()->getPointerAddressSpace() ==
1842	B.End ->getType()->getPointerAddressSpace()) &&
1843	(B.Start ->getType()->getPointerAddressSpace() ==
1844	A.End ->getType()->getPointerAddressSpace()) &&
1845	"Trying to bounds check pointers with different address spaces");
1846
1847	// [A\|B].Start points to the first accessed byte under base [A\|B].
1848	// [A\|B].End points to the last accessed byte, plus one.
1849	// There is no conflict when the intervals are disjoint:
1850	// NoConflict = (B.Start >= A.End) \|\| (A.Start >= B.End)
1851	//
1852	// bound0 = (B.Start < A.End)
1853	// bound1 = (A.Start < B.End)
1854	// IsConflict = bound0 & bound1
1855	Value *Cmp0 = ChkBuilder.CreateICmpULT(LHS: A.Start, RHS: B.End, Name: "bound0");
1856	Value *Cmp1 = ChkBuilder.CreateICmpULT(LHS: B.Start, RHS: A.End, Name: "bound1");
1857	Value *IsConflict = ChkBuilder.CreateAnd(LHS: Cmp0, RHS: Cmp1, Name: "found.conflict");
1858	if (A.StrideToCheck) {
1859	Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1860	LHS: A.StrideToCheck, RHS: ConstantInt::get(Ty: A.StrideToCheck->getType(), V: `0`),
1861	Name: "stride.check");
1862	IsConflict = ChkBuilder.CreateOr(LHS: IsConflict, RHS: IsNegativeStride);
1863	}
1864	if (B.StrideToCheck) {
1865	Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1866	LHS: B.StrideToCheck, RHS: ConstantInt::get(Ty: B.StrideToCheck->getType(), V: `0`),
1867	Name: "stride.check");
1868	IsConflict = ChkBuilder.CreateOr(LHS: IsConflict, RHS: IsNegativeStride);
1869	}
1870	if (MemoryRuntimeCheck) {
1871	IsConflict =
1872	ChkBuilder.CreateOr(LHS: MemoryRuntimeCheck, RHS: IsConflict, Name: "conflict.rdx");
1873	}
1874	MemoryRuntimeCheck = IsConflict;
1875	}
1876
1877	return MemoryRuntimeCheck;
1878	}
1879
1880	Value *llvm::addDiffRuntimeChecks(
1881	Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
1882	function_ref<Value (IRBuilderBase &, unsigned)> GetVF, unsigned* IC) {
1883
1884	LLVMContext &Ctx = Loc->getContext();
1885	IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1886	Loc->getModule()->getDataLayout());
1887	ChkBuilder.SetInsertPoint(Loc);
1888	// Our instructions might fold to a constant.
1889	Value MemoryRuntimeCheck = nullptr*;
1890
1891	auto &SE = *Expander.getSE();
1892	// Map to keep track of created compares, The key is the pair of operands for
1893	// the compare, to allow detecting and re-using redundant compares.
1894	DenseMap<std::pair<Value , Value >, Value *> SeenCompares;
1895	for (const auto &C : Checks) {
1896	Type *Ty = C.SinkStart->getType();
1897	// Compute VF IC * AccessSize.*
1898	auto *VFTimesUFTimesSize =
1899	ChkBuilder.CreateMul(LHS: GetVF (ChkBuilder, Ty->getScalarSizeInBits()),
1900	RHS: ConstantInt::get(Ty, V: IC * C.AccessSize));
1901	Value *Diff = Expander.expandCodeFor(
1902	SH: SE.getMinusSCEV(LHS: C.SinkStart, RHS: C.SrcStart), Ty, I: Loc);
1903
1904	// Check if the same compare has already been created earlier. In that case,
1905	// there is no need to check it again.
1906	Value *IsConflict = SeenCompares.lookup(Val: {Diff, VFTimesUFTimesSize});
1907	if (IsConflict)
1908	continue;
1909
1910	IsConflict =
1911	ChkBuilder.CreateICmpULT(LHS: Diff, RHS: VFTimesUFTimesSize, Name: "diff.check");
1912	SeenCompares.insert(KV: {{Diff, VFTimesUFTimesSize}, IsConflict});
1913	if (C.NeedsFreeze)
1914	IsConflict =
1915	ChkBuilder.CreateFreeze(V: IsConflict, Name: IsConflict->getName() + ".fr");
1916	if (MemoryRuntimeCheck) {
1917	IsConflict =
1918	ChkBuilder.CreateOr(LHS: MemoryRuntimeCheck, RHS: IsConflict, Name: "conflict.rdx");
1919	}
1920	MemoryRuntimeCheck = IsConflict;
1921	}
1922
1923	return MemoryRuntimeCheck;
1924	}
1925
1926	std::optional<IVConditionInfo>
1927	llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
1928	const MemorySSA &MSSA, AAResults &AA) {
1929	auto *TI = dyn_cast<BranchInst>(Val: L.getHeader()->getTerminator());
1930	if (!TI \|\| !TI->isConditional())
1931	return {};
1932
1933	auto *CondI = dyn_cast<CmpInst>(Val: TI->getCondition());
1934	// The case with the condition outside the loop should already be handled
1935	// earlier.
1936	if (!CondI \|\| !L.contains(Inst: CondI))
1937	return {};
1938
1939	SmallVector<Instruction *> InstToDuplicate;
1940	InstToDuplicate.push_back(Elt: CondI);
1941
1942	SmallVector<Value *, `4`> WorkList;
1943	WorkList.append(in_start: CondI->op_begin(), in_end: CondI->op_end());
1944
1945	SmallVector<MemoryAccess *, `4`> AccessesToCheck;
1946	SmallVector<MemoryLocation, `4`> AccessedLocs;
1947	while (!WorkList.empty()) {
1948	Instruction *I = dyn_cast<Instruction>(Val: WorkList.pop_back_val());
1949	if (!I \|\| !L.contains(Inst: I))
1950	continue;
1951
1952	// TODO: support additional instructions.
1953	if (!isa<LoadInst>(Val: I) && !isa<GetElementPtrInst>(Val: I))
1954	return {};
1955
1956	// Do not duplicate volatile and atomic loads.
1957	if (auto *LI = dyn_cast<LoadInst>(Val: I))
1958	if (LI->isVolatile() \|\| LI->isAtomic())
1959	return {};
1960
1961	InstToDuplicate.push_back(Elt: I);
1962	if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
1963	if (auto *MemUse = dyn_cast_or_null<MemoryUse>(Val: MA)) {
1964	// Queue the defining access to check for alias checks.
1965	AccessesToCheck.push_back(Elt: MemUse->getDefiningAccess());
1966	AccessedLocs.push_back(Elt: MemoryLocation::get(Inst: I));
1967	} else {
1968	// MemoryDefs may clobber the location or may be atomic memory
1969	// operations. Bail out.
1970	return {};
1971	}
1972	}
1973	WorkList.append(in_start: I->op_begin(), in_end: I->op_end());
1974	}
1975
1976	if (InstToDuplicate.empty())
1977	return {};
1978
1979	SmallVector<BasicBlock *, `4`> ExitingBlocks;
1980	L.getExitingBlocks(ExitingBlocks);
1981	auto HasNoClobbersOnPath =
1982	[&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
1983	MSSAThreshold](BasicBlock Succ, BasicBlock Header,
1984	SmallVector<MemoryAccess *, `4`> AccessesToCheck)
1985	-> std::optional<IVConditionInfo> {
1986	IVConditionInfo Info;
1987	// First, collect all blocks in the loop that are on a patch from Succ
1988	// to the header.
1989	SmallVector<BasicBlock *, `4`> WorkList;
1990	WorkList.push_back(Elt: Succ);
1991	WorkList.push_back(Elt: Header);
1992	SmallPtrSet<BasicBlock *, `4`> Seen;
1993	Seen.insert(Ptr: Header);
1994	Info.PathIsNoop &=
1995	all_of(Range&: Header, P: [](Instruction &I) { return* !I.mayHaveSideEffects(); });
1996
1997	while (!WorkList.empty()) {
1998	BasicBlock *Current = WorkList.pop_back_val();
1999	if (!L.contains(BB: Current))
2000	continue;
2001	const auto &SeenIns = Seen.insert(Ptr: Current);
2002	if (!SeenIns.second)
2003	continue;
2004
2005	Info.PathIsNoop &= all_of(
2006	Range&: Current, P: [](Instruction &I) { return* !I.mayHaveSideEffects(); });
2007	WorkList.append(in_start: succ_begin(BB: Current), in_end: succ_end(BB: Current));
2008	}
2009
2010	// Require at least 2 blocks on a path through the loop. This skips
2011	// paths that directly exit the loop.
2012	if (Seen.size() < `2`)
2013	return {};
2014
2015	// Next, check if there are any MemoryDefs that are on the path through
2016	// the loop (in the Seen set) and they may-alias any of the locations in
2017	// AccessedLocs. If that is the case, they may modify the condition and
2018	// partial unswitching is not possible.
2019	SmallPtrSet<MemoryAccess *, `4`> SeenAccesses;
2020	while (!AccessesToCheck.empty()) {
2021	MemoryAccess *Current = AccessesToCheck.pop_back_val();
2022	auto SeenI = SeenAccesses.insert(Ptr: Current);
2023	if (!SeenI.second \|\| !Seen.contains(Ptr: Current->getBlock()))
2024	continue;
2025
2026	// Bail out if exceeded the threshold.
2027	if (SeenAccesses.size() >= MSSAThreshold)
2028	return {};
2029
2030	// MemoryUse are read-only accesses.
2031	if (isa<MemoryUse>(Val: Current))
2032	continue;
2033
2034	// For a MemoryDef, check if is aliases any of the location feeding
2035	// the original condition.
2036	if (auto *CurrentDef = dyn_cast<MemoryDef>(Val: Current)) {
2037	if (any_of(Range&: AccessedLocs, P: [&AA, CurrentDef](MemoryLocation &Loc) {
2038	return isModSet(
2039	MRI: AA.getModRefInfo(I: CurrentDef->getMemoryInst(), OptLoc: Loc));
2040	}))
2041	return {};
2042	}
2043
2044	for (Use &U : Current->uses())
2045	AccessesToCheck.push_back(Elt: cast<MemoryAccess>(Val: U.getUser()));
2046	}
2047
2048	// We could also allow loops with known trip counts without mustprogress,
2049	// but ScalarEvolution may not be available.
2050	Info.PathIsNoop &= isMustProgress(L: &L);
2051
2052	// If the path is considered a no-op so far, check if it reaches a
2053	// single exit block without any phis. This ensures no values from the
2054	// loop are used outside of the loop.
2055	if (Info.PathIsNoop) {
2056	for (auto *Exiting : ExitingBlocks) {
2057	if (!Seen.contains(Ptr: Exiting))
2058	continue;
2059	for (auto *Succ : successors(BB: Exiting)) {
2060	if (L.contains(BB: Succ))
2061	continue;
2062
2063	Info.PathIsNoop &= Succ->phis().empty() &&
2064	(!Info.ExitForPath \|\| Info.ExitForPath == Succ);
2065	if (!Info.PathIsNoop)
2066	break;
2067	assert((!Info.ExitForPath \|\| Info.ExitForPath == Succ) &&
2068	"cannot have multiple exit blocks");
2069	Info.ExitForPath = Succ;
2070	}
2071	}
2072	}
2073	if (!Info.ExitForPath)
2074	Info.PathIsNoop = false;
2075
2076	Info.InstToDuplicate = InstToDuplicate;
2077	return Info;
2078	};
2079
2080	// If we branch to the same successor, partial unswitching will not be
2081	// beneficial.
2082	if (TI->getSuccessor(i: `0`) == TI->getSuccessor(i: `1`))
2083	return {};
2084
2085	if (auto Info = HasNoClobbersOnPath (TI->getSuccessor(i: `0`), L.getHeader(),
2086	AccessesToCheck)) {
2087	Info ->KnownValue = ConstantInt::getTrue(Context&: TI->getContext());
2088	return Info;
2089	}
2090	if (auto Info = HasNoClobbersOnPath (TI->getSuccessor(i: `1`), L.getHeader(),
2091	AccessesToCheck)) {
2092	Info ->KnownValue = ConstantInt::getFalse(Context&: TI->getContext());
2093	return Info;
2094	}
2095
2096	return {};
2097	}
2098

source code of llvm/lib/Transforms/Utils/LoopUtils.cpp