LoopUnrollPass.cpp source code [llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp]

1	//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass implements a simple loop unroller. It works best when loops have
10	// been canonicalized by the -indvars pass, allowing it to determine the trip
11	// counts of loops easily.
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
15	#include "llvm/ADT/DenseMap.h"
16	#include "llvm/ADT/DenseMapInfo.h"
17	#include "llvm/ADT/DenseSet.h"
18	#include "llvm/ADT/STLExtras.h"
19	#include "llvm/ADT/SetVector.h"
20	#include "llvm/ADT/SmallPtrSet.h"
21	#include "llvm/ADT/SmallVector.h"
22	#include "llvm/ADT/StringRef.h"
23	#include "llvm/Analysis/AssumptionCache.h"
24	#include "llvm/Analysis/BlockFrequencyInfo.h"
25	#include "llvm/Analysis/CodeMetrics.h"
26	#include "llvm/Analysis/LoopAnalysisManager.h"
27	#include "llvm/Analysis/LoopInfo.h"
28	#include "llvm/Analysis/LoopPass.h"
29	#include "llvm/Analysis/LoopUnrollAnalyzer.h"
30	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
31	#include "llvm/Analysis/ProfileSummaryInfo.h"
32	#include "llvm/Analysis/ScalarEvolution.h"
33	#include "llvm/Analysis/TargetTransformInfo.h"
34	#include "llvm/IR/BasicBlock.h"
35	#include "llvm/IR/CFG.h"
36	#include "llvm/IR/Constant.h"
37	#include "llvm/IR/Constants.h"
38	#include "llvm/IR/DiagnosticInfo.h"
39	#include "llvm/IR/Dominators.h"
40	#include "llvm/IR/Function.h"
41	#include "llvm/IR/Instruction.h"
42	#include "llvm/IR/Instructions.h"
43	#include "llvm/IR/IntrinsicInst.h"
44	#include "llvm/IR/Metadata.h"
45	#include "llvm/IR/PassManager.h"
46	#include "llvm/InitializePasses.h"
47	#include "llvm/Pass.h"
48	#include "llvm/Support/Casting.h"
49	#include "llvm/Support/CommandLine.h"
50	#include "llvm/Support/Debug.h"
51	#include "llvm/Support/ErrorHandling.h"
52	#include "llvm/Support/raw_ostream.h"
53	#include "llvm/Transforms/Scalar.h"
54	#include "llvm/Transforms/Scalar/LoopPassManager.h"
55	#include "llvm/Transforms/Utils.h"
56	#include "llvm/Transforms/Utils/LoopPeel.h"
57	#include "llvm/Transforms/Utils/LoopSimplify.h"
58	#include "llvm/Transforms/Utils/LoopUtils.h"
59	#include "llvm/Transforms/Utils/SizeOpts.h"
60	#include "llvm/Transforms/Utils/UnrollLoop.h"
61	#include <algorithm>
62	#include <cassert>
63	#include <cstdint>
64	#include <limits>
65	#include <optional>
66	#include <string>
67	#include <tuple>
68	#include <utility>
69
70	using namespace llvm;
71
72	#define DEBUG_TYPE "loop-unroll"
73
74	cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
75	"forget-scev-loop-unroll", cl::init(Val: false), cl::Hidden,
76	cl::desc ("Forget everything in SCEV when doing LoopUnroll, instead of just"
77	" the current top-most loop. This is sometimes preferred to reduce"
78	" compile time."));
79
80	static cl::opt<unsigned>
81	UnrollThreshold("unroll-threshold", cl::Hidden,
82	cl::desc ("The cost threshold for loop unrolling"));
83
84	static cl::opt<unsigned>
85	UnrollOptSizeThreshold(
86	"unroll-optsize-threshold", cl::init(Val: `0`), cl::Hidden,
87	cl::desc ("The cost threshold for loop unrolling when optimizing for "
88	"size"));
89
90	static cl::opt<unsigned> UnrollPartialThreshold(
91	"unroll-partial-threshold", cl::Hidden,
92	cl::desc ("The cost threshold for partial loop unrolling"));
93
94	static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
95	"unroll-max-percent-threshold-boost", cl::init(Val: `400`), cl::Hidden,
96	cl::desc ("The maximum 'boost' (represented as a percentage >= 100) applied "
97	"to the threshold when aggressively unrolling a loop due to the "
98	"dynamic cost savings. If completely unrolling a loop will reduce "
99	"the total runtime from X to Y, we boost the loop unroll "
100	"threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
101	"X/Y). This limit avoids excessive code bloat."));
102
103	static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
104	"unroll-max-iteration-count-to-analyze", cl::init(Val: `10`), cl::Hidden,
105	cl::desc ("Don't allow loop unrolling to simulate more than this number of"
106	"iterations when checking full unroll profitability"));
107
108	static cl::opt<unsigned> UnrollCount(
109	"unroll-count", cl::Hidden,
110	cl::desc ("Use this unroll count for all loops including those with "
111	"unroll_count pragma values, for testing purposes"));
112
113	static cl::opt<unsigned> UnrollMaxCount(
114	"unroll-max-count", cl::Hidden,
115	cl::desc ("Set the max unroll count for partial and runtime unrolling, for"
116	"testing purposes"));
117
118	static cl::opt<unsigned> UnrollFullMaxCount(
119	"unroll-full-max-count", cl::Hidden,
120	cl::desc (
121	"Set the max unroll count for full unrolling, for testing purposes"));
122
123	static cl::opt<bool>
124	UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
125	cl::desc ("Allows loops to be partially unrolled until "
126	"-unroll-threshold loop size is reached."));
127
128	static cl::opt<bool> UnrollAllowRemainder(
129	"unroll-allow-remainder", cl::Hidden,
130	cl::desc ("Allow generation of a loop remainder (extra iterations) "
131	"when unrolling a loop."));
132
133	static cl::opt<bool>
134	UnrollRuntime("unroll-runtime", cl::Hidden,
135	cl::desc ("Unroll loops with run-time trip counts"));
136
137	static cl::opt<unsigned> UnrollMaxUpperBound(
138	"unroll-max-upperbound", cl::init(Val: `8`), cl::Hidden,
139	cl::desc (
140	"The max of trip count upper bound that is considered in unrolling"));
141
142	static cl::opt<unsigned> PragmaUnrollThreshold(
143	"pragma-unroll-threshold", cl::init(Val: `16` * `1024`), cl::Hidden,
144	cl::desc ("Unrolled size limit for loops with an unroll(full) or "
145	"unroll_count pragma."));
146
147	static cl::opt<unsigned> FlatLoopTripCountThreshold(
148	"flat-loop-tripcount-threshold", cl::init(Val: `5`), cl::Hidden,
149	cl::desc ("If the runtime tripcount for the loop is lower than the "
150	"threshold, the loop is considered as flat and will be less "
151	"aggressively unrolled."));
152
153	static cl::opt<bool> UnrollUnrollRemainder(
154	"unroll-remainder", cl::Hidden,
155	cl::desc ("Allow the loop remainder to be unrolled."));
156
157	// This option isn't ever intended to be enabled, it serves to allow
158	// experiments to check the assumptions about when this kind of revisit is
159	// necessary.
160	static cl::opt<bool> UnrollRevisitChildLoops(
161	"unroll-revisit-child-loops", cl::Hidden,
162	cl::desc ("Enqueue and re-visit child loops in the loop PM after unrolling. "
163	"This shouldn't typically be needed as child loops (or their "
164	"clones) were already visited."));
165
166	static cl::opt<unsigned> UnrollThresholdAggressive(
167	"unroll-threshold-aggressive", cl::init(Val: `300`), cl::Hidden,
168	cl::desc ("Threshold (max size of unrolled loop) to use in aggressive (O3) "
169	"optimizations"));
170	static cl::opt<unsigned>
171	UnrollThresholdDefault("unroll-threshold-default", cl::init(Val: `150`),
172	cl::Hidden,
173	cl::desc ("Default threshold (max size of unrolled "
174	"loop), used in all but O3 optimizations"));
175
176	static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
177	"pragma-unroll-full-max-iterations", cl::init(Val: `1'000'000`), cl::Hidden,
178	cl::desc ("Maximum allowed iterations to unroll under pragma unroll full."));
179
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result. It is the largest representable `unsigned`,
/// so no computed size can exceed it.
static constexpr unsigned NoThreshold = std::numeric_limits<unsigned>::max();
184
185	/// Gather the various unrolling parameters based on the defaults, compiler
186	/// flags, TTI overrides and user specified parameters.
187	TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
188	Loop L, ScalarEvolution &SE, const* TargetTransformInfo &TTI,
189	BlockFrequencyInfo BFI, ProfileSummaryInfo PSI,
190	OptimizationRemarkEmitter &ORE, int OptLevel,
191	std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
192	std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
193	std::optional<bool> UserUpperBound,
194	std::optional<unsigned> UserFullUnrollMaxCount) {
195	TargetTransformInfo::UnrollingPreferences UP;
196
197	// Set up the defaults
198	UP.Threshold =
199	OptLevel > `2` ? UnrollThresholdAggressive : UnrollThresholdDefault;
200	UP.MaxPercentThresholdBoost = `400`;
201	UP.OptSizeThreshold = UnrollOptSizeThreshold;
202	UP.PartialThreshold = `150`;
203	UP.PartialOptSizeThreshold = UnrollOptSizeThreshold;
204	UP.Count = `0`;
205	UP.DefaultUnrollRuntimeCount = `8`;
206	UP.MaxCount = std::numeric_limits<unsigned>::max();
207	UP.MaxUpperBound = UnrollMaxUpperBound;
208	UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
209	UP.BEInsns = `2`;
210	UP.Partial = false;
211	UP.Runtime = false;
212	UP.AllowRemainder = true;
213	UP.UnrollRemainder = false;
214	UP.AllowExpensiveTripCount = false;
215	UP.Force = false;
216	UP.UpperBound = false;
217	UP.UnrollAndJam = false;
218	UP.UnrollAndJamInnerLoopThreshold = `60`;
219	UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
220
221	// Override with any target specific settings
222	TTI.getUnrollingPreferences(L, SE, UP, ORE: &ORE);
223
224	// Apply size attributes
225	bool OptForSize = L->getHeader()->getParent()->hasOptSize() \|\|
226	// Let unroll hints / pragmas take precedence over PGSO.
227	(hasUnrollTransformation(L) != TM_ForcedByUser &&
228	llvm::shouldOptimizeForSize(BB: L->getHeader(), PSI, BFI,
229	QueryType: PGSOQueryType::IRPass));
230	if (OptForSize) {
231	UP.Threshold = UP.OptSizeThreshold;
232	UP.PartialThreshold = UP.PartialOptSizeThreshold;
233	UP.MaxPercentThresholdBoost = `100`;
234	}
235
236	// Apply any user values specified by cl::opt
237	if (UnrollThreshold.getNumOccurrences() > `0`)
238	UP.Threshold = UnrollThreshold;
239	if (UnrollPartialThreshold.getNumOccurrences() > `0`)
240	UP.PartialThreshold = UnrollPartialThreshold;
241	if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > `0`)
242	UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
243	if (UnrollMaxCount.getNumOccurrences() > `0`)
244	UP.MaxCount = UnrollMaxCount;
245	if (UnrollMaxUpperBound.getNumOccurrences() > `0`)
246	UP.MaxUpperBound = UnrollMaxUpperBound;
247	if (UnrollFullMaxCount.getNumOccurrences() > `0`)
248	UP.FullUnrollMaxCount = UnrollFullMaxCount;
249	if (UnrollAllowPartial.getNumOccurrences() > `0`)
250	UP.Partial = UnrollAllowPartial;
251	if (UnrollAllowRemainder.getNumOccurrences() > `0`)
252	UP.AllowRemainder = UnrollAllowRemainder;
253	if (UnrollRuntime.getNumOccurrences() > `0`)
254	UP.Runtime = UnrollRuntime;
255	if (UnrollMaxUpperBound == `0`)
256	UP.UpperBound = false;
257	if (UnrollUnrollRemainder.getNumOccurrences() > `0`)
258	UP.UnrollRemainder = UnrollUnrollRemainder;
259	if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > `0`)
260	UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
261
262	// Apply user values provided by argument
263	if (UserThreshold) {
264	UP.Threshold = *UserThreshold;
265	UP.PartialThreshold = *UserThreshold;
266	}
267	if (UserCount)
268	UP.Count = *UserCount;
269	if (UserAllowPartial)
270	UP.Partial = *UserAllowPartial;
271	if (UserRuntime)
272	UP.Runtime = *UserRuntime;
273	if (UserUpperBound)
274	UP.UpperBound = *UserUpperBound;
275	if (UserFullUnrollMaxCount)
276	UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
277
278	return UP;
279	}
280
281	namespace {
282
283	/// A struct to densely store the state of an instruction after unrolling at
284	/// each iteration.
285	///
286	/// This is designed to work like a tuple of <Instruction , int> for the*
287	/// purposes of hashing and lookup, but to be able to associate two boolean
288	/// states with each key.
289	struct UnrolledInstState {
290	Instruction *I;
291	int Iteration : `30`;
292	unsigned IsFree : `1`;
293	unsigned IsCounted : `1`;
294	};
295
296	/// Hashing and equality testing for a set of the instruction states.
297	struct UnrolledInstStateKeyInfo {
298	using PtrInfo = DenseMapInfo<Instruction *>;
299	using PairInfo = DenseMapInfo<std::pair<Instruction , int*>>;
300
301	static inline UnrolledInstState getEmptyKey() {
302	return {.I: PtrInfo::getEmptyKey(), .Iteration: `0`, .IsFree: `0`, .IsCounted: `0`};
303	}
304
305	static inline UnrolledInstState getTombstoneKey() {
306	return {.I: PtrInfo::getTombstoneKey(), .Iteration: `0`, .IsFree: `0`, .IsCounted: `0`};
307	}
308
309	static inline unsigned getHashValue(const UnrolledInstState &S) {
310	return PairInfo::getHashValue(PairVal: {S.I, S.Iteration});
311	}
312
313	static inline bool isEqual(const UnrolledInstState &LHS,
314	const UnrolledInstState &RHS) {
315	return PairInfo::isEqual(LHS: {LHS.I, LHS.Iteration}, RHS: {RHS.I, RHS.Iteration});
316	}
317	};
318
/// Result of the full-unroll profitability simulation: the two cost figures
/// that are compared to decide whether unrolling pays off.
struct EstimatedUnrollCost {
  /// The estimated cost after unrolling.
  unsigned UnrolledCost;

  /// The estimated dynamic cost of executing the instructions in the
  /// rolled form.
  unsigned RolledDynamicCost;
};
327
/// Immutable bundle of the user- and pragma-provided unrolling hints that
/// apply to one loop.
struct PragmaInfo {
  /// \param UUC stored as UserUnrollCount.
  /// \param PFU stored as PragmaFullUnroll.
  /// \param PC  stored as PragmaCount.
  /// \param PEU stored as PragmaEnableUnroll.
  PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
      : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
        PragmaEnableUnroll(PEU) {}
  const bool UserUnrollCount;
  const bool PragmaFullUnroll;
  const unsigned PragmaCount;
  const bool PragmaEnableUnroll;
};
337
338	} // end anonymous namespace
339
340	/// Figure out if the loop is worth full unrolling.
341	///
342	/// Complete loop unrolling can make some loads constant, and we need to know
343	/// if that would expose any further optimization opportunities. This routine
344	/// estimates this optimization. It computes cost of unrolled loop
345	/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
346	/// dynamic cost we mean that we won't count costs of blocks that are known not
347	/// to be executed (i.e. if we have a branch in the loop and we know that at the
348	/// given iteration its condition would be resolved to true, we won't add up the
349	/// cost of the 'false'-block).
350	/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
351	/// the analysis failed (no benefits expected from the unrolling, or the loop is
352	/// too big to analyze), the returned value is std::nullopt.
353	static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
354	const Loop L, unsigned* TripCount, DominatorTree &DT, ScalarEvolution &SE,
355	const SmallPtrSetImpl<const Value *> &EphValues,
356	const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
357	unsigned MaxIterationsCountToAnalyze) {
358	// We want to be able to scale offsets by the trip count and add more offsets
359	// to them without checking for overflows, and we already don't want to
360	// analyze massive* trip counts, so we force the max to be reasonably small.*
361	assert(MaxIterationsCountToAnalyze <
362	(unsigned)(std::numeric_limits<int>::max() / `2`) &&
363	"The unroll iterations max is too large!");
364
365	// Only analyze inner loops. We can't properly estimate cost of nested loops
366	// and we won't visit inner loops again anyway.
367	if (!L->isInnermost())
368	return std::nullopt;
369
370	// Don't simulate loops with a big or unknown tripcount
371	if (!TripCount \|\| TripCount > MaxIterationsCountToAnalyze)
372	return std::nullopt;
373
374	SmallSetVector<BasicBlock *, `16`> BBWorklist;
375	SmallSetVector<std::pair<BasicBlock , BasicBlock >, `4`> ExitWorklist;
376	DenseMap<Value , Value > SimplifiedValues;
377	SmallVector<std::pair<Value , Value >, `4`> SimplifiedInputValues;
378
379	// The estimated cost of the unrolled form of the loop. We try to estimate
380	// this by simplifying as much as we can while computing the estimate.
381	InstructionCost UnrolledCost = `0`;
382
383	// We also track the estimated dynamic (that is, actually executed) cost in
384	// the rolled form. This helps identify cases when the savings from unrolling
385	// aren't just exposing dead control flows, but actual reduced dynamic
386	// instructions due to the simplifications which we expect to occur after
387	// unrolling.
388	InstructionCost RolledDynamicCost = `0`;
389
390	// We track the simplification of each instruction in each iteration. We use
391	// this to recursively merge costs into the unrolled cost on-demand so that
392	// we don't count the cost of any dead code. This is essentially a map from
393	// <instruction, int> to <bool, bool>, but stored as a densely packed struct.
394	DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
395
396	// A small worklist used to accumulate cost of instructions from each
397	// observable and reached root in the loop.
398	SmallVector<Instruction *, `16`> CostWorklist;
399
400	// PHI-used worklist used between iterations while accumulating cost.
401	SmallVector<Instruction *, `4`> PHIUsedList;
402
403	// Helper function to accumulate cost for instructions in the loop.
404	auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
405	assert(Iteration >= `0` && "Cannot have a negative iteration!");
406	assert(CostWorklist.empty() && "Must start with an empty cost list");
407	assert(PHIUsedList.empty() && "Must start with an empty phi used list");
408	CostWorklist.push_back(Elt: &RootI);
409	TargetTransformInfo::TargetCostKind CostKind =
410	RootI.getFunction()->hasMinSize() ?
411	TargetTransformInfo::TCK_CodeSize :
412	TargetTransformInfo::TCK_SizeAndLatency;
413	for (;; --Iteration) {
414	do {
415	Instruction *I = CostWorklist.pop_back_val();
416
417	// InstCostMap only uses I and Iteration as a key, the other two values
418	// don't matter here.
419	auto CostIter = InstCostMap.find(V: {.I: I, .Iteration: Iteration, .IsFree: `0`, .IsCounted: `0`});
420	if (CostIter == InstCostMap.end())
421	// If an input to a PHI node comes from a dead path through the loop
422	// we may have no cost data for it here. What that actually means is
423	// that it is free.
424	continue;
425	auto &Cost = *CostIter;
426	if (Cost.IsCounted)
427	// Already counted this instruction.
428	continue;
429
430	// Mark that we are counting the cost of this instruction now.
431	Cost.IsCounted = true;
432
433	// If this is a PHI node in the loop header, just add it to the PHI set.
434	if (auto *PhiI = dyn_cast<PHINode>(Val: I))
435	if (PhiI->getParent() == L->getHeader()) {
436	assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
437	"inherently simplify during unrolling.");
438	if (Iteration == `0`)
439	continue;
440
441	// Push the incoming value from the backedge into the PHI used list
442	// if it is an in-loop instruction. We'll use this to populate the
443	// cost worklist for the next iteration (as we count backwards).
444	if (auto *OpI = dyn_cast<Instruction>(
445	Val: PhiI->getIncomingValueForBlock(BB: L->getLoopLatch())))
446	if (L->contains(Inst: OpI))
447	PHIUsedList.push_back(Elt: OpI);
448	continue;
449	}
450
451	// First accumulate the cost of this instruction.
452	if (!Cost.IsFree) {
453	// Consider simplified operands in instruction cost.
454	SmallVector<Value *, `4`> Operands;
455	transform(Range: I->operands(), d_first: std::back_inserter(x&: Operands),
456	F: [&](Value *Op) {
457	if (auto Res = SimplifiedValues.lookup(Val: Op))
458	return Res;
459	return Op;
460	});
461	UnrolledCost += TTI.getInstructionCost(U: I, Operands, CostKind);
462	LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
463	<< Iteration << "): ");
464	LLVM_DEBUG(I->dump());
465	}
466
467	// We must count the cost of every operand which is not free,
468	// recursively. If we reach a loop PHI node, simply add it to the set
469	// to be considered on the next iteration (backwards!).
470	for (Value *Op : I->operands()) {
471	// Check whether this operand is free due to being a constant or
472	// outside the loop.
473	auto *OpI = dyn_cast<Instruction>(Val: Op);
474	if (!OpI \|\| !L->contains(Inst: OpI))
475	continue;
476
477	// Otherwise accumulate its cost.
478	CostWorklist.push_back(Elt: OpI);
479	}
480	} while (!CostWorklist.empty());
481
482	if (PHIUsedList.empty())
483	// We've exhausted the search.
484	break;
485
486	assert(Iteration > `0` &&
487	"Cannot track PHI-used values past the first iteration!");
488	CostWorklist.append(in_start: PHIUsedList.begin(), in_end: PHIUsedList.end());
489	PHIUsedList.clear();
490	}
491	};
492
493	// Ensure that we don't violate the loop structure invariants relied on by
494	// this analysis.
495	assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
496	assert(L->isLCSSAForm(DT) &&
497	"Must have loops in LCSSA form to track live-out values.");
498
499	LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
500
501	TargetTransformInfo::TargetCostKind CostKind =
502	L->getHeader()->getParent()->hasMinSize() ?
503	TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency;
504	// Simulate execution of each iteration of the loop counting instructions,
505	// which would be simplified.
506	// Since the same load will take different values on different iterations,
507	// we literally have to go through all loop's iterations.
508	for (unsigned Iteration = `0`; Iteration < TripCount; ++Iteration) {
509	LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
510
511	// Prepare for the iteration by collecting any simplified entry or backedge
512	// inputs.
513	for (Instruction &I : *L->getHeader()) {
514	auto *PHI = dyn_cast<PHINode>(Val: &I);
515	if (!PHI)
516	break;
517
518	// The loop header PHI nodes must have exactly two input: one from the
519	// loop preheader and one from the loop latch.
520	assert(
521	PHI->getNumIncomingValues() == `2` &&
522	"Must have an incoming value only for the preheader and the latch.");
523
524	Value *V = PHI->getIncomingValueForBlock(
525	BB: Iteration == `0` ? L->getLoopPreheader() : L->getLoopLatch());
526	if (Iteration != `0` && SimplifiedValues.count(Val: V))
527	V = SimplifiedValues.lookup(Val: V);
528	SimplifiedInputValues.push_back(Elt: {PHI, V});
529	}
530
531	// Now clear and re-populate the map for the next iteration.
532	SimplifiedValues.clear();
533	while (!SimplifiedInputValues.empty())
534	SimplifiedValues.insert(KV: SimplifiedInputValues.pop_back_val());
535
536	UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
537
538	BBWorklist.clear();
539	BBWorklist.insert(X: L->getHeader());
540	// Note that we must not* cache the size, this loop grows the worklist.*
541	for (unsigned Idx = `0`; Idx != BBWorklist.size(); ++Idx) {
542	BasicBlock *BB = BBWorklist [Idx];
543
544	// Visit all instructions in the given basic block and try to simplify
545	// it. We don't change the actual IR, just count optimization
546	// opportunities.
547	for (Instruction &I : *BB) {
548	// These won't get into the final code - don't even try calculating the
549	// cost for them.
550	if (isa<DbgInfoIntrinsic>(Val: I) \|\| EphValues.count(Ptr: &I))
551	continue;
552
553	// Track this instruction's expected baseline cost when executing the
554	// rolled loop form.
555	RolledDynamicCost += TTI.getInstructionCost(U: &I, CostKind);
556
557	// Visit the instruction to analyze its loop cost after unrolling,
558	// and if the visitor returns true, mark the instruction as free after
559	// unrolling and continue.
560	bool IsFree = Analyzer.visit(I);
561	bool Inserted = InstCostMap.insert(V: {.I: &I, .Iteration: (int)Iteration,
562	.IsFree: (unsigned)IsFree,
563	/IsCounted/ false}).second;
564	(void)Inserted;
565	assert(Inserted && "Cannot have a state for an unvisited instruction!");
566
567	if (IsFree)
568	continue;
569
570	// Can't properly model a cost of a call.
571	// FIXME: With a proper cost model we should be able to do it.
572	if (auto *CI = dyn_cast<CallInst>(Val: &I)) {
573	const Function *Callee = CI->getCalledFunction();
574	if (!Callee \|\| TTI.isLoweredToCall(F: Callee)) {
575	LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
576	return std::nullopt;
577	}
578	}
579
580	// If the instruction might have a side-effect recursively account for
581	// the cost of it and all the instructions leading up to it.
582	if (I.mayHaveSideEffects())
583	AddCostRecursively (I, Iteration);
584
585	// If unrolled body turns out to be too big, bail out.
586	if (UnrolledCost > MaxUnrolledLoopSize) {
587	LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
588	<< " UnrolledCost: " << UnrolledCost
589	<< ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
590	<< "\n");
591	return std::nullopt;
592	}
593	}
594
595	Instruction *TI = BB->getTerminator();
596
597	auto getSimplifiedConstant = [&](Value V) -> Constant {
598	if (SimplifiedValues.count(Val: V))
599	V = SimplifiedValues.lookup(Val: V);
600	return dyn_cast<Constant>(Val: V);
601	};
602
603	// Add in the live successors by first checking whether we have terminator
604	// that may be simplified based on the values simplified by this call.
605	BasicBlock KnownSucc = nullptr*;
606	if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) {
607	if (BI->isConditional()) {
608	if (auto *SimpleCond = getSimplifiedConstant (BI->getCondition())) {
609	// Just take the first successor if condition is undef
610	if (isa<UndefValue>(Val: SimpleCond))
611	KnownSucc = BI->getSuccessor(i: `0`);
612	else if (ConstantInt *SimpleCondVal =
613	dyn_cast<ConstantInt>(Val: SimpleCond))
614	KnownSucc = BI->getSuccessor(i: SimpleCondVal->isZero() ? `1` : `0`);
615	}
616	}
617	} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
618	if (auto *SimpleCond = getSimplifiedConstant (SI->getCondition())) {
619	// Just take the first successor if condition is undef
620	if (isa<UndefValue>(Val: SimpleCond))
621	KnownSucc = SI->getSuccessor(idx: `0`);
622	else if (ConstantInt *SimpleCondVal =
623	dyn_cast<ConstantInt>(Val: SimpleCond))
624	KnownSucc = SI->findCaseValue(C: SimpleCondVal)->getCaseSuccessor();
625	}
626	}
627	if (KnownSucc) {
628	if (L->contains(BB: KnownSucc))
629	BBWorklist.insert(X: KnownSucc);
630	else
631	ExitWorklist.insert(X: {BB, KnownSucc});
632	continue;
633	}
634
635	// Add BB's successors to the worklist.
636	for (BasicBlock *Succ : successors(BB))
637	if (L->contains(BB: Succ))
638	BBWorklist.insert(X: Succ);
639	else
640	ExitWorklist.insert(X: {BB, Succ});
641	AddCostRecursively (*TI, Iteration);
642	}
643
644	// If we found no optimization opportunities on the first iteration, we
645	// won't find them on later ones too.
646	if (UnrolledCost == RolledDynamicCost) {
647	LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
648	<< " UnrolledCost: " << UnrolledCost << "\n");
649	return std::nullopt;
650	}
651	}
652
653	while (!ExitWorklist.empty()) {
654	BasicBlock ExitingBB, ExitBB;
655	std::tie(args&: ExitingBB, args&: ExitBB) = ExitWorklist.pop_back_val();
656
657	for (Instruction &I : *ExitBB) {
658	auto *PN = dyn_cast<PHINode>(Val: &I);
659	if (!PN)
660	break;
661
662	Value *Op = PN->getIncomingValueForBlock(BB: ExitingBB);
663	if (auto *OpI = dyn_cast<Instruction>(Val: Op))
664	if (L->contains(Inst: OpI))
665	AddCostRecursively (*OpI, TripCount - `1`);
666	}
667	}
668
669	assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
670	"All instructions must have a valid cost, whether the "
671	"loop is rolled or unrolled.");
672
673	LLVM_DEBUG(dbgs() << "Analysis finished:\n"
674	<< "UnrolledCost: " << UnrolledCost << ", "
675	<< "RolledDynamicCost: " << RolledDynamicCost << "\n");
676	return {{.UnrolledCost: unsigned(*UnrolledCost.getValue()),
677	.RolledDynamicCost: unsigned(*RolledDynamicCost.getValue())}};
678	}
679
680	UnrollCostEstimator::UnrollCostEstimator(
681	const Loop L, const* TargetTransformInfo &TTI,
682	const SmallPtrSetImpl<const Value > &EphValues, unsigned* BEInsns) {
683	CodeMetrics Metrics;
684	for (BasicBlock *BB : L->blocks())
685	Metrics.analyzeBasicBlock(BB, TTI, EphValues);
686	NumInlineCandidates = Metrics.NumInlineCandidates;
687	NotDuplicatable = Metrics.notDuplicatable;
688	Convergent = Metrics.convergent;
689	LoopSize = Metrics.NumInsts;
690
691	// Don't allow an estimate of size zero. This would allows unrolling of loops
692	// with huge iteration counts, which is a compile time problem even if it's
693	// not a problem for code quality. Also, the code using this size may assume
694	// that each loop has at least three instructions (likely a conditional
695	// branch, a comparison feeding that branch, and some kind of loop increment
696	// feeding that comparison instruction).
697	if (LoopSize.isValid() && LoopSize < BEInsns + `1`)
698	// This is an open coded max() on InstructionCost
699	LoopSize = BEInsns + `1`;
700	}
701
702	uint64_t UnrollCostEstimator::getUnrolledLoopSize(
703	const TargetTransformInfo::UnrollingPreferences &UP,
704	unsigned CountOverwrite) const {
705	unsigned LS = *LoopSize.getValue();
706	assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
707	if (CountOverwrite)
708	return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
709	else
710	return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
711	}
712
713	// Returns the loop hint metadata node with the given name (for example,
714	// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
715	// returned.
716	static MDNode getUnrollMetadataForLoop(const* Loop *L, StringRef Name) {
717	if (MDNode *LoopID = L->getLoopID())
718	return GetUnrollMetadata(LoopID, Name);
719	return nullptr;
720	}
721
722	// Returns true if the loop has an unroll(full) pragma.
723	static bool hasUnrollFullPragma(const Loop *L) {
724	return getUnrollMetadataForLoop(L, Name: "llvm.loop.unroll.full");
725	}
726
727	// Returns true if the loop has an unroll(enable) pragma. This metadata is used
728	// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
729	static bool hasUnrollEnablePragma(const Loop *L) {
730	return getUnrollMetadataForLoop(L, Name: "llvm.loop.unroll.enable");
731	}
732
733	// Returns true if the loop has an runtime unroll(disable) pragma.
734	static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
735	return getUnrollMetadataForLoop(L, Name: "llvm.loop.unroll.runtime.disable");
736	}
737
738	// If loop has an unroll_count pragma return the (necessarily
739	// positive) value from the pragma. Otherwise return 0.
740	static unsigned unrollCountPragmaValue(const Loop *L) {
741	MDNode *MD = getUnrollMetadataForLoop(L, Name: "llvm.loop.unroll.count");
742	if (MD) {
743	assert(MD->getNumOperands() == `2` &&
744	"Unroll count hint metadata should have two operands.");
745	unsigned Count =
746	mdconst::extract<ConstantInt>(MD: MD->getOperand(I: `1`))->getZExtValue();
747	assert(Count >= `1` && "Unroll count must be positive.");
748	return Count;
749	}
750	return `0`;
751	}
752
753	// Computes the boosting factor for complete unrolling.
754	// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
755	// be beneficial to fully unroll the loop even if unrolledcost is large. We
756	// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
757	// the unroll threshold.
758	static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
759	unsigned MaxPercentThresholdBoost) {
760	if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / `100`)
761	return `100`;
762	else if (Cost.UnrolledCost != `0`)
763	// The boosting factor is RolledDynamicCost / UnrolledCost
764	return std::min(a: `100` * Cost.RolledDynamicCost / Cost.UnrolledCost,
765	b: MaxPercentThresholdBoost);
766	else
767	return MaxPercentThresholdBoost;
768	}
769
770	static std::optional<unsigned>
771	shouldPragmaUnroll(Loop L, const* PragmaInfo &PInfo,
772	const unsigned TripMultiple, const unsigned TripCount,
773	unsigned MaxTripCount, const UnrollCostEstimator UCE,
774	const TargetTransformInfo::UnrollingPreferences &UP) {
775
776	// Using unroll pragma
777	// 1st priority is unroll count set by "unroll-count" option.
778
779	if (PInfo.UserUnrollCount) {
780	if (UP.AllowRemainder &&
781	UCE.getUnrolledLoopSize(UP, CountOverwrite: (unsigned)UnrollCount) < UP.Threshold)
782	return (unsigned)UnrollCount;
783	}
784
785	// 2nd priority is unroll count set by pragma.
786	if (PInfo.PragmaCount > `0`) {
787	if ((UP.AllowRemainder \|\| (TripMultiple % PInfo.PragmaCount == `0`)))
788	return PInfo.PragmaCount;
789	}
790
791	if (PInfo.PragmaFullUnroll && TripCount != `0`) {
792	// Certain cases with UBSAN can cause trip count to be calculated as
793	// INT_MAX, Block full unrolling at a reasonable limit so that the compiler
794	// doesn't hang trying to unroll the loop. See PR77842
795	if (TripCount > PragmaUnrollFullMaxIterations) {
796	LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
797	return std::nullopt;
798	}
799
800	return TripCount;
801	}
802
803	if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
804	MaxTripCount <= UP.MaxUpperBound)
805	return MaxTripCount;
806
807	// if didn't return until here, should continue to other priorties
808	return std::nullopt;
809	}
810
811	static std::optional<unsigned> shouldFullUnroll(
812	Loop L, const* TargetTransformInfo &TTI, DominatorTree &DT,
813	ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
814	const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
815	const TargetTransformInfo::UnrollingPreferences &UP) {
816	assert(FullUnrollTripCount && "should be non-zero!");
817
818	if (FullUnrollTripCount > UP.FullUnrollMaxCount)
819	return std::nullopt;
820
821	// When computing the unrolled size, note that BEInsns are not replicated
822	// like the rest of the loop body.
823	if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
824	return FullUnrollTripCount;
825
826	// The loop isn't that small, but we still can fully unroll it if that
827	// helps to remove a significant number of instructions.
828	// To check that, run additional analysis on the loop.
829	if (std::optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
830	L, TripCount: FullUnrollTripCount, DT, SE, EphValues, TTI,
831	MaxUnrolledLoopSize: UP.Threshold * UP.MaxPercentThresholdBoost / `100`,
832	MaxIterationsCountToAnalyze: UP.MaxIterationsCountToAnalyze)) {
833	unsigned Boost =
834	getFullUnrollBoostingFactor(Cost: *Cost, MaxPercentThresholdBoost: UP.MaxPercentThresholdBoost);
835	if (Cost ->UnrolledCost < UP.Threshold * Boost / `100`)
836	return FullUnrollTripCount;
837	}
838	return std::nullopt;
839	}
840
841	static std::optional<unsigned>
842	shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
843	const UnrollCostEstimator UCE,
844	const TargetTransformInfo::UnrollingPreferences &UP) {
845
846	if (!TripCount)
847	return std::nullopt;
848
849	if (!UP.Partial) {
850	LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
851	<< "-unroll-allow-partial not given\n");
852	return `0`;
853	}
854	unsigned count = UP.Count;
855	if (count == `0`)
856	count = TripCount;
857	if (UP.PartialThreshold != NoThreshold) {
858	// Reduce unroll count to be modulo of TripCount for partial unrolling.
859	if (UCE.getUnrolledLoopSize(UP, CountOverwrite: count) > UP.PartialThreshold)
860	count = (std::max(a: UP.PartialThreshold, b: UP.BEInsns + `1`) - UP.BEInsns) /
861	(LoopSize - UP.BEInsns);
862	if (count > UP.MaxCount)
863	count = UP.MaxCount;
864	while (count != `0` && TripCount % count != `0`)
865	count--;
866	if (UP.AllowRemainder && count <= `1`) {
867	// If there is no Count that is modulo of TripCount, set Count to
868	// largest power-of-two factor that satisfies the threshold limit.
869	// As we'll create fixup loop, do the type of unrolling only if
870	// remainder loop is allowed.
871	count = UP.DefaultUnrollRuntimeCount;
872	while (count != `0` &&
873	UCE.getUnrolledLoopSize(UP, CountOverwrite: count) > UP.PartialThreshold)
874	count >>= `1`;
875	}
876	if (count < `2`) {
877	count = `0`;
878	}
879	} else {
880	count = TripCount;
881	}
882	if (count > UP.MaxCount)
883	count = UP.MaxCount;
884
885	LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
886
887	return count;
888	}
889	// Returns true if unroll count was set explicitly.
890	// Calculates unroll count and writes it to UP.Count.
891	// Unless IgnoreUser is true, will also use metadata and command-line options
892	// that are specific to to the LoopUnroll pass (which, for instance, are
893	// irrelevant for the LoopUnrollAndJam pass).
894	// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
895	// many LoopUnroll-specific options. The shared functionality should be
896	// refactored into it own function.
897	bool llvm::computeUnrollCount(
898	Loop L, const* TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
899	AssumptionCache *AC, ScalarEvolution &SE,
900	const SmallPtrSetImpl<const Value *> &EphValues,
901	OptimizationRemarkEmitter ORE, unsigned* TripCount, unsigned MaxTripCount,
902	bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
903	TargetTransformInfo::UnrollingPreferences &UP,
904	TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
905
906	unsigned LoopSize = UCE.getRolledLoopSize();
907
908	const bool UserUnrollCount = UnrollCount.getNumOccurrences() > `0`;
909	const bool PragmaFullUnroll = hasUnrollFullPragma(L);
910	const unsigned PragmaCount = unrollCountPragmaValue(L);
911	const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
912
913	const bool ExplicitUnroll = PragmaCount > `0` \|\| PragmaFullUnroll \|\|
914	PragmaEnableUnroll \|\| UserUnrollCount;
915
916	PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
917	PragmaEnableUnroll);
918	// Use an explicit peel count that has been specified for testing. In this
919	// case it's not permitted to also specify an explicit unroll count.
920	if (PP.PeelCount) {
921	if (UnrollCount.getNumOccurrences() > `0`) {
922	report_fatal_error(reason: "Cannot specify both explicit peel count and "
923	"explicit unroll count", /GenCrashDiag=/gen_crash_diag: false);
924	}
925	UP.Count = `1`;
926	UP.Runtime = false;
927	return true;
928	}
929	// Check for explicit Count.
930	// 1st priority is unroll count set by "unroll-count" option.
931	// 2nd priority is unroll count set by pragma.
932	if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
933	MaxTripCount, UCE, UP)) {
934	UP.Count = *UnrollFactor;
935
936	if (UserUnrollCount \|\| (PragmaCount > `0`)) {
937	UP.AllowExpensiveTripCount = true;
938	UP.Force = true;
939	}
940	UP.Runtime \|= (PragmaCount > `0`);
941	return ExplicitUnroll;
942	} else {
943	if (ExplicitUnroll && TripCount != `0`) {
944	// If the loop has an unrolling pragma, we want to be more aggressive with
945	// unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
946	// value which is larger than the default limits.
947	UP.Threshold = std::max<unsigned>(a: UP.Threshold, b: PragmaUnrollThreshold);
948	UP.PartialThreshold =
949	std::max<unsigned>(a: UP.PartialThreshold, b: PragmaUnrollThreshold);
950	}
951	}
952
953	// 3rd priority is exact full unrolling. This will eliminate all copies
954	// of some exit test.
955	UP.Count = `0`;
956	if (TripCount) {
957	UP.Count = TripCount;
958	if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
959	FullUnrollTripCount: TripCount, UCE, UP)) {
960	UP.Count = *UnrollFactor;
961	UseUpperBound = false;
962	return ExplicitUnroll;
963	}
964	}
965
966	// 4th priority is bounded unrolling.
967	// We can unroll by the upper bound amount if it's generally allowed or if
968	// we know that the loop is executed either the upper bound or zero times.
969	// (MaxOrZero unrolling keeps only the first loop test, so the number of
970	// loop tests remains the same compared to the non-unrolled version, whereas
971	// the generic upper bound unrolling keeps all but the last loop test so the
972	// number of loop tests goes up which may end up being worse on targets with
973	// constrained branch predictor resources so is controlled by an option.)
974	// In addition we only unroll small upper bounds.
975	// Note that the cost of bounded unrolling is always strictly greater than
976	// cost of exact full unrolling. As such, if we have an exact count and
977	// found it unprofitable, we'll never chose to bounded unroll.
978	if (!TripCount && MaxTripCount && (UP.UpperBound \|\| MaxOrZero) &&
979	MaxTripCount <= UP.MaxUpperBound) {
980	UP.Count = MaxTripCount;
981	if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
982	FullUnrollTripCount: MaxTripCount, UCE, UP)) {
983	UP.Count = *UnrollFactor;
984	UseUpperBound = true;
985	return ExplicitUnroll;
986	}
987	}
988
989	// 5th priority is loop peeling.
990	computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, Threshold: UP.Threshold);
991	if (PP.PeelCount) {
992	UP.Runtime = false;
993	UP.Count = `1`;
994	return ExplicitUnroll;
995	}
996
997	// Before starting partial unrolling, set up.partial to true,
998	// if user explicitly asked for unrolling
999	if (TripCount)
1000	UP.Partial \|= ExplicitUnroll;
1001
1002	// 6th priority is partial unrolling.
1003	// Try partial unroll only when TripCount could be statically calculated.
1004	if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
1005	UP.Count = *UnrollFactor;
1006
1007	if ((PragmaFullUnroll \|\| PragmaEnableUnroll) && TripCount &&
1008	UP.Count != TripCount)
1009	ORE->emit(RemarkBuilder: [&]() {
1010	return OptimizationRemarkMissed (DEBUG_TYPE,
1011	"FullUnrollAsDirectedTooLarge",
1012	L->getStartLoc(), L->getHeader())
1013	<< "Unable to fully unroll loop as directed by unroll pragma "
1014	"because "
1015	"unrolled size is too large.";
1016	});
1017
1018	if (UP.PartialThreshold != NoThreshold) {
1019	if (UP.Count == `0`) {
1020	if (PragmaEnableUnroll)
1021	ORE->emit(RemarkBuilder: [&]() {
1022	return OptimizationRemarkMissed (DEBUG_TYPE,
1023	"UnrollAsDirectedTooLarge",
1024	L->getStartLoc(), L->getHeader())
1025	<< "Unable to unroll loop as directed by unroll(enable) "
1026	"pragma "
1027	"because unrolled size is too large.";
1028	});
1029	}
1030	}
1031	return ExplicitUnroll;
1032	}
1033	assert(TripCount == `0` &&
1034	"All cases when TripCount is constant should be covered here.");
1035	if (PragmaFullUnroll)
1036	ORE->emit(RemarkBuilder: [&]() {
1037	return OptimizationRemarkMissed (
1038	DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
1039	L->getStartLoc(), L->getHeader())
1040	<< "Unable to fully unroll loop as directed by unroll(full) "
1041	"pragma "
1042	"because loop has a runtime trip count.";
1043	});
1044
1045	// 7th priority is runtime unrolling.
1046	// Don't unroll a runtime trip count loop when it is disabled.
1047	if (hasRuntimeUnrollDisablePragma(L)) {
1048	UP.Count = `0`;
1049	return false;
1050	}
1051
1052	// Don't unroll a small upper bound loop unless user or TTI asked to do so.
1053	if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
1054	UP.Count = `0`;
1055	return false;
1056	}
1057
1058	// Check if the runtime trip count is too small when profile is available.
1059	if (L->getHeader()->getParent()->hasProfileData()) {
1060	if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1061	if (*ProfileTripCount < FlatLoopTripCountThreshold)
1062	return false;
1063	else
1064	UP.AllowExpensiveTripCount = true;
1065	}
1066	}
1067	UP.Runtime \|= PragmaEnableUnroll \|\| PragmaCount > `0` \|\| UserUnrollCount;
1068	if (!UP.Runtime) {
1069	LLVM_DEBUG(
1070	dbgs() << " will not try to unroll loop with runtime trip count "
1071	<< "-unroll-runtime not given\n");
1072	UP.Count = `0`;
1073	return false;
1074	}
1075	if (UP.Count == `0`)
1076	UP.Count = UP.DefaultUnrollRuntimeCount;
1077
1078	// Reduce unroll count to be the largest power-of-two factor of
1079	// the original count which satisfies the threshold limit.
1080	while (UP.Count != `0` &&
1081	UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
1082	UP.Count >>= `1`;
1083
1084	#ifndef NDEBUG
1085	unsigned OrigCount = UP.Count;
1086	#endif
1087
1088	if (!UP.AllowRemainder && UP.Count != `0` && (TripMultiple % UP.Count) != `0`) {
1089	while (UP.Count != `0` && TripMultiple % UP.Count != `0`)
1090	UP.Count >>= `1`;
1091	LLVM_DEBUG(
1092	dbgs() << "Remainder loop is restricted (that could architecture "
1093	"specific or because the loop contains a convergent "
1094	"instruction), so unroll count must divide the trip "
1095	"multiple, "
1096	<< TripMultiple << ". Reducing unroll count from " << OrigCount
1097	<< " to " << UP.Count << ".\n");
1098
1099	using namespace ore;
1100
1101	if (unrollCountPragmaValue(L) > `0` && !UP.AllowRemainder)
1102	ORE->emit(RemarkBuilder: [&]() {
1103	return OptimizationRemarkMissed (DEBUG_TYPE,
1104	"DifferentUnrollCountFromDirected",
1105	L->getStartLoc(), L->getHeader())
1106	<< "Unable to unroll loop the number of times directed by "
1107	"unroll_count pragma because remainder loop is restricted "
1108	"(that could architecture specific or because the loop "
1109	"contains a convergent instruction) and so must have an "
1110	"unroll "
1111	"count that divides the loop trip multiple of "
1112	<< NV ("TripMultiple", TripMultiple) << ". Unrolling instead "
1113	<< NV ("UnrollCount", UP.Count) << " time(s).";
1114	});
1115	}
1116
1117	if (UP.Count > UP.MaxCount)
1118	UP.Count = UP.MaxCount;
1119
1120	if (MaxTripCount && UP.Count > MaxTripCount)
1121	UP.Count = MaxTripCount;
1122
1123	LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
1124	<< "\n");
1125	if (UP.Count < `2`)
1126	UP.Count = `0`;
1127	return ExplicitUnroll;
1128	}
1129
1130	static LoopUnrollResult
1131	tryToUnrollLoop(Loop L, DominatorTree &DT, LoopInfo LI, ScalarEvolution &SE,
1132	const TargetTransformInfo &TTI, AssumptionCache &AC,
1133	OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
1134	ProfileSummaryInfo PSI, bool* PreserveLCSSA, int OptLevel,
1135	bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
1136	std::optional<unsigned> ProvidedCount,
1137	std::optional<unsigned> ProvidedThreshold,
1138	std::optional<bool> ProvidedAllowPartial,
1139	std::optional<bool> ProvidedRuntime,
1140	std::optional<bool> ProvidedUpperBound,
1141	std::optional<bool> ProvidedAllowPeeling,
1142	std::optional<bool> ProvidedAllowProfileBasedPeeling,
1143	std::optional<unsigned> ProvidedFullUnrollMaxCount) {
1144
1145	LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1146	<< L->getHeader()->getParent()->getName() << "] Loop %"
1147	<< L->getHeader()->getName() << "\n");
1148	TransformationMode TM = hasUnrollTransformation(L);
1149	if (TM & TM_Disable)
1150	return LoopUnrollResult::Unmodified;
1151
1152	// If this loop isn't forced to be unrolled, avoid unrolling it when the
1153	// parent loop has an explicit unroll-and-jam pragma. This is to prevent
1154	// automatic unrolling from interfering with the user requested
1155	// transformation.
1156	Loop *ParentL = L->getParentLoop();
1157	if (ParentL != nullptr &&
1158	hasUnrollAndJamTransformation(L: ParentL) == TM_ForcedByUser &&
1159	hasUnrollTransformation(L) != TM_ForcedByUser) {
1160	LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
1161	<< " llvm.loop.unroll_and_jam.\n");
1162	return LoopUnrollResult::Unmodified;
1163	}
1164
1165	// If this loop isn't forced to be unrolled, avoid unrolling it when the
1166	// loop has an explicit unroll-and-jam pragma. This is to prevent automatic
1167	// unrolling from interfering with the user requested transformation.
1168	if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser &&
1169	hasUnrollTransformation(L) != TM_ForcedByUser) {
1170	LLVM_DEBUG(
1171	dbgs()
1172	<< " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1173	return LoopUnrollResult::Unmodified;
1174	}
1175
1176	if (!L->isLoopSimplifyForm()) {
1177	LLVM_DEBUG(
1178	dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1179	return LoopUnrollResult::Unmodified;
1180	}
1181
1182	// When automatic unrolling is disabled, do not unroll unless overridden for
1183	// this loop.
1184	if (OnlyWhenForced && !(TM & TM_Enable))
1185	return LoopUnrollResult::Unmodified;
1186
1187	bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1188	TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
1189	L, SE, TTI, BFI, PSI, ORE, OptLevel, UserThreshold: ProvidedThreshold, UserCount: ProvidedCount,
1190	UserAllowPartial: ProvidedAllowPartial, UserRuntime: ProvidedRuntime, UserUpperBound: ProvidedUpperBound,
1191	UserFullUnrollMaxCount: ProvidedFullUnrollMaxCount);
1192	TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
1193	L, SE, TTI, UserAllowPeeling: ProvidedAllowPeeling, UserAllowProfileBasedPeeling: ProvidedAllowProfileBasedPeeling, UnrollingSpecficValues: true);
1194
1195	// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1196	// as threshold later on.
1197	if (UP.Threshold == `0` && (!UP.Partial \|\| UP.PartialThreshold == `0`) &&
1198	!OptForSize)
1199	return LoopUnrollResult::Unmodified;
1200
1201	SmallPtrSet<const Value *, `32`> EphValues;
1202	CodeMetrics::collectEphemeralValues(L, AC: &AC, EphValues);
1203
1204	UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
1205	if (!UCE.canUnroll()) {
1206	LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
1207	<< " which cannot be duplicated or have invalid cost.\n");
1208	return LoopUnrollResult::Unmodified;
1209	}
1210
1211	unsigned LoopSize = UCE.getRolledLoopSize();
1212	LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1213
1214	// When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1215	// later), to (fully) unroll loops, if it does not increase code size.
1216	if (OptForSize)
1217	UP.Threshold = std::max(a: UP.Threshold, b: LoopSize + `1`);
1218
1219	if (UCE.NumInlineCandidates != `0`) {
1220	LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1221	return LoopUnrollResult::Unmodified;
1222	}
1223
1224	// Find the smallest exact trip count for any exit. This is an upper bound
1225	// on the loop trip count, but an exit at an earlier iteration is still
1226	// possible. An unroll by the smallest exact trip count guarantees that all
1227	// branches relating to at least one exit can be eliminated. This is unlike
1228	// the max trip count, which only guarantees that the backedge can be broken.
1229	unsigned TripCount = `0`;
1230	unsigned TripMultiple = `1`;
1231	SmallVector<BasicBlock *, `8`> ExitingBlocks;
1232	L->getExitingBlocks(ExitingBlocks);
1233	for (BasicBlock *ExitingBlock : ExitingBlocks)
1234	if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1235	if (!TripCount \|\| TC < TripCount)
1236	TripCount = TripMultiple = TC;
1237
1238	if (!TripCount) {
1239	// If no exact trip count is known, determine the trip multiple of either
1240	// the loop latch or the single exiting block.
1241	// TODO: Relax for multiple exits.
1242	BasicBlock *ExitingBlock = L->getLoopLatch();
1243	if (!ExitingBlock \|\| !L->isLoopExiting(BB: ExitingBlock))
1244	ExitingBlock = L->getExitingBlock();
1245	if (ExitingBlock)
1246	TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1247	}
1248
1249	// If the loop contains a convergent operation, the prelude we'd add
1250	// to do the first few instructions before we hit the unrolled loop
1251	// is unsafe -- it adds a control-flow dependency to the convergent
1252	// operation. Therefore restrict remainder loop (try unrolling without).
1253	//
1254	// TODO: This is quite conservative. In practice, convergent_op()
1255	// is likely to be called unconditionally in the loop. In this
1256	// case, the program would be ill-formed (on most architectures)
1257	// unless n were the same on all threads in a thread group.
1258	// Assuming n is the same on all threads, any kind of unrolling is
1259	// safe. But currently llvm's notion of convergence isn't powerful
1260	// enough to express this.
1261	if (UCE.Convergent)
1262	UP.AllowRemainder = false;
1263
1264	// Try to find the trip count upper bound if we cannot find the exact trip
1265	// count.
1266	unsigned MaxTripCount = `0`;
1267	bool MaxOrZero = false;
1268	if (!TripCount) {
1269	MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1270	MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1271	}
1272
1273	// computeUnrollCount() decides whether it is beneficial to use upper bound to
1274	// fully unroll the loop.
1275	bool UseUpperBound = false;
1276	bool IsCountSetExplicitly = computeUnrollCount(
1277	L, TTI, DT, LI, AC: &AC, SE, EphValues, ORE: &ORE, TripCount, MaxTripCount,
1278	MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
1279	if (!UP.Count)
1280	return LoopUnrollResult::Unmodified;
1281
1282	if (PP.PeelCount) {
1283	assert(UP.Count == `1` && "Cannot perform peel and unroll in the same step");
1284	LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1285	<< " with iteration count " << PP.PeelCount << "!\n");
1286	ORE.emit(RemarkBuilder: [&]() {
1287	return OptimizationRemark (DEBUG_TYPE, "Peeled", L->getStartLoc(),
1288	L->getHeader())
1289	<< " peeled loop by " << ore::NV ("PeelCount", PP.PeelCount)
1290	<< " iterations";
1291	});
1292
1293	ValueToValueMapTy VMap;
1294	if (peelLoop(L, PeelCount: PP.PeelCount, LI, SE: &SE, DT, AC: &AC, PreserveLCSSA, VMap)) {
1295	simplifyLoopAfterUnroll(L, SimplifyIVs: true, LI, SE: &SE, DT: &DT, AC: &AC, TTI: &TTI);
1296	// If the loop was peeled, we already "used up" the profile information
1297	// we had, so we don't want to unroll or peel again.
1298	if (PP.PeelProfiledIterations)
1299	L->setLoopAlreadyUnrolled();
1300	return LoopUnrollResult::PartiallyUnrolled;
1301	}
1302	return LoopUnrollResult::Unmodified;
1303	}
1304
1305	// Do not attempt partial/runtime unrolling in FullLoopUnrolling
1306	if (OnlyFullUnroll && (UP.Count < TripCount \|\| UP.Count < MaxTripCount)) {
1307	LLVM_DEBUG(
1308	dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1309	return LoopUnrollResult::Unmodified;
1310	}
1311
1312	// At this point, UP.Runtime indicates that run-time unrolling is allowed.
1313	// However, we only want to actually perform it if we don't know the trip
1314	// count and the unroll count doesn't divide the known trip multiple.
1315	// TODO: This decision should probably be pushed up into
1316	// computeUnrollCount().
1317	UP.Runtime &= TripCount == `0` && TripMultiple % UP.Count != `0`;
1318
1319	// Save loop properties before it is transformed.
1320	MDNode *OrigLoopID = L->getLoopID();
1321
1322	// Unroll the loop.
1323	Loop RemainderLoop = nullptr*;
1324	LoopUnrollResult UnrollResult = UnrollLoop(
1325	L,
1326	ULO: {.Count: UP.Count, .Force: UP.Force, .Runtime: UP.Runtime, .AllowExpensiveTripCount: UP.AllowExpensiveTripCount,
1327	.UnrollRemainder: UP.UnrollRemainder, .ForgetAllSCEV: ForgetAllSCEV},
1328	LI, SE: &SE, DT: &DT, AC: &AC, TTI: &TTI, ORE: &ORE, PreserveLCSSA, RemainderLoop: &RemainderLoop);
1329	if (UnrollResult == LoopUnrollResult::Unmodified)
1330	return LoopUnrollResult::Unmodified;
1331
1332	if (RemainderLoop) {
1333	std::optional<MDNode *> RemainderLoopID =
1334	makeFollowupLoopID(OrigLoopID, FollowupAttrs: {LLVMLoopUnrollFollowupAll,
1335	LLVMLoopUnrollFollowupRemainder});
1336	if (RemainderLoopID)
1337	RemainderLoop->setLoopID(*RemainderLoopID);
1338	}
1339
1340	if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1341	std::optional<MDNode *> NewLoopID =
1342	makeFollowupLoopID(OrigLoopID, FollowupAttrs: {LLVMLoopUnrollFollowupAll,
1343	LLVMLoopUnrollFollowupUnrolled});
1344	if (NewLoopID) {
1345	L->setLoopID(*NewLoopID);
1346
1347	// Do not setLoopAlreadyUnrolled if loop attributes have been specified
1348	// explicitly.
1349	return UnrollResult;
1350	}
1351	}
1352
1353	// If loop has an unroll count pragma or unrolled by explicitly set count
1354	// mark loop as unrolled to prevent unrolling beyond that requested.
1355	if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1356	L->setLoopAlreadyUnrolled();
1357
1358	return UnrollResult;
1359	}
1360
1361	namespace {
1362
1363	class LoopUnroll : public LoopPass {
1364	public:
1365	static char ID; // Pass ID, replacement for typeid
1366
1367	int OptLevel;
1368
1369	/// If false, use a cost model to determine whether unrolling of a loop is
1370	/// profitable. If true, only loops that explicitly request unrolling via
1371	/// metadata are considered. All other loops are skipped.
1372	bool OnlyWhenForced;
1373
1374	/// If false, when SCEV is invalidated, only forget everything in the
1375	/// top-most loop (call forgetTopMostLoop), of the loop being processed.
1376	/// Otherwise, forgetAllLoops and rebuild when needed next.
1377	bool ForgetAllSCEV;
1378
1379	std::optional<unsigned> ProvidedCount;
1380	std::optional<unsigned> ProvidedThreshold;
1381	std::optional<bool> ProvidedAllowPartial;
1382	std::optional<bool> ProvidedRuntime;
1383	std::optional<bool> ProvidedUpperBound;
1384	std::optional<bool> ProvidedAllowPeeling;
1385	std::optional<bool> ProvidedAllowProfileBasedPeeling;
1386	std::optional<unsigned> ProvidedFullUnrollMaxCount;
1387
1388	LoopUnroll(int OptLevel = `2`, bool OnlyWhenForced = false,
1389	bool ForgetAllSCEV = false,
1390	std::optional<unsigned> Threshold = std::nullopt,
1391	std::optional<unsigned> Count = std::nullopt,
1392	std::optional<bool> AllowPartial = std::nullopt,
1393	std::optional<bool> Runtime = std::nullopt,
1394	std::optional<bool> UpperBound = std::nullopt,
1395	std::optional<bool> AllowPeeling = std::nullopt,
1396	std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1397	std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
1398	: LoopPass (ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1399	ForgetAllSCEV(ForgetAllSCEV), ProvidedCount (std::move(Count)),
1400	ProvidedThreshold (Threshold), ProvidedAllowPartial (AllowPartial),
1401	ProvidedRuntime (Runtime), ProvidedUpperBound (UpperBound),
1402	ProvidedAllowPeeling (AllowPeeling),
1403	ProvidedAllowProfileBasedPeeling (AllowProfileBasedPeeling),
1404	ProvidedFullUnrollMaxCount (ProvidedFullUnrollMaxCount) {
1405	initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
1406	}
1407
1408	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1409	if (skipLoop(L))
1410	return false;
1411
1412	Function &F = *L->getHeader()->getParent();
1413
1414	auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1415	LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1416	ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1417	const TargetTransformInfo &TTI =
1418	getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1419	auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1420	// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1421	// pass. Function analyses need to be preserved across loop transformations
1422	// but ORE cannot be preserved (see comment before the pass definition).
1423	OptimizationRemarkEmitter ORE(&F);
1424	bool PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID);
1425
1426	LoopUnrollResult Result = tryToUnrollLoop(
1427	L, DT, LI, SE, TTI, AC, ORE, BFI: nullptr, PSI: nullptr, PreserveLCSSA, OptLevel,
1428	/OnlyFullUnroll/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1429	ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1430	ProvidedUpperBound, ProvidedAllowPeeling,
1431	ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
1432
1433	if (Result == LoopUnrollResult::FullyUnrolled)
1434	LPM.markLoopAsDeleted(L&: *L);
1435
1436	return Result != LoopUnrollResult::Unmodified;
1437	}
1438
1439	/// This transformation requires natural loop information & requires that
1440	/// loop preheaders be inserted into the CFG...
1441	void getAnalysisUsage(AnalysisUsage &AU) const override {
1442	AU.addRequired<AssumptionCacheTracker>();
1443	AU.addRequired<TargetTransformInfoWrapperPass>();
1444	// FIXME: Loop passes are required to preserve domtree, and for now we just
1445	// recreate dom info if anything gets unrolled.
1446	getLoopAnalysisUsage(AU);
1447	}
1448	};
1449
1450	} // end anonymous namespace
1451
1452	char LoopUnroll::ID = `0`;
1453
1454	INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1455	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1456	INITIALIZE_PASS_DEPENDENCY(LoopPass)
1457	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1458	INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1459
1460	Pass llvm::createLoopUnrollPass(int* OptLevel, bool OnlyWhenForced,
1461	bool ForgetAllSCEV, int Threshold, int Count,
1462	int AllowPartial, int Runtime, int UpperBound,
1463	int AllowPeeling) {
1464	// TODO: It would make more sense for this function to take the optionals
1465	// directly, but that's dangerous since it would silently break out of tree
1466	// callers.
1467	return new LoopUnroll (
1468	OptLevel, OnlyWhenForced, ForgetAllSCEV,
1469	Threshold == -`1` ? std::nullopt : std::optional<unsigned>(Threshold),
1470	Count == -`1` ? std::nullopt : std::optional<unsigned>(Count),
1471	AllowPartial == -`1` ? std::nullopt : std::optional<bool>(AllowPartial),
1472	Runtime == -`1` ? std::nullopt : std::optional<bool>(Runtime),
1473	UpperBound == -`1` ? std::nullopt : std::optional<bool>(UpperBound),
1474	AllowPeeling == -`1` ? std::nullopt : std::optional<bool>(AllowPeeling));
1475	}
1476
1477	PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
1478	LoopStandardAnalysisResults &AR,
1479	LPMUpdater &Updater) {
1480	// For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1481	// pass. Function analyses need to be preserved across loop transformations
1482	// but ORE cannot be preserved (see comment before the pass definition).
1483	OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
1484
1485	// Keep track of the previous loop structure so we can identify new loops
1486	// created by unrolling.
1487	Loop *ParentL = L.getParentLoop();
1488	SmallPtrSet<Loop *, `4`> OldLoops;
1489	if (ParentL)
1490	OldLoops.insert(I: ParentL->begin(), E: ParentL->end());
1491	else
1492	OldLoops.insert(I: AR.LI.begin(), E: AR.LI.end());
1493
1494	std::string LoopName = std::string (L.getName());
1495
1496	bool Changed =
1497	tryToUnrollLoop(L: &L, DT&: AR.DT, LI: &AR.LI, SE&: AR.SE, TTI: AR.TTI, AC&: AR.AC, ORE,
1498	/BFI/ nullptr, /PSI/ nullptr,
1499	/PreserveLCSSA/ true, OptLevel, /OnlyFullUnroll/ true,
1500	OnlyWhenForced, ForgetAllSCEV: ForgetSCEV, /Count/ ProvidedCount: std::nullopt,
1501	/Threshold/ ProvidedThreshold: std::nullopt, /AllowPartial/ ProvidedAllowPartial: false,
1502	/Runtime/ ProvidedRuntime: false, /UpperBound/ ProvidedUpperBound: false,
1503	/AllowPeeling/ ProvidedAllowPeeling: true,
1504	/AllowProfileBasedPeeling/ ProvidedAllowProfileBasedPeeling: false,
1505	/FullUnrollMaxCount/ ProvidedFullUnrollMaxCount: std::nullopt) !=
1506	LoopUnrollResult::Unmodified;
1507	if (!Changed)
1508	return PreservedAnalyses::all();
1509
1510	// The parent must not be damaged by unrolling!
1511	#ifndef NDEBUG
1512	if (ParentL)
1513	ParentL->verifyLoop();
1514	#endif
1515
1516	// Unrolling can do several things to introduce new loops into a loop nest:
1517	// - Full unrolling clones child loops within the current loop but then
1518	// removes the current loop making all of the children appear to be new
1519	// sibling loops.
1520	//
1521	// When a new loop appears as a sibling loop after fully unrolling,
1522	// its nesting structure has fundamentally changed and we want to revisit
1523	// it to reflect that.
1524	//
1525	// When unrolling has removed the current loop, we need to tell the
1526	// infrastructure that it is gone.
1527	//
1528	// Finally, we support a debugging/testing mode where we revisit child loops
1529	// as well. These are not expected to require further optimizations as either
1530	// they or the loop they were cloned from have been directly visited already.
1531	// But the debugging mode allows us to check this assumption.
1532	bool IsCurrentLoopValid = false;
1533	SmallVector<Loop *, `4`> SibLoops;
1534	if (ParentL)
1535	SibLoops.append(in_start: ParentL->begin(), in_end: ParentL->end());
1536	else
1537	SibLoops.append(in_start: AR.LI.begin(), in_end: AR.LI.end());
1538	erase_if(C&: SibLoops, P: [&](Loop *SibLoop) {
1539	if (SibLoop == &L) {
1540	IsCurrentLoopValid = true;
1541	return true;
1542	}
1543
1544	// Otherwise erase the loop from the list if it was in the old loops.
1545	return OldLoops.contains(Ptr: SibLoop);
1546	});
1547	Updater.addSiblingLoops(NewSibLoops: SibLoops);
1548
1549	if (!IsCurrentLoopValid) {
1550	Updater.markLoopAsDeleted(L, Name: LoopName);
1551	} else {
1552	// We can only walk child loops if the current loop remained valid.
1553	if (UnrollRevisitChildLoops) {
1554	// Walk *all* of the child loops.
1555	SmallVector<Loop *, `4`> ChildLoops(L.begin(), L.end());
1556	Updater.addChildLoops(NewChildLoops: ChildLoops);
1557	}
1558	}
1559
1560	return getLoopPassPreservedAnalyses();
1561	}
1562
1563	PreservedAnalyses LoopUnrollPass::run(Function &F,
1564	FunctionAnalysisManager &AM) {
1565	auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
1566	// There are no loops in the function. Return before computing other expensive
1567	// analyses.
1568	if (LI.empty())
1569	return PreservedAnalyses::all();
1570	auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
1571	auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
1572	auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
1573	auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
1574	auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
1575
1576	LoopAnalysisManager LAM = nullptr*;
1577	if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(IR&: F))
1578	LAM = &LAMProxy->getManager();
1579
1580	auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
1581	ProfileSummaryInfo *PSI =
1582	MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
1583	auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1584	&AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
1585
1586	bool Changed = false;
1587
1588	// The unroller requires loops to be in simplified form, and also needs LCSSA.
1589	// Since simplification may add new inner loops, it has to run before the
1590	// legality and profitability checks. This means running the loop unroller
1591	// will simplify all loops, regardless of whether anything end up being
1592	// unrolled.
1593	for (const auto &L : LI) {
1594	Changed \|=
1595	simplifyLoop(L, DT: &DT, LI: &LI, SE: &SE, AC: &AC, MSSAU: nullptr, PreserveLCSSA: false / PreserveLCSSA /);
1596	Changed \|= formLCSSARecursively(L&: *L, DT, LI: &LI, SE: &SE);
1597	}
1598
1599	// Add the loop nests in the reverse order of LoopInfo. See method
1600	// declaration.
1601	SmallPriorityWorklist<Loop *, `4`> Worklist;
1602	appendLoopsToWorklist(LI, Worklist);
1603
1604	while (!Worklist.empty()) {
1605	// Because the LoopInfo stores the loops in RPO, we walk the worklist
1606	// from back to front so that we work forward across the CFG, which
1607	// for unrolling is only needed to get optimization remarks emitted in
1608	// a forward order.
1609	Loop &L = *Worklist.pop_back_val();
1610	#ifndef NDEBUG
1611	Loop *ParentL = L.getParentLoop();
1612	#endif
1613
1614	// Check if the profile summary indicates that the profiled application
1615	// has a huge working set size, in which case we disable peeling to avoid
1616	// bloating it further.
1617	std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1618	if (PSI && PSI->hasHugeWorkingSetSize())
1619	LocalAllowPeeling = false;
1620	std::string LoopName = std::string (L.getName());
1621	// The API here is quite complex to call and we allow to select some
1622	// flavors of unrolling during construction time (by setting UnrollOpts).
1623	LoopUnrollResult Result = tryToUnrollLoop(
1624	L: &L, DT, LI: &LI, SE, TTI, AC, ORE, BFI, PSI,
1625	/PreserveLCSSA/ true, OptLevel: UnrollOpts.OptLevel, /OnlyFullUnroll/ false,
1626	OnlyWhenForced: UnrollOpts.OnlyWhenForced, ForgetAllSCEV: UnrollOpts.ForgetSCEV,
1627	/Count/ ProvidedCount: std::nullopt,
1628	/Threshold/ ProvidedThreshold: std::nullopt, ProvidedAllowPartial: UnrollOpts.AllowPartial,
1629	ProvidedRuntime: UnrollOpts.AllowRuntime, ProvidedUpperBound: UnrollOpts.AllowUpperBound, ProvidedAllowPeeling: LocalAllowPeeling,
1630	ProvidedAllowProfileBasedPeeling: UnrollOpts.AllowProfileBasedPeeling, ProvidedFullUnrollMaxCount: UnrollOpts.FullUnrollMaxCount);
1631	Changed \|= Result != LoopUnrollResult::Unmodified;
1632
1633	// The parent must not be damaged by unrolling!
1634	#ifndef NDEBUG
1635	if (Result != LoopUnrollResult::Unmodified && ParentL)
1636	ParentL->verifyLoop();
1637	#endif
1638
1639	// Clear any cached analysis results for L if we removed it completely.
1640	if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1641	LAM->clear(IR&: L, Name: LoopName);
1642	}
1643
1644	if (!Changed)
1645	return PreservedAnalyses::all();
1646
1647	return getLoopPassPreservedAnalyses();
1648	}
1649
1650	void LoopUnrollPass::printPipeline(
1651	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1652	static_cast<PassInfoMixin<LoopUnrollPass> >(this*)->printPipeline(
1653	OS, MapClassName2PassName);
1654	OS << `'<'`;
1655	if (UnrollOpts.AllowPartial != std::nullopt)
1656	OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
1657	if (UnrollOpts.AllowPeeling != std::nullopt)
1658	OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";
1659	if (UnrollOpts.AllowRuntime != std::nullopt)
1660	OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";
1661	if (UnrollOpts.AllowUpperBound != std::nullopt)
1662	OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";
1663	if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1664	OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
1665	<< "profile-peeling;";
1666	if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
1667	OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << `';'`;
1668	OS << `'O'` << UnrollOpts.OptLevel;
1669	OS << `'>'`;
1670	}
1671

source code of llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp