LoopStrengthReduce.cpp source code [llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp]

1	//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This transformation analyzes and transforms the induction variables (and
10	// computations derived from them) into forms suitable for efficient execution
11	// on the target.
12	//
13	// This pass performs a strength reduction on array references inside loops that
14	// have as one or more of their components the loop induction variable, it
15	// rewrites expressions to take advantage of scaled-index addressing modes
16	// available on the target, and it performs a variety of other optimizations
17	// related to loop induction variables.
18	//
19	// Terminology note: this code has a lot of handling for "post-increment" or
20	// "post-inc" users. This is not talking about post-increment addressing modes;
21	// it is instead talking about code like this:
22	//
23	// %i = phi [ 0, %entry ], [ %i.next, %latch ]
24	// ...
25	// %i.next = add %i, 1
26	// %c = icmp eq %i.next, %n
27	//
28	// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
29	// it's useful to think about these as the same register, with some uses using
30	// the value of the register before the add and some using it after. In this
31	// example, the icmp is a post-increment user, since it uses %i.next, which is
32	// the value of the induction variable after the increment. The other common
33	// case of post-increment users is users outside the loop.
34	//
35	// TODO: More sophistication in the way Formulae are generated and filtered.
36	//
37	// TODO: Handle multiple loops at a time.
38	//
39	// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
40	// of a GlobalValue?
41	//
42	// TODO: When truncation is free, truncate ICmp users' operands to make it a
43	// smaller encoding (on x86 at least).
44	//
45	// TODO: When a negated register is used by an add (such as in a list of
46	// multiple base registers, or as the increment expression in an addrec),
47	// we may not actually need both reg and (-1 * reg) in registers; the
48	// negation can be implemented by using a sub instead of an add. The
49	// lack of support for taking this into consideration when making
50	// register pressure decisions is partly worked around by the "Special"
51	// use kind.
52	//
53	//===----------------------------------------------------------------------===//
54
55	#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
56	#include "llvm/ADT/APInt.h"
57	#include "llvm/ADT/DenseMap.h"
58	#include "llvm/ADT/DenseSet.h"
59	#include "llvm/ADT/Hashing.h"
60	#include "llvm/ADT/PointerIntPair.h"
61	#include "llvm/ADT/STLExtras.h"
62	#include "llvm/ADT/SetVector.h"
63	#include "llvm/ADT/SmallBitVector.h"
64	#include "llvm/ADT/SmallPtrSet.h"
65	#include "llvm/ADT/SmallSet.h"
66	#include "llvm/ADT/SmallVector.h"
67	#include "llvm/ADT/Statistic.h"
68	#include "llvm/ADT/iterator_range.h"
69	#include "llvm/Analysis/AssumptionCache.h"
70	#include "llvm/Analysis/DomTreeUpdater.h"
71	#include "llvm/Analysis/IVUsers.h"
72	#include "llvm/Analysis/LoopAnalysisManager.h"
73	#include "llvm/Analysis/LoopInfo.h"
74	#include "llvm/Analysis/LoopPass.h"
75	#include "llvm/Analysis/MemorySSA.h"
76	#include "llvm/Analysis/MemorySSAUpdater.h"
77	#include "llvm/Analysis/ScalarEvolution.h"
78	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
79	#include "llvm/Analysis/ScalarEvolutionNormalization.h"
80	#include "llvm/Analysis/TargetLibraryInfo.h"
81	#include "llvm/Analysis/TargetTransformInfo.h"
82	#include "llvm/Analysis/ValueTracking.h"
83	#include "llvm/BinaryFormat/Dwarf.h"
84	#include "llvm/Config/llvm-config.h"
85	#include "llvm/IR/BasicBlock.h"
86	#include "llvm/IR/Constant.h"
87	#include "llvm/IR/Constants.h"
88	#include "llvm/IR/DebugInfoMetadata.h"
89	#include "llvm/IR/DerivedTypes.h"
90	#include "llvm/IR/Dominators.h"
91	#include "llvm/IR/GlobalValue.h"
92	#include "llvm/IR/IRBuilder.h"
93	#include "llvm/IR/InstrTypes.h"
94	#include "llvm/IR/Instruction.h"
95	#include "llvm/IR/Instructions.h"
96	#include "llvm/IR/IntrinsicInst.h"
97	#include "llvm/IR/Module.h"
98	#include "llvm/IR/Operator.h"
99	#include "llvm/IR/PassManager.h"
100	#include "llvm/IR/Type.h"
101	#include "llvm/IR/Use.h"
102	#include "llvm/IR/User.h"
103	#include "llvm/IR/Value.h"
104	#include "llvm/IR/ValueHandle.h"
105	#include "llvm/InitializePasses.h"
106	#include "llvm/Pass.h"
107	#include "llvm/Support/Casting.h"
108	#include "llvm/Support/CommandLine.h"
109	#include "llvm/Support/Compiler.h"
110	#include "llvm/Support/Debug.h"
111	#include "llvm/Support/ErrorHandling.h"
112	#include "llvm/Support/MathExtras.h"
113	#include "llvm/Support/raw_ostream.h"
114	#include "llvm/Transforms/Scalar.h"
115	#include "llvm/Transforms/Utils.h"
116	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
117	#include "llvm/Transforms/Utils/Local.h"
118	#include "llvm/Transforms/Utils/LoopUtils.h"
119	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
120	#include <algorithm>
121	#include <cassert>
122	#include <cstddef>
123	#include <cstdint>
124	#include <iterator>
125	#include <limits>
126	#include <map>
127	#include <numeric>
128	#include <optional>
129	#include <utility>
130
131	using namespace llvm;
132
133	#define DEBUG_TYPE "loop-reduce"
134
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;
146
147	// Cleanup congruent phis after LSR phi expansion.
148	static cl::opt<bool> EnablePhiElim(
149	"enable-lsr-phielim", cl::Hidden, cl::init(Val: true),
150	cl::desc ("Enable LSR phi elimination"));
151
152	// The flag adds instruction count to solutions cost comparison.
153	static cl::opt<bool> InsnsCost(
154	"lsr-insns-cost", cl::Hidden, cl::init(Val: true),
155	cl::desc ("Add instruction count to a LSR cost model"));
156
157	// Flag to choose how to narrow complex lsr solution
158	static cl::opt<bool> LSRExpNarrow(
159	"lsr-exp-narrow", cl::Hidden, cl::init(Val: false),
160	cl::desc ("Narrow LSR complex solution using"
161	" expectation of registers number"));
162
163	// Flag to narrow search space by filtering non-optimal formulae with
164	// the same ScaledReg and Scale.
165	static cl::opt<bool> FilterSameScaledReg(
166	"lsr-filter-same-scaled-reg", cl::Hidden, cl::init(Val: true),
167	cl::desc ("Narrow LSR search space by filtering non-optimal formulae"
168	" with the same ScaledReg and Scale"));
169
170	static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
171	"lsr-preferred-addressing-mode", cl::Hidden, cl::init(Val: TTI::AMK_None),
172	cl::desc ("A flag that overrides the target's preferred addressing mode."),
173	cl::values(clEnumValN(TTI::AMK_None,
174	"none",
175	"Don't prefer any addressing mode"),
176	clEnumValN(TTI::AMK_PreIndexed,
177	"preindexed",
178	"Prefer pre-indexed addressing mode"),
179	clEnumValN(TTI::AMK_PostIndexed,
180	"postindexed",
181	"Prefer post-indexed addressing mode")));
182
183	static cl::opt<unsigned> ComplexityLimit(
184	"lsr-complexity-limit", cl::Hidden,
185	cl::init(Val: std::numeric_limits<uint16_t>::max()),
186	cl::desc ("LSR search space complexity limit"));
187
188	static cl::opt<unsigned> SetupCostDepthLimit(
189	"lsr-setupcost-depth-limit", cl::Hidden, cl::init(Val: `7`),
190	cl::desc ("The limit on recursion depth for LSRs setup cost"));
191
192	static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
193	"lsr-term-fold", cl::Hidden,
194	cl::desc ("Attempt to replace primary IV with other IV."));
195
196	static cl::opt<bool> AllowDropSolutionIfLessProfitable(
197	"lsr-drop-solution", cl::Hidden, cl::init(Val: false),
198	cl::desc ("Attempt to drop solution if it is less profitable"));
199
200	STATISTIC(NumTermFold,
201	"Number of terminating condition fold recognized and performed");
202
203	#ifndef NDEBUG
204	// Stress test IV chain generation.
205	static cl::opt<bool> StressIVChain(
206	"stress-ivchain", cl::Hidden, cl::init(Val: false),
207	cl::desc ("Stress test LSR IV chains"));
208	#else
209	static bool StressIVChain = false;
210	#endif
211
212	namespace {
213
214	struct MemAccessTy {
215	/// Used in situations where the accessed memory type is unknown.
216	static const unsigned UnknownAddressSpace =
217	std::numeric_limits<unsigned>::max();
218
219	Type MemTy = nullptr*;
220	unsigned AddrSpace = UnknownAddressSpace;
221
222	MemAccessTy() = default;
223	MemAccessTy(Type Ty, unsigned* AS) : MemTy(Ty), AddrSpace(AS) {}
224
225	bool operator==(MemAccessTy Other) const {
226	return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
227	}
228
229	bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
230
231	static MemAccessTy getUnknown(LLVMContext &Ctx,
232	unsigned AS = UnknownAddressSpace) {
233	return MemAccessTy (Type::getVoidTy(C&: Ctx), AS);
234	}
235
236	Type getType() { return* MemTy; }
237	};
238
239	/// This class holds data which is used to order reuse candidates.
240	class RegSortData {
241	public:
242	/// This represents the set of LSRUse indices which reference
243	/// a particular register.
244	SmallBitVector UsedByIndices;
245
246	void print(raw_ostream &OS) const;
247	void dump() const;
248	};
249
250	} // end anonymous namespace
251
252	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
253	void RegSortData::print(raw_ostream &OS) const {
254	OS << "[NumUses=" << UsedByIndices.count() << `']'`;
255	}
256
257	LLVM_DUMP_METHOD void RegSortData::dump() const {
258	print(OS&: errs()); errs() << `'\n'`;
259	}
260	#endif
261
262	namespace {
263
264	/// Map register candidates to information about how they are used.
265	class RegUseTracker {
266	using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
267
268	RegUsesTy RegUsesMap;
269	SmallVector<const SCEV *, `16`> RegSequence;
270
271	public:
272	void countRegister(const SCEV *Reg, size_t LUIdx);
273	void dropRegister(const SCEV *Reg, size_t LUIdx);
274	void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
275
276	bool isRegUsedByUsesOtherThan(const SCEV Reg, size_t LUIdx) const*;
277
278	const SmallBitVector &getUsedByIndices(const SCEV Reg) const*;
279
280	void clear();
281
282	using iterator = SmallVectorImpl<const SCEV *>::iterator;
283	using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
284
285	iterator begin() { return RegSequence.begin(); }
286	iterator end() { return RegSequence.end(); }
287	const_iterator begin() const { return RegSequence.begin(); }
288	const_iterator end() const { return RegSequence.end(); }
289	};
290
291	} // end anonymous namespace
292
293	void
294	RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
295	std::pair<RegUsesTy::iterator, bool> Pair =
296	RegUsesMap.insert(KV: std::make_pair(x&: Reg, y: RegSortData ()));
297	RegSortData &RSD = Pair.first ->second;
298	if (Pair.second)
299	RegSequence.push_back(Elt: Reg);
300	RSD.UsedByIndices.resize(N: std::max(a: RSD.UsedByIndices.size(), b: LUIdx + `1`));
301	RSD.UsedByIndices.set(LUIdx);
302	}
303
304	void
305	RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
306	RegUsesTy::iterator It = RegUsesMap.find(Val: Reg);
307	assert(It != RegUsesMap.end());
308	RegSortData &RSD = It ->second;
309	assert(RSD.UsedByIndices.size() > LUIdx);
310	RSD.UsedByIndices.reset(Idx: LUIdx);
311	}
312
313	void
314	RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
315	assert(LUIdx <= LastLUIdx);
316
317	// Update RegUses. The data structure is not optimized for this purpose;
318	// we must iterate through it and update each of the bit vectors.
319	for (auto &Pair : RegUsesMap) {
320	SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
321	if (LUIdx < UsedByIndices.size())
322	UsedByIndices [LUIdx] =
323	LastLUIdx < UsedByIndices.size() ? UsedByIndices [LastLUIdx] : false;
324	UsedByIndices.resize(N: std::min(a: UsedByIndices.size(), b: LastLUIdx));
325	}
326	}
327
328	bool
329	RegUseTracker::isRegUsedByUsesOtherThan(const SCEV Reg, size_t LUIdx) const* {
330	RegUsesTy::const_iterator I = RegUsesMap.find(Val: Reg);
331	if (I == RegUsesMap.end())
332	return false;
333	const SmallBitVector &UsedByIndices = I ->second.UsedByIndices;
334	int i = UsedByIndices.find_first();
335	if (i == -`1`) return false;
336	if ((size_t)i != LUIdx) return true;
337	return UsedByIndices.find_next(Prev: i) != -`1`;
338	}
339
340	const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV Reg) const* {
341	RegUsesTy::const_iterator I = RegUsesMap.find(Val: Reg);
342	assert(I != RegUsesMap.end() && "Unknown register!");
343	return I ->second.UsedByIndices;
344	}
345
346	void RegUseTracker::clear() {
347	RegUsesMap.clear();
348	RegSequence.clear();
349	}
350
351	namespace {
352
353	/// This class holds information that describes a formula for computing
354	/// satisfying a use. It may include broken-out immediates and scaled registers.
355	struct Formula {
356	/// Global base address used for complex addressing.
357	GlobalValue BaseGV = nullptr*;
358
359	/// Base offset for complex addressing.
360	int64_t BaseOffset = `0`;
361
362	/// Whether any complex addressing has a base register.
363	bool HasBaseReg = false;
364
365	/// The scale of any complex addressing.
366	int64_t Scale = `0`;
367
368	/// The list of "base" registers for this use. When this is non-empty. The
369	/// canonical representation of a formula is
370	/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
371	/// 2. ScaledReg != NULL implies Scale != 1 \|\| !BaseRegs.empty().
372	/// 3. The reg containing recurrent expr related with currect loop in the
373	/// formula should be put in the ScaledReg.
374	/// #1 enforces that the scaled register is always used when at least two
375	/// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 reg2.*
376	/// #2 enforces that 1 reg is reg.*
377	/// #3 ensures invariant regs with respect to current loop can be combined
378	/// together in LSR codegen.
379	/// This invariant can be temporarily broken while building a formula.
380	/// However, every formula inserted into the LSRInstance must be in canonical
381	/// form.
382	SmallVector<const SCEV *, `4`> BaseRegs;
383
384	/// The 'scaled' register for this use. This should be non-null when Scale is
385	/// not zero.
386	const SCEV ScaledReg = nullptr*;
387
388	/// An additional constant offset which added near the use. This requires a
389	/// temporary register, but the offset itself can live in an add immediate
390	/// field rather than a register.
391	int64_t UnfoldedOffset = `0`;
392
393	Formula() = default;
394
395	void initialMatch(const SCEV S, Loop L, ScalarEvolution &SE);
396
397	bool isCanonical(const Loop &L) const;
398
399	void canonicalize(const Loop &L);
400
401	bool unscale();
402
403	bool hasZeroEnd() const;
404
405	size_t getNumRegs() const;
406	Type getType() const*;
407
408	void deleteBaseReg(const SCEV *&S);
409
410	bool referencesReg(const SCEV S) const*;
411	bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
412	const RegUseTracker &RegUses) const;
413
414	void print(raw_ostream &OS) const;
415	void dump() const;
416	};
417
418	} // end anonymous namespace
419
420	/// Recursion helper for initialMatch.
421	static void DoInitialMatch(const SCEV S, Loop L,
422	SmallVectorImpl<const SCEV *> &Good,
423	SmallVectorImpl<const SCEV *> &Bad,
424	ScalarEvolution &SE) {
425	// Collect expressions which properly dominate the loop header.
426	if (SE.properlyDominates(S, BB: L->getHeader())) {
427	Good.push_back(Elt: S);
428	return;
429	}
430
431	// Look at add operands.
432	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
433	for (const SCEV *S : Add->operands())
434	DoInitialMatch(S, L, Good, Bad, SE);
435	return;
436	}
437
438	// Look at addrec operands.
439	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
440	if (!AR->getStart()->isZero() && AR->isAffine()) {
441	DoInitialMatch(S: AR->getStart(), L, Good, Bad, SE);
442	DoInitialMatch(S: SE.getAddRecExpr(Start: SE.getConstant(Ty: AR->getType(), V: `0`),
443	Step: AR->getStepRecurrence(SE),
444	// FIXME: AR->getNoWrapFlags()
445	L: AR->getLoop(), Flags: SCEV::FlagAnyWrap),
446	L, Good, Bad, SE);
447	return;
448	}
449
450	// Handle a multiplication by -1 (negation) if it didn't fold.
451	if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: S))
452	if (Mul->getOperand(i: `0`)->isAllOnesValue()) {
453	SmallVector<const SCEV *, `4`> Ops(drop_begin(RangeOrContainer: Mul->operands()));
454	const SCEV *NewMul = SE.getMulExpr(Ops);
455
456	SmallVector<const SCEV *, `4`> MyGood;
457	SmallVector<const SCEV *, `4`> MyBad;
458	DoInitialMatch(S: NewMul, L, Good&: MyGood, Bad&: MyBad, SE);
459	const SCEV *NegOne = SE.getSCEV(V: ConstantInt::getAllOnesValue(
460	Ty: SE.getEffectiveSCEVType(Ty: NewMul->getType())));
461	for (const SCEV *S : MyGood)
462	Good.push_back(Elt: SE.getMulExpr(LHS: NegOne, RHS: S));
463	for (const SCEV *S : MyBad)
464	Bad.push_back(Elt: SE.getMulExpr(LHS: NegOne, RHS: S));
465	return;
466	}
467
468	// Ok, we can't do anything interesting. Just stuff the whole thing into a
469	// register and hope for the best.
470	Bad.push_back(Elt: S);
471	}
472
473	/// Incorporate loop-variant parts of S into this Formula, attempting to keep
474	/// all loop-invariant and loop-computable values in a single base register.
475	void Formula::initialMatch(const SCEV S, Loop L, ScalarEvolution &SE) {
476	SmallVector<const SCEV *, `4`> Good;
477	SmallVector<const SCEV *, `4`> Bad;
478	DoInitialMatch(S, L, Good, Bad, SE);
479	if (!Good.empty()) {
480	const SCEV *Sum = SE.getAddExpr(Ops&: Good);
481	if (!Sum->isZero())
482	BaseRegs.push_back(Elt: Sum);
483	HasBaseReg = true;
484	}
485	if (!Bad.empty()) {
486	const SCEV *Sum = SE.getAddExpr(Ops&: Bad);
487	if (!Sum->isZero())
488	BaseRegs.push_back(Elt: Sum);
489	HasBaseReg = true;
490	}
491	canonicalize(L: *L);
492	}
493
494	static bool containsAddRecDependentOnLoop(const SCEV S, const* Loop &L) {
495	return SCEVExprContains(Root: S, Pred: [&L](const SCEV *S) {
496	return isa<SCEVAddRecExpr>(Val: S) && (cast<SCEVAddRecExpr>(Val: S)->getLoop() == &L);
497	});
498	}
499
500	/// Check whether or not this formula satisfies the canonical
501	/// representation.
502	/// \see Formula::BaseRegs.
503	bool Formula::isCanonical(const Loop &L) const {
504	if (!ScaledReg)
505	return BaseRegs.size() <= `1`;
506
507	if (Scale != `1`)
508	return true;
509
510	if (Scale == `1` && BaseRegs.empty())
511	return false;
512
513	if (containsAddRecDependentOnLoop(S: ScaledReg, L))
514	return true;
515
516	// If ScaledReg is not a recurrent expr, or it is but its loop is not current
517	// loop, meanwhile BaseRegs contains a recurrent expr reg related with current
518	// loop, we want to swap the reg in BaseRegs with ScaledReg.
519	return none_of(Range: BaseRegs, P: [&L](const SCEV *S) {
520	return containsAddRecDependentOnLoop(S, L);
521	});
522	}
523
524	/// Helper method to morph a formula into its canonical representation.
525	/// \see Formula::BaseRegs.
526	/// Every formula having more than one base register, must use the ScaledReg
527	/// field. Otherwise, we would have to do special cases everywhere in LSR
528	/// to treat reg1 + reg2 + ... the same way as reg1 + 1reg2 + ...*
529	/// On the other hand, 1reg should be canonicalized into reg.*
530	void Formula::canonicalize(const Loop &L) {
531	if (isCanonical(L))
532	return;
533
534	if (BaseRegs.empty()) {
535	// No base reg? Use scale reg with scale = 1 as such.
536	assert(ScaledReg && "Expected 1*reg => reg");
537	assert(Scale == `1` && "Expected 1*reg => reg");
538	BaseRegs.push_back(Elt: ScaledReg);
539	Scale = `0`;
540	ScaledReg = nullptr;
541	return;
542	}
543
544	// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
545	if (!ScaledReg) {
546	ScaledReg = BaseRegs.pop_back_val();
547	Scale = `1`;
548	}
549
550	// If ScaledReg is an invariant with respect to L, find the reg from
551	// BaseRegs containing the recurrent expr related with Loop L. Swap the
552	// reg with ScaledReg.
553	if (!containsAddRecDependentOnLoop(S: ScaledReg, L)) {
554	auto I = find_if(Range&: BaseRegs, P: [&L](const SCEV *S) {
555	return containsAddRecDependentOnLoop(S, L);
556	});
557	if (I != BaseRegs.end())
558	std::swap(a&: ScaledReg, b&: *I);
559	}
560	assert(isCanonical(L) && "Failed to canonicalize?");
561	}
562
563	/// Get rid of the scale in the formula.
564	/// In other words, this method morphes reg1 + 1reg2 into reg1 + reg2.*
565	/// \return true if it was possible to get rid of the scale, false otherwise.
566	/// \note After this operation the formula may not be in the canonical form.
567	bool Formula::unscale() {
568	if (Scale != `1`)
569	return false;
570	Scale = `0`;
571	BaseRegs.push_back(Elt: ScaledReg);
572	ScaledReg = nullptr;
573	return true;
574	}
575
576	bool Formula::hasZeroEnd() const {
577	if (UnfoldedOffset \|\| BaseOffset)
578	return false;
579	if (BaseRegs.size() != `1` \|\| ScaledReg)
580	return false;
581	return true;
582	}
583
584	/// Return the total number of register operands used by this formula. This does
585	/// not include register uses implied by non-constant addrec strides.
586	size_t Formula::getNumRegs() const {
587	return !!ScaledReg + BaseRegs.size();
588	}
589
590	/// Return the type of this formula, if it has one, or null otherwise. This type
591	/// is meaningless except for the bit size.
592	Type Formula::getType() const* {
593	return !BaseRegs.empty() ? BaseRegs.front()->getType() :
594	ScaledReg ? ScaledReg->getType() :
595	BaseGV ? BaseGV->getType() :
596	nullptr;
597	}
598
599	/// Delete the given base reg from the BaseRegs list.
600	void Formula::deleteBaseReg(const SCEV *&S) {
601	if (&S != &BaseRegs.back())
602	std::swap(a&: S, b&: BaseRegs.back());
603	BaseRegs.pop_back();
604	}
605
606	/// Test if this formula references the given register.
607	bool Formula::referencesReg(const SCEV S) const* {
608	return S == ScaledReg \|\| is_contained(Range: BaseRegs, Element: S);
609	}
610
611	/// Test whether this formula uses registers which are used by uses other than
612	/// the use with the given index.
613	bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
614	const RegUseTracker &RegUses) const {
615	if (ScaledReg)
616	if (RegUses.isRegUsedByUsesOtherThan(Reg: ScaledReg, LUIdx))
617	return true;
618	for (const SCEV *BaseReg : BaseRegs)
619	if (RegUses.isRegUsedByUsesOtherThan(Reg: BaseReg, LUIdx))
620	return true;
621	return false;
622	}
623
624	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
625	void Formula::print(raw_ostream &OS) const {
626	bool First = true;
627	if (BaseGV) {
628	if (!First) OS << " + "; else First = false;
629	BaseGV->printAsOperand(O&: OS, /PrintType=/false);
630	}
631	if (BaseOffset != `0`) {
632	if (!First) OS << " + "; else First = false;
633	OS << BaseOffset;
634	}
635	for (const SCEV *BaseReg : BaseRegs) {
636	if (!First) OS << " + "; else First = false;
637	OS << "reg(" << *BaseReg << `')'`;
638	}
639	if (HasBaseReg && BaseRegs.empty()) {
640	if (!First) OS << " + "; else First = false;
641	OS << "error: HasBaseReg";
642	} else if (!HasBaseReg && !BaseRegs.empty()) {
643	if (!First) OS << " + "; else First = false;
644	OS << "error: !HasBaseReg";
645	}
646	if (Scale != `0`) {
647	if (!First) OS << " + "; else First = false;
648	OS << Scale << "*reg(";
649	if (ScaledReg)
650	OS << *ScaledReg;
651	else
652	OS << "<unknown>";
653	OS << `')'`;
654	}
655	if (UnfoldedOffset != `0`) {
656	if (!First) OS << " + ";
657	OS << "imm(" << UnfoldedOffset << `')'`;
658	}
659	}
660
661	LLVM_DUMP_METHOD void Formula::dump() const {
662	print(OS&: errs()); errs() << `'\n'`;
663	}
664	#endif
665
666	/// Return true if the given addrec can be sign-extended without changing its
667	/// value.
668	static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
669	Type *WideTy =
670	IntegerType::get(C&: SE.getContext(), NumBits: SE.getTypeSizeInBits(Ty: AR->getType()) + `1`);
671	return isa<SCEVAddRecExpr>(Val: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
672	}
673
674	/// Return true if the given add can be sign-extended without changing its
675	/// value.
676	static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
677	Type *WideTy =
678	IntegerType::get(C&: SE.getContext(), NumBits: SE.getTypeSizeInBits(Ty: A->getType()) + `1`);
679	return isa<SCEVAddExpr>(Val: SE.getSignExtendExpr(Op: A, Ty: WideTy));
680	}
681
682	/// Return true if the given mul can be sign-extended without changing its
683	/// value.
684	static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
685	Type *WideTy =
686	IntegerType::get(C&: SE.getContext(),
687	NumBits: SE.getTypeSizeInBits(Ty: M->getType()) * M->getNumOperands());
688	return isa<SCEVMulExpr>(Val: SE.getSignExtendExpr(Op: M, Ty: WideTy));
689	}
690
691	/// Return an expression for LHS /s RHS, if it can be determined and if the
692	/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
693	/// is true, expressions like (X Y) /s Y are simplified to X, ignoring that*
694	/// the multiplication may overflow, which is useful when the result will be
695	/// used in a context where the most significant bits are ignored.
696	static const SCEV getExactSDiv(const* SCEV LHS, const* SCEV *RHS,
697	ScalarEvolution &SE,
698	bool IgnoreSignificantBits = false) {
699	// Handle the trivial case, which works for any SCEV type.
700	if (LHS == RHS)
701	return SE.getConstant(Ty: LHS->getType(), V: `1`);
702
703	// Handle a few RHS special cases.
704	const SCEVConstant *RC = dyn_cast<SCEVConstant>(Val: RHS);
705	if (RC) {
706	const APInt &RA = RC->getAPInt();
707	// Handle x /s -1 as x -1, to give ScalarEvolution a chance to do*
708	// some folding.
709	if (RA.isAllOnes()) {
710	if (LHS->getType()->isPointerTy())
711	return nullptr;
712	return SE.getMulExpr(LHS, RHS: RC);
713	}
714	// Handle x /s 1 as x.
715	if (RA == `1`)
716	return LHS;
717	}
718
719	// Check for a division of a constant by a constant.
720	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: LHS)) {
721	if (!RC)
722	return nullptr;
723	const APInt &LA = C->getAPInt();
724	const APInt &RA = RC->getAPInt();
725	if (LA.srem(RHS: RA) != `0`)
726	return nullptr;
727	return SE.getConstant(Val: LA.sdiv(RHS: RA));
728	}
729
730	// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
731	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: LHS)) {
732	if ((IgnoreSignificantBits \|\| isAddRecSExtable(AR, SE)) && AR->isAffine()) {
733	const SCEV *Step = getExactSDiv(LHS: AR->getStepRecurrence(SE), RHS, SE,
734	IgnoreSignificantBits);
735	if (!Step) return nullptr;
736	const SCEV *Start = getExactSDiv(LHS: AR->getStart(), RHS, SE,
737	IgnoreSignificantBits);
738	if (!Start) return nullptr;
739	// FlagNW is independent of the start value, step direction, and is
740	// preserved with smaller magnitude steps.
741	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
742	return SE.getAddRecExpr(Start, Step, L: AR->getLoop(), Flags: SCEV::FlagAnyWrap);
743	}
744	return nullptr;
745	}
746
747	// Distribute the sdiv over add operands, if the add doesn't overflow.
748	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: LHS)) {
749	if (IgnoreSignificantBits \|\| isAddSExtable(A: Add, SE)) {
750	SmallVector<const SCEV *, `8`> Ops;
751	for (const SCEV *S : Add->operands()) {
752	const SCEV *Op = getExactSDiv(LHS: S, RHS, SE, IgnoreSignificantBits);
753	if (!Op) return nullptr;
754	Ops.push_back(Elt: Op);
755	}
756	return SE.getAddExpr(Ops);
757	}
758	return nullptr;
759	}
760
761	// Check for a multiply operand that we can pull RHS out of.
762	if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: LHS)) {
763	if (IgnoreSignificantBits \|\| isMulSExtable(M: Mul, SE)) {
764	// Handle special case C1XY /s C2XY.
765	if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(Val: RHS)) {
766	if (IgnoreSignificantBits \|\| isMulSExtable(M: MulRHS, SE)) {
767	const SCEVConstant *LC = dyn_cast<SCEVConstant>(Val: Mul->getOperand(i: `0`));
768	const SCEVConstant *RC =
769	dyn_cast<SCEVConstant>(Val: MulRHS->getOperand(i: `0`));
770	if (LC && RC) {
771	SmallVector<const SCEV *, `4`> LOps(drop_begin(RangeOrContainer: Mul->operands()));
772	SmallVector<const SCEV *, `4`> ROps(drop_begin(RangeOrContainer: MulRHS->operands()));
773	if (LOps == ROps)
774	return getExactSDiv(LHS: LC, RHS: RC, SE, IgnoreSignificantBits);
775	}
776	}
777	}
778
779	SmallVector<const SCEV *, `4`> Ops;
780	bool Found = false;
781	for (const SCEV *S : Mul->operands()) {
782	if (!Found)
783	if (const SCEV *Q = getExactSDiv(LHS: S, RHS, SE,
784	IgnoreSignificantBits)) {
785	S = Q;
786	Found = true;
787	}
788	Ops.push_back(Elt: S);
789	}
790	return Found ? SE.getMulExpr(Ops) : nullptr;
791	}
792	return nullptr;
793	}
794
795	// Otherwise we don't know.
796	return nullptr;
797	}
798
799	/// If S involves the addition of a constant integer value, return that integer
800	/// value, and mutate S to point to a new SCEV with that value excluded.
801	static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
802	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: S)) {
803	if (C->getAPInt().getSignificantBits() <= `64`) {
804	S = SE.getConstant(Ty: C->getType(), V: `0`);
805	return C->getValue()->getSExtValue();
806	}
807	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
808	SmallVector<const SCEV *, `8`> NewOps(Add->operands());
809	int64_t Result = ExtractImmediate(S&: NewOps.front(), SE);
810	if (Result != `0`)
811	S = SE.getAddExpr(Ops&: NewOps);
812	return Result;
813	} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
814	SmallVector<const SCEV *, `8`> NewOps(AR->operands());
815	int64_t Result = ExtractImmediate(S&: NewOps.front(), SE);
816	if (Result != `0`)
817	S = SE.getAddRecExpr(Operands&: NewOps, L: AR->getLoop(),
818	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
819	Flags: SCEV::FlagAnyWrap);
820	return Result;
821	}
822	return `0`;
823	}
824
825	/// If S involves the addition of a GlobalValue address, return that symbol, and
826	/// mutate S to point to a new SCEV with that value excluded.
827	static GlobalValue ExtractSymbol(const* SCEV *&S, ScalarEvolution &SE) {
828	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S)) {
829	if (GlobalValue *GV = dyn_cast<GlobalValue>(Val: U->getValue())) {
830	S = SE.getConstant(Ty: GV->getType(), V: `0`);
831	return GV;
832	}
833	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
834	SmallVector<const SCEV *, `8`> NewOps(Add->operands());
835	GlobalValue *Result = ExtractSymbol(S&: NewOps.back(), SE);
836	if (Result)
837	S = SE.getAddExpr(Ops&: NewOps);
838	return Result;
839	} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
840	SmallVector<const SCEV *, `8`> NewOps(AR->operands());
841	GlobalValue *Result = ExtractSymbol(S&: NewOps.front(), SE);
842	if (Result)
843	S = SE.getAddRecExpr(Operands&: NewOps, L: AR->getLoop(),
844	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
845	Flags: SCEV::FlagAnyWrap);
846	return Result;
847	}
848	return nullptr;
849	}
850
851	/// Returns true if the specified instruction is using the specified value as an
852	/// address.
853	static bool isAddressUse(const TargetTransformInfo &TTI,
854	Instruction Inst, Value OperandVal) {
855	bool isAddress = isa<LoadInst>(Val: Inst);
856	if (StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
857	if (SI->getPointerOperand() == OperandVal)
858	isAddress = true;
859	} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
860	// Addressing modes can also be folded into prefetches and a variety
861	// of intrinsics.
862	switch (II->getIntrinsicID()) {
863	case Intrinsic::memset:
864	case Intrinsic::prefetch:
865	case Intrinsic::masked_load:
866	if (II->getArgOperand(i: `0`) == OperandVal)
867	isAddress = true;
868	break;
869	case Intrinsic::masked_store:
870	if (II->getArgOperand(i: `1`) == OperandVal)
871	isAddress = true;
872	break;
873	case Intrinsic::memmove:
874	case Intrinsic::memcpy:
875	if (II->getArgOperand(i: `0`) == OperandVal \|\|
876	II->getArgOperand(i: `1`) == OperandVal)
877	isAddress = true;
878	break;
879	default: {
880	MemIntrinsicInfo IntrInfo;
881	if (TTI.getTgtMemIntrinsic(Inst: II, Info&: IntrInfo)) {
882	if (IntrInfo.PtrVal == OperandVal)
883	isAddress = true;
884	}
885	}
886	}
887	} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) {
888	if (RMW->getPointerOperand() == OperandVal)
889	isAddress = true;
890	} else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) {
891	if (CmpX->getPointerOperand() == OperandVal)
892	isAddress = true;
893	}
894	return isAddress;
895	}
896
897	/// Return the type of the memory being accessed.
898	static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
899	Instruction Inst, Value OperandVal) {
900	MemAccessTy AccessTy = MemAccessTy::getUnknown(Ctx&: Inst->getContext());
901
902	// First get the type of memory being accessed.
903	if (Type *Ty = Inst->getAccessType())
904	AccessTy.MemTy = Ty;
905
906	// Then get the pointer address space.
907	if (const StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
908	AccessTy.AddrSpace = SI->getPointerAddressSpace();
909	} else if (const LoadInst *LI = dyn_cast<LoadInst>(Val: Inst)) {
910	AccessTy.AddrSpace = LI->getPointerAddressSpace();
911	} else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) {
912	AccessTy.AddrSpace = RMW->getPointerAddressSpace();
913	} else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) {
914	AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
915	} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
916	switch (II->getIntrinsicID()) {
917	case Intrinsic::prefetch:
918	case Intrinsic::memset:
919	AccessTy.AddrSpace = II->getArgOperand(i: `0`)->getType()->getPointerAddressSpace();
920	AccessTy.MemTy = OperandVal->getType();
921	break;
922	case Intrinsic::memmove:
923	case Intrinsic::memcpy:
924	AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
925	AccessTy.MemTy = OperandVal->getType();
926	break;
927	case Intrinsic::masked_load:
928	AccessTy.AddrSpace =
929	II->getArgOperand(i: `0`)->getType()->getPointerAddressSpace();
930	break;
931	case Intrinsic::masked_store:
932	AccessTy.AddrSpace =
933	II->getArgOperand(i: `1`)->getType()->getPointerAddressSpace();
934	break;
935	default: {
936	MemIntrinsicInfo IntrInfo;
937	if (TTI.getTgtMemIntrinsic(Inst: II, Info&: IntrInfo) && IntrInfo.PtrVal) {
938	AccessTy.AddrSpace
939	= IntrInfo.PtrVal->getType()->getPointerAddressSpace();
940	}
941
942	break;
943	}
944	}
945	}
946
947	return AccessTy;
948	}
949
950	/// Return true if this AddRec is already a phi in its loop.
951	static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
952	for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
953	if (SE.isSCEVable(Ty: PN.getType()) &&
954	(SE.getEffectiveSCEVType(Ty: PN.getType()) ==
955	SE.getEffectiveSCEVType(Ty: AR->getType())) &&
956	SE.getSCEV(V: &PN) == AR)
957	return true;
958	}
959	return false;
960	}
961
962	/// Check if expanding this expression is likely to incur significant cost. This
963	/// is tricky because SCEV doesn't track which expressions are actually computed
964	/// by the current IR.
965	///
966	/// We currently allow expansion of IV increments that involve adds,
967	/// multiplication by constants, and AddRecs from existing phis.
968	///
969	/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
970	/// obvious multiple of the UDivExpr.
971	static bool isHighCostExpansion(const SCEV *S,
972	SmallPtrSetImpl<const SCEV*> &Processed,
973	ScalarEvolution &SE) {
974	// Zero/One operand expressions
975	switch (S->getSCEVType()) {
976	case scUnknown:
977	case scConstant:
978	case scVScale:
979	return false;
980	case scTruncate:
981	return isHighCostExpansion(S: cast<SCEVTruncateExpr>(Val: S)->getOperand(),
982	Processed, SE);
983	case scZeroExtend:
984	return isHighCostExpansion(S: cast<SCEVZeroExtendExpr>(Val: S)->getOperand(),
985	Processed, SE);
986	case scSignExtend:
987	return isHighCostExpansion(S: cast<SCEVSignExtendExpr>(Val: S)->getOperand(),
988	Processed, SE);
989	default:
990	break;
991	}
992
993	if (!Processed.insert(Ptr: S).second)
994	return false;
995
996	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
997	for (const SCEV *S : Add->operands()) {
998	if (isHighCostExpansion(S, Processed, SE))
999	return true;
1000	}
1001	return false;
1002	}
1003
1004	if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: S)) {
1005	if (Mul->getNumOperands() == `2`) {
1006	// Multiplication by a constant is ok
1007	if (isa<SCEVConstant>(Val: Mul->getOperand(i: `0`)))
1008	return isHighCostExpansion(S: Mul->getOperand(i: `1`), Processed, SE);
1009
1010	// If we have the value of one operand, check if an existing
1011	// multiplication already generates this expression.
1012	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: Mul->getOperand(i: `1`))) {
1013	Value *UVal = U->getValue();
1014	for (User *UR : UVal->users()) {
1015	// If U is a constant, it may be used by a ConstantExpr.
1016	Instruction *UI = dyn_cast<Instruction>(Val: UR);
1017	if (UI && UI->getOpcode() == Instruction::Mul &&
1018	SE.isSCEVable(Ty: UI->getType())) {
1019	return SE.getSCEV(V: UI) == Mul;
1020	}
1021	}
1022	}
1023	}
1024	}
1025
1026	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
1027	if (isExistingPhi(AR, SE))
1028	return false;
1029	}
1030
1031	// Fow now, consider any other type of expression (div/mul/min/max) high cost.
1032	return true;
1033	}
1034
1035	namespace {
1036
1037	class LSRUse;
1038
1039	} // end anonymous namespace
1040
1041	/// Check if the addressing mode defined by \p F is completely
1042	/// folded in \p LU at isel time.
1043	/// This includes address-mode folding and special icmp tricks.
1044	/// This function returns true if \p LU can accommodate what \p F
1045	/// defines and up to 1 base + 1 scaled + offset.
1046	/// In other words, if \p F has several base registers, this function may
1047	/// still return true. Therefore, users still need to account for
1048	/// additional base registers and/or unfolded offsets to derive an
1049	/// accurate cost model.
1050	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1051	const LSRUse &LU, const Formula &F);
1052
1053	// Get the cost of the scaling factor used in F for LU.
1054	static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1055	const LSRUse &LU, const Formula &F,
1056	const Loop &L);
1057
1058	namespace {
1059
1060	/// This class is used to measure and compare candidate formulae.
1061	class Cost {
1062	const Loop L = nullptr*;
1063	ScalarEvolution SE = nullptr*;
1064	const TargetTransformInfo TTI = nullptr*;
1065	TargetTransformInfo::LSRCost C;
1066	TTI::AddressingModeKind AMK = TTI::AMK_None;
1067
1068	public:
1069	Cost() = delete;
1070	Cost(const Loop L, ScalarEvolution &SE, const* TargetTransformInfo &TTI,
1071	TTI::AddressingModeKind AMK) :
1072	L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
1073	C.Insns = `0`;
1074	C.NumRegs = `0`;
1075	C.AddRecCost = `0`;
1076	C.NumIVMuls = `0`;
1077	C.NumBaseAdds = `0`;
1078	C.ImmCost = `0`;
1079	C.SetupCost = `0`;
1080	C.ScaleCost = `0`;
1081	}
1082
1083	bool isLess(const Cost &Other) const;
1084
1085	void Lose();
1086
1087	#ifndef NDEBUG
1088	// Once any of the metrics loses, they must all remain losers.
1089	bool isValid() {
1090	return ((C.Insns \| C.NumRegs \| C.AddRecCost \| C.NumIVMuls \| C.NumBaseAdds
1091	\| C.ImmCost \| C.SetupCost \| C.ScaleCost) != ~`0u`)
1092	\|\| ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
1093	& C.ImmCost & C.SetupCost & C.ScaleCost) == ~`0u`);
1094	}
1095	#endif
1096
1097	bool isLoser() {
1098	assert(isValid() && "invalid cost");
1099	return C.NumRegs == ~`0u`;
1100	}
1101
1102	void RateFormula(const Formula &F,
1103	SmallPtrSetImpl<const SCEV *> &Regs,
1104	const DenseSet<const SCEV *> &VisitedRegs,
1105	const LSRUse &LU,
1106	SmallPtrSetImpl<const SCEV > LoserRegs = nullptr);
1107
1108	void print(raw_ostream &OS) const;
1109	void dump() const;
1110
1111	private:
1112	void RateRegister(const Formula &F, const SCEV *Reg,
1113	SmallPtrSetImpl<const SCEV *> &Regs);
1114	void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1115	SmallPtrSetImpl<const SCEV *> &Regs,
1116	SmallPtrSetImpl<const SCEV > LoserRegs);
1117	};
1118
1119	/// An operand value in an instruction which is to be replaced with some
1120	/// equivalent, possibly strength-reduced, replacement.
1121	struct LSRFixup {
1122	/// The instruction which will be updated.
1123	Instruction UserInst = nullptr*;
1124
1125	/// The operand of the instruction which will be replaced. The operand may be
1126	/// used more than once; every instance will be replaced.
1127	Value OperandValToReplace = nullptr*;
1128
1129	/// If this user is to use the post-incremented value of an induction
1130	/// variable, this set is non-empty and holds the loops associated with the
1131	/// induction variable.
1132	PostIncLoopSet PostIncLoops;
1133
1134	/// A constant offset to be added to the LSRUse expression. This allows
1135	/// multiple fixups to share the same LSRUse with different offsets, for
1136	/// example in an unrolled loop.
1137	int64_t Offset = `0`;
1138
1139	LSRFixup() = default;
1140
1141	bool isUseFullyOutsideLoop(const Loop L) const*;
1142
1143	void print(raw_ostream &OS) const;
1144	void dump() const;
1145	};
1146
1147	/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
1148	/// SmallVectors of const SCEV.*
1149	struct UniquifierDenseMapInfo {
1150	static SmallVector<const SCEV *, `4`> getEmptyKey() {
1151	SmallVector<const SCEV *, `4`> V;
1152	V.push_back(Elt: reinterpret_cast<const SCEV *>(-`1`));
1153	return V;
1154	}
1155
1156	static SmallVector<const SCEV *, `4`> getTombstoneKey() {
1157	SmallVector<const SCEV *, `4`> V;
1158	V.push_back(Elt: reinterpret_cast<const SCEV *>(-`2`));
1159	return V;
1160	}
1161
1162	static unsigned getHashValue(const SmallVector<const SCEV *, `4`> &V) {
1163	return static_cast<unsigned>(hash_combine_range(first: V.begin(), last: V.end()));
1164	}
1165
1166	static bool isEqual(const SmallVector<const SCEV *, `4`> &LHS,
1167	const SmallVector<const SCEV *, `4`> &RHS) {
1168	return LHS == RHS;
1169	}
1170	};
1171
1172	/// This class holds the state that LSR keeps for each use in IVUsers, as well
1173	/// as uses invented by LSR itself. It includes information about what kinds of
1174	/// things can be folded into the user, information about the user itself, and
1175	/// information about how the use may be satisfied. TODO: Represent multiple
1176	/// users of the same expression in common?
1177	class LSRUse {
1178	DenseSet<SmallVector<const SCEV *, `4`>, UniquifierDenseMapInfo> Uniquifier;
1179
1180	public:
1181	/// An enum for a kind of use, indicating what types of scaled and immediate
1182	/// operands it might support.
1183	enum KindType {
1184	Basic, ///< A normal use, with no folding.
1185	Special, ///< A special case of basic, allowing -1 scales.
1186	Address, ///< An address use; folding according to TargetLowering
1187	ICmpZero ///< An equality icmp with both operands folded into one.
1188	// TODO: Add a generic icmp too?
1189	};
1190
1191	using SCEVUseKindPair = PointerIntPair<const SCEV *, `2`, KindType>;
1192
1193	KindType Kind;
1194	MemAccessTy AccessTy;
1195
1196	/// The list of operands which are to be replaced.
1197	SmallVector<LSRFixup, `8`> Fixups;
1198
1199	/// Keep track of the min and max offsets of the fixups.
1200	int64_t MinOffset = std::numeric_limits<int64_t>::max();
1201	int64_t MaxOffset = std::numeric_limits<int64_t>::min();
1202
1203	/// This records whether all of the fixups using this LSRUse are outside of
1204	/// the loop, in which case some special-case heuristics may be used.
1205	bool AllFixupsOutsideLoop = true;
1206
1207	/// RigidFormula is set to true to guarantee that this use will be associated
1208	/// with a single formula--the one that initially matched. Some SCEV
1209	/// expressions cannot be expanded. This allows LSR to consider the registers
1210	/// used by those expressions without the need to expand them later after
1211	/// changing the formula.
1212	bool RigidFormula = false;
1213
1214	/// This records the widest use type for any fixup using this
1215	/// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1216	/// fixup widths to be equivalent, because the narrower one may be relying on
1217	/// the implicit truncation to truncate away bogus bits.
1218	Type WidestFixupType = nullptr*;
1219
1220	/// A list of ways to build a value that can satisfy this user. After the
1221	/// list is populated, one of these is selected heuristically and used to
1222	/// formulate a replacement for OperandValToReplace in UserInst.
1223	SmallVector<Formula, `12`> Formulae;
1224
1225	/// The set of register candidates used by all formulae in this LSRUse.
1226	SmallPtrSet<const SCEV *, `4`> Regs;
1227
1228	LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy (AT) {}
1229
1230	LSRFixup &getNewFixup() {
1231	Fixups.push_back(Elt: LSRFixup ());
1232	return Fixups.back();
1233	}
1234
1235	void pushFixup(LSRFixup &f) {
1236	Fixups.push_back(Elt: f);
1237	if (f.Offset > MaxOffset)
1238	MaxOffset = f.Offset;
1239	if (f.Offset < MinOffset)
1240	MinOffset = f.Offset;
1241	}
1242
1243	bool HasFormulaWithSameRegs(const Formula &F) const;
1244	float getNotSelectedProbability(const SCEV Reg) const*;
1245	bool InsertFormula(const Formula &F, const Loop &L);
1246	void DeleteFormula(Formula &F);
1247	void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1248
1249	void print(raw_ostream &OS) const;
1250	void dump() const;
1251	};
1252
1253	} // end anonymous namespace
1254
1255	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1256	LSRUse::KindType Kind, MemAccessTy AccessTy,
1257	GlobalValue *BaseGV, int64_t BaseOffset,
1258	bool HasBaseReg, int64_t Scale,
1259	Instruction Fixup = nullptr*);
1260
1261	static unsigned getSetupCost(const SCEV Reg, unsigned* Depth) {
1262	if (isa<SCEVUnknown>(Val: Reg) \|\| isa<SCEVConstant>(Val: Reg))
1263	return `1`;
1264	if (Depth == `0`)
1265	return `0`;
1266	if (const auto *S = dyn_cast<SCEVAddRecExpr>(Val: Reg))
1267	return getSetupCost(Reg: S->getStart(), Depth: Depth - `1`);
1268	if (auto S = dyn_cast<SCEVIntegralCastExpr>(Val: Reg))
1269	return getSetupCost(Reg: S->getOperand(), Depth: Depth - `1`);
1270	if (auto S = dyn_cast<SCEVNAryExpr>(Val: Reg))
1271	return std::accumulate(first: S->operands().begin(), last: S->operands().end(), init: `0`,
1272	binary_op: [&](unsigned i, const SCEV *Reg) {
1273	return i + getSetupCost(Reg, Depth: Depth - `1`);
1274	});
1275	if (auto S = dyn_cast<SCEVUDivExpr>(Val: Reg))
1276	return getSetupCost(Reg: S->getLHS(), Depth: Depth - `1`) +
1277	getSetupCost(Reg: S->getRHS(), Depth: Depth - `1`);
1278	return `0`;
1279	}
1280
1281	/// Tally up interesting quantities from the given register.
1282	void Cost::RateRegister(const Formula &F, const SCEV *Reg,
1283	SmallPtrSetImpl<const SCEV *> &Regs) {
1284	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Reg)) {
1285	// If this is an addrec for another loop, it should be an invariant
1286	// with respect to L since L is the innermost loop (at least
1287	// for now LSR only handles innermost loops).
1288	if (AR->getLoop() != L) {
1289	// If the AddRec exists, consider it's register free and leave it alone.
1290	if (isExistingPhi(AR, SE&: *SE) && AMK != TTI::AMK_PostIndexed)
1291	return;
1292
1293	// It is bad to allow LSR for current loop to add induction variables
1294	// for its sibling loops.
1295	if (!AR->getLoop()->contains(L)) {
1296	Lose();
1297	return;
1298	}
1299
1300	// Otherwise, it will be an invariant with respect to Loop L.
1301	++C.NumRegs;
1302	return;
1303	}
1304
1305	unsigned LoopCost = `1`;
1306	if (TTI->isIndexedLoadLegal(Mode: TTI->MIM_PostInc, Ty: AR->getType()) \|\|
1307	TTI->isIndexedStoreLegal(Mode: TTI->MIM_PostInc, Ty: AR->getType())) {
1308
1309	// If the step size matches the base offset, we could use pre-indexed
1310	// addressing.
1311	if (AMK == TTI::AMK_PreIndexed) {
1312	if (auto Step = dyn_cast<SCEVConstant>(Val: AR->getStepRecurrence(SE&: SE)))
1313	if (Step->getAPInt() == F.BaseOffset)
1314	LoopCost = `0`;
1315	} else if (AMK == TTI::AMK_PostIndexed) {
1316	const SCEV LoopStep = AR->getStepRecurrence(SE&: SE);
1317	if (isa<SCEVConstant>(Val: LoopStep)) {
1318	const SCEV *LoopStart = AR->getStart();
1319	if (!isa<SCEVConstant>(Val: LoopStart) &&
1320	SE->isLoopInvariant(S: LoopStart, L))
1321	LoopCost = `0`;
1322	}
1323	}
1324	}
1325	C.AddRecCost += LoopCost;
1326
1327	// Add the step value register, if it needs one.
1328	// TODO: The non-affine case isn't precisely modeled here.
1329	if (!AR->isAffine() \|\| !isa<SCEVConstant>(Val: AR->getOperand(i: `1`))) {
1330	if (!Regs.count(Ptr: AR->getOperand(i: `1`))) {
1331	RateRegister(F, Reg: AR->getOperand(i: `1`), Regs);
1332	if (isLoser())
1333	return;
1334	}
1335	}
1336	}
1337	++C.NumRegs;
1338
1339	// Rough heuristic; favor registers which don't require extra setup
1340	// instructions in the preheader.
1341	C.SetupCost += getSetupCost(Reg, Depth: SetupCostDepthLimit);
1342	// Ensure we don't, even with the recusion limit, produce invalid costs.
1343	C.SetupCost = std::min<unsigned>(a: C.SetupCost, b: `1` << `16`);
1344
1345	C.NumIVMuls += isa<SCEVMulExpr>(Val: Reg) &&
1346	SE->hasComputableLoopEvolution(S: Reg, L);
1347	}
1348
1349	/// Record this register in the set. If we haven't seen it before, rate
1350	/// it. Optional LoserRegs provides a way to declare any formula that refers to
1351	/// one of those regs an instant loser.
1352	void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1353	SmallPtrSetImpl<const SCEV *> &Regs,
1354	SmallPtrSetImpl<const SCEV > LoserRegs) {
1355	if (LoserRegs && LoserRegs->count(Ptr: Reg)) {
1356	Lose();
1357	return;
1358	}
1359	if (Regs.insert(Ptr: Reg).second) {
1360	RateRegister(F, Reg, Regs);
1361	if (LoserRegs && isLoser())
1362	LoserRegs->insert(Ptr: Reg);
1363	}
1364	}
1365
1366	void Cost::RateFormula(const Formula &F,
1367	SmallPtrSetImpl<const SCEV *> &Regs,
1368	const DenseSet<const SCEV *> &VisitedRegs,
1369	const LSRUse &LU,
1370	SmallPtrSetImpl<const SCEV > LoserRegs) {
1371	if (isLoser())
1372	return;
1373	assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
1374	// Tally up the registers.
1375	unsigned PrevAddRecCost = C.AddRecCost;
1376	unsigned PrevNumRegs = C.NumRegs;
1377	unsigned PrevNumBaseAdds = C.NumBaseAdds;
1378	if (const SCEV *ScaledReg = F.ScaledReg) {
1379	if (VisitedRegs.count(V: ScaledReg)) {
1380	Lose();
1381	return;
1382	}
1383	RatePrimaryRegister(F, Reg: ScaledReg, Regs, LoserRegs);
1384	if (isLoser())
1385	return;
1386	}
1387	for (const SCEV *BaseReg : F.BaseRegs) {
1388	if (VisitedRegs.count(V: BaseReg)) {
1389	Lose();
1390	return;
1391	}
1392	RatePrimaryRegister(F, Reg: BaseReg, Regs, LoserRegs);
1393	if (isLoser())
1394	return;
1395	}
1396
1397	// Determine how many (unfolded) adds we'll need inside the loop.
1398	size_t NumBaseParts = F.getNumRegs();
1399	if (NumBaseParts > `1`)
1400	// Do not count the base and a possible second register if the target
1401	// allows to fold 2 registers.
1402	C.NumBaseAdds +=
1403	NumBaseParts - (`1` + (F.Scale && isAMCompletelyFolded(TTI: *TTI, LU, F)));
1404	C.NumBaseAdds += (F.UnfoldedOffset != `0`);
1405
1406	// Accumulate non-free scaling amounts.
1407	C.ScaleCost += getScalingFactorCost(TTI: TTI, LU, F, L: *L).getValue();
1408
1409	// Tally up the non-zero immediates.
1410	for (const LSRFixup &Fixup : LU.Fixups) {
1411	int64_t O = Fixup.Offset;
1412	int64_t Offset = (uint64_t)O + F.BaseOffset;
1413	if (F.BaseGV)
1414	C.ImmCost += `64`; // Handle symbolic values conservatively.
1415	// TODO: This should probably be the pointer size.
1416	else if (Offset != `0`)
1417	C.ImmCost += APInt (`64`, Offset, true).getSignificantBits();
1418
1419	// Check with target if this offset with this instruction is
1420	// specifically not supported.
1421	if (LU.Kind == LSRUse::Address && Offset != `0` &&
1422	!isAMCompletelyFolded(TTI: *TTI, Kind: LSRUse::Address, AccessTy: LU.AccessTy, BaseGV: F.BaseGV,
1423	BaseOffset: Offset, HasBaseReg: F.HasBaseReg, Scale: F.Scale, Fixup: Fixup.UserInst))
1424	C.NumBaseAdds++;
1425	}
1426
1427	// If we don't count instruction cost exit here.
1428	if (!InsnsCost) {
1429	assert(isValid() && "invalid cost");
1430	return;
1431	}
1432
1433	// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
1434	// additional instruction (at least fill).
1435	// TODO: Need distinguish register class?
1436	unsigned TTIRegNum = TTI->getNumberOfRegisters(
1437	ClassID: TTI->getRegisterClassForType(Vector: false, Ty: F.getType())) - `1`;
1438	if (C.NumRegs > TTIRegNum) {
1439	// Cost already exceeded TTIRegNum, then only newly added register can add
1440	// new instructions.
1441	if (PrevNumRegs > TTIRegNum)
1442	C.Insns += (C.NumRegs - PrevNumRegs);
1443	else
1444	C.Insns += (C.NumRegs - TTIRegNum);
1445	}
1446
1447	// If ICmpZero formula ends with not 0, it could not be replaced by
1448	// just add or sub. We'll need to compare final result of AddRec.
1449	// That means we'll need an additional instruction. But if the target can
1450	// macro-fuse a compare with a branch, don't count this extra instruction.
1451	// For -10 + {0, +, 1}:
1452	// i = i + 1;
1453	// cmp i, 10
1454	//
1455	// For {-10, +, 1}:
1456	// i = i + 1;
1457	if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
1458	!TTI->canMacroFuseCmp())
1459	C.Insns++;
1460	// Each new AddRec adds 1 instruction to calculation.
1461	C.Insns += (C.AddRecCost - PrevAddRecCost);
1462
1463	// BaseAdds adds instructions for unfolded registers.
1464	if (LU.Kind != LSRUse::ICmpZero)
1465	C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
1466	assert(isValid() && "invalid cost");
1467	}
1468
1469	/// Set this cost to a losing value.
1470	void Cost::Lose() {
1471	C.Insns = std::numeric_limits<unsigned>::max();
1472	C.NumRegs = std::numeric_limits<unsigned>::max();
1473	C.AddRecCost = std::numeric_limits<unsigned>::max();
1474	C.NumIVMuls = std::numeric_limits<unsigned>::max();
1475	C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1476	C.ImmCost = std::numeric_limits<unsigned>::max();
1477	C.SetupCost = std::numeric_limits<unsigned>::max();
1478	C.ScaleCost = std::numeric_limits<unsigned>::max();
1479	}
1480
1481	/// Choose the lower cost.
1482	bool Cost::isLess(const Cost &Other) const {
1483	if (InsnsCost.getNumOccurrences() > `0` && InsnsCost &&
1484	C.Insns != Other.C.Insns)
1485	return C.Insns < Other.C.Insns;
1486	return TTI->isLSRCostLess(C1: C, C2: Other.C);
1487	}
1488
1489	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1490	void Cost::print(raw_ostream &OS) const {
1491	if (InsnsCost)
1492	OS << C.Insns << " instruction" << (C.Insns == `1` ? " " : "s ");
1493	OS << C.NumRegs << " reg" << (C.NumRegs == `1` ? "" : "s");
1494	if (C.AddRecCost != `0`)
1495	OS << ", with addrec cost " << C.AddRecCost;
1496	if (C.NumIVMuls != `0`)
1497	OS << ", plus " << C.NumIVMuls << " IV mul"
1498	<< (C.NumIVMuls == `1` ? "" : "s");
1499	if (C.NumBaseAdds != `0`)
1500	OS << ", plus " << C.NumBaseAdds << " base add"
1501	<< (C.NumBaseAdds == `1` ? "" : "s");
1502	if (C.ScaleCost != `0`)
1503	OS << ", plus " << C.ScaleCost << " scale cost";
1504	if (C.ImmCost != `0`)
1505	OS << ", plus " << C.ImmCost << " imm cost";
1506	if (C.SetupCost != `0`)
1507	OS << ", plus " << C.SetupCost << " setup cost";
1508	}
1509
1510	LLVM_DUMP_METHOD void Cost::dump() const {
1511	print(OS&: errs()); errs() << `'\n'`;
1512	}
1513	#endif
1514
1515	/// Test whether this fixup always uses its value outside of the given loop.
1516	bool LSRFixup::isUseFullyOutsideLoop(const Loop L) const* {
1517	// PHI nodes use their value in their incoming blocks.
1518	if (const PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) {
1519	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i)
1520	if (PN->getIncomingValue(i) == OperandValToReplace &&
1521	L->contains(BB: PN->getIncomingBlock(i)))
1522	return false;
1523	return true;
1524	}
1525
1526	return !L->contains(Inst: UserInst);
1527	}
1528
1529	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1530	void LSRFixup::print(raw_ostream &OS) const {
1531	OS << "UserInst=";
1532	// Store is common and interesting enough to be worth special-casing.
1533	if (StoreInst *Store = dyn_cast<StoreInst>(Val: UserInst)) {
1534	OS << "store ";
1535	Store->getOperand(i_nocapture: `0`)->printAsOperand(O&: OS, /PrintType=/false);
1536	} else if (UserInst->getType()->isVoidTy())
1537	OS << UserInst->getOpcodeName();
1538	else
1539	UserInst->printAsOperand(O&: OS, /PrintType=/false);
1540
1541	OS << ", OperandValToReplace=";
1542	OperandValToReplace->printAsOperand(O&: OS, /PrintType=/false);
1543
1544	for (const Loop *PIL : PostIncLoops) {
1545	OS << ", PostIncLoop=";
1546	PIL->getHeader()->printAsOperand(O&: OS, /PrintType=/false);
1547	}
1548
1549	if (Offset != `0`)
1550	OS << ", Offset=" << Offset;
1551	}
1552
1553	LLVM_DUMP_METHOD void LSRFixup::dump() const {
1554	print(OS&: errs()); errs() << `'\n'`;
1555	}
1556	#endif
1557
1558	/// Test whether this use as a formula which has the same registers as the given
1559	/// formula.
1560	bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1561	SmallVector<const SCEV *, `4`> Key = F.BaseRegs;
1562	if (F.ScaledReg) Key.push_back(Elt: F.ScaledReg);
1563	// Unstable sort by host order ok, because this is only used for uniquifying.
1564	llvm::sort(C&: Key);
1565	return Uniquifier.count(V: Key);
1566	}
1567
1568	/// The function returns a probability of selecting formula without Reg.
1569	float LSRUse::getNotSelectedProbability(const SCEV Reg) const* {
1570	unsigned FNum = `0`;
1571	for (const Formula &F : Formulae)
1572	if (F.referencesReg(S: Reg))
1573	FNum++;
1574	return ((float)(Formulae.size() - FNum)) / Formulae.size();
1575	}
1576
1577	/// If the given formula has not yet been inserted, add it to the list, and
1578	/// return true. Return false otherwise. The formula must be in canonical form.
1579	bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
1580	assert(F.isCanonical(L) && "Invalid canonical representation");
1581
1582	if (!Formulae.empty() && RigidFormula)
1583	return false;
1584
1585	SmallVector<const SCEV *, `4`> Key = F.BaseRegs;
1586	if (F.ScaledReg) Key.push_back(Elt: F.ScaledReg);
1587	// Unstable sort by host order ok, because this is only used for uniquifying.
1588	llvm::sort(C&: Key);
1589
1590	if (!Uniquifier.insert(V: Key).second)
1591	return false;
1592
1593	// Using a register to hold the value of 0 is not profitable.
1594	assert((!F.ScaledReg \|\| !F.ScaledReg->isZero()) &&
1595	"Zero allocated in a scaled register!");
1596	#ifndef NDEBUG
1597	for (const SCEV *BaseReg : F.BaseRegs)
1598	assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1599	#endif
1600
1601	// Add the formula to the list.
1602	Formulae.push_back(Elt: F);
1603
1604	// Record registers now being used by this use.
1605	Regs.insert(I: F.BaseRegs.begin(), E: F.BaseRegs.end());
1606	if (F.ScaledReg)
1607	Regs.insert(Ptr: F.ScaledReg);
1608
1609	return true;
1610	}
1611
1612	/// Remove the given formula from this use's list.
1613	void LSRUse::DeleteFormula(Formula &F) {
1614	if (&F != &Formulae.back())
1615	std::swap(a&: F, b&: Formulae.back());
1616	Formulae.pop_back();
1617	}
1618
1619	/// Recompute the Regs field, and update RegUses.
1620	void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1621	// Now that we've filtered out some formulae, recompute the Regs set.
1622	SmallPtrSet<const SCEV *, `4`> OldRegs = std::move(Regs);
1623	Regs.clear();
1624	for (const Formula &F : Formulae) {
1625	if (F.ScaledReg) Regs.insert(Ptr: F.ScaledReg);
1626	Regs.insert(I: F.BaseRegs.begin(), E: F.BaseRegs.end());
1627	}
1628
1629	// Update the RegTracker.
1630	for (const SCEV *S : OldRegs)
1631	if (!Regs.count(Ptr: S))
1632	RegUses.dropRegister(Reg: S, LUIdx);
1633	}
1634
1635	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1636	void LSRUse::print(raw_ostream &OS) const {
1637	OS << "LSR Use: Kind=";
1638	switch (Kind) {
1639	case Basic: OS << "Basic"; break;
1640	case Special: OS << "Special"; break;
1641	case ICmpZero: OS << "ICmpZero"; break;
1642	case Address:
1643	OS << "Address of ";
1644	if (AccessTy.MemTy->isPointerTy())
1645	OS << "pointer"; // the full pointer type could be really verbose
1646	else {
1647	OS << *AccessTy.MemTy;
1648	}
1649
1650	OS << " in addrspace(" << AccessTy.AddrSpace << `')'`;
1651	}
1652
1653	OS << ", Offsets={";
1654	bool NeedComma = false;
1655	for (const LSRFixup &Fixup : Fixups) {
1656	if (NeedComma) OS << `','`;
1657	OS << Fixup.Offset;
1658	NeedComma = true;
1659	}
1660	OS << `'}'`;
1661
1662	if (AllFixupsOutsideLoop)
1663	OS << ", all-fixups-outside-loop";
1664
1665	if (WidestFixupType)
1666	OS << ", widest fixup type: " << *WidestFixupType;
1667	}
1668
1669	LLVM_DUMP_METHOD void LSRUse::dump() const {
1670	print(OS&: errs()); errs() << `'\n'`;
1671	}
1672	#endif
1673
1674	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1675	LSRUse::KindType Kind, MemAccessTy AccessTy,
1676	GlobalValue *BaseGV, int64_t BaseOffset,
1677	bool HasBaseReg, int64_t Scale,
1678	Instruction Fixup/= nullptr*/) {
1679	switch (Kind) {
1680	case LSRUse::Address:
1681	return TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, BaseGV, BaseOffset,
1682	HasBaseReg, Scale, AddrSpace: AccessTy.AddrSpace, I: Fixup);
1683
1684	case LSRUse::ICmpZero:
1685	// There's not even a target hook for querying whether it would be legal to
1686	// fold a GV into an ICmp.
1687	if (BaseGV)
1688	return false;
1689
1690	// ICmp only has two operands; don't allow more than two non-trivial parts.
1691	if (Scale != `0` && HasBaseReg && BaseOffset != `0`)
1692	return false;
1693
1694	// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1695	// putting the scaled register in the other operand of the icmp.
1696	if (Scale != `0` && Scale != -`1`)
1697	return false;
1698
1699	// If we have low-level target information, ask the target if it can fold an
1700	// integer immediate on an icmp.
1701	if (BaseOffset != `0`) {
1702	// We have one of:
1703	// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1704	// ICmpZero -1ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset*
1705	// Offs is the ICmp immediate.
1706	if (Scale == `0`)
1707	// The cast does the right thing with
1708	// std::numeric_limits<int64_t>::min().
1709	BaseOffset = -(uint64_t)BaseOffset;
1710	return TTI.isLegalICmpImmediate(Imm: BaseOffset);
1711	}
1712
1713	// ICmpZero BaseReg + -1ScaleReg => ICmp BaseReg, ScaleReg*
1714	return true;
1715
1716	case LSRUse::Basic:
1717	// Only handle single-register values.
1718	return !BaseGV && Scale == `0` && BaseOffset == `0`;
1719
1720	case LSRUse::Special:
1721	// Special case Basic to handle -1 scales.
1722	return !BaseGV && (Scale == `0` \|\| Scale == -`1`) && BaseOffset == `0`;
1723	}
1724
1725	llvm_unreachable("Invalid LSRUse Kind!");
1726	}
1727
1728	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1729	int64_t MinOffset, int64_t MaxOffset,
1730	LSRUse::KindType Kind, MemAccessTy AccessTy,
1731	GlobalValue *BaseGV, int64_t BaseOffset,
1732	bool HasBaseReg, int64_t Scale) {
1733	// Check for overflow.
1734	if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1735	(MinOffset > `0`))
1736	return false;
1737	MinOffset = (uint64_t)BaseOffset + MinOffset;
1738	if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1739	(MaxOffset > `0`))
1740	return false;
1741	MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1742
1743	return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset: MinOffset,
1744	HasBaseReg, Scale) &&
1745	isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset: MaxOffset,
1746	HasBaseReg, Scale);
1747	}
1748
1749	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1750	int64_t MinOffset, int64_t MaxOffset,
1751	LSRUse::KindType Kind, MemAccessTy AccessTy,
1752	const Formula &F, const Loop &L) {
1753	// For the purpose of isAMCompletelyFolded either having a canonical formula
1754	// or a scale not equal to zero is correct.
1755	// Problems may arise from non canonical formulae having a scale == 0.
1756	// Strictly speaking it would best to just rely on canonical formulae.
1757	// However, when we generate the scaled formulae, we first check that the
1758	// scaling factor is profitable before computing the actual ScaledReg for
1759	// compile time sake.
1760	assert((F.isCanonical(L) \|\| F.Scale != `0`));
1761	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1762	BaseGV: F.BaseGV, BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg, Scale: F.Scale);
1763	}
1764
1765	/// Test whether we know how to expand the current formula.
1766	static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1767	int64_t MaxOffset, LSRUse::KindType Kind,
1768	MemAccessTy AccessTy, GlobalValue *BaseGV,
1769	int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
1770	// We know how to expand completely foldable formulae.
1771	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1772	BaseOffset, HasBaseReg, Scale) \|\|
1773	// Or formulae that use a base register produced by a sum of base
1774	// registers.
1775	(Scale == `1` &&
1776	isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1777	BaseGV, BaseOffset, HasBaseReg: true, Scale: `0`));
1778	}
1779
1780	static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1781	int64_t MaxOffset, LSRUse::KindType Kind,
1782	MemAccessTy AccessTy, const Formula &F) {
1783	return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV: F.BaseGV,
1784	BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg, Scale: F.Scale);
1785	}
1786
1787	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1788	const LSRUse &LU, const Formula &F) {
1789	// Target may want to look at the user instructions.
1790	if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
1791	for (const LSRFixup &Fixup : LU.Fixups)
1792	if (!isAMCompletelyFolded(TTI, Kind: LSRUse::Address, AccessTy: LU.AccessTy, BaseGV: F.BaseGV,
1793	BaseOffset: (F.BaseOffset + Fixup.Offset), HasBaseReg: F.HasBaseReg,
1794	Scale: F.Scale, Fixup: Fixup.UserInst))
1795	return false;
1796	return true;
1797	}
1798
1799	return isAMCompletelyFolded(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
1800	AccessTy: LU.AccessTy, BaseGV: F.BaseGV, BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg,
1801	Scale: F.Scale);
1802	}
1803
1804	static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1805	const LSRUse &LU, const Formula &F,
1806	const Loop &L) {
1807	if (!F.Scale)
1808	return `0`;
1809
1810	// If the use is not completely folded in that instruction, we will have to
1811	// pay an extra cost only for scale != 1.
1812	if (!isAMCompletelyFolded(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
1813	AccessTy: LU.AccessTy, F, L))
1814	return F.Scale != `1`;
1815
1816	switch (LU.Kind) {
1817	case LSRUse::Address: {
1818	// Check the scaling factor cost with both the min and max offsets.
1819	InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
1820	Ty: LU.AccessTy.MemTy, BaseGV: F.BaseGV, BaseOffset: F.BaseOffset + LU.MinOffset, HasBaseReg: F.HasBaseReg,
1821	Scale: F.Scale, AddrSpace: LU.AccessTy.AddrSpace);
1822	InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
1823	Ty: LU.AccessTy.MemTy, BaseGV: F.BaseGV, BaseOffset: F.BaseOffset + LU.MaxOffset, HasBaseReg: F.HasBaseReg,
1824	Scale: F.Scale, AddrSpace: LU.AccessTy.AddrSpace);
1825
1826	assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
1827	"Legal addressing mode has an illegal cost!");
1828	return std::max(a: ScaleCostMinOffset, b: ScaleCostMaxOffset);
1829	}
1830	case LSRUse::ICmpZero:
1831	case LSRUse::Basic:
1832	case LSRUse::Special:
1833	// The use is completely folded, i.e., everything is folded into the
1834	// instruction.
1835	return `0`;
1836	}
1837
1838	llvm_unreachable("Invalid LSRUse Kind!");
1839	}
1840
1841	static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1842	LSRUse::KindType Kind, MemAccessTy AccessTy,
1843	GlobalValue *BaseGV, int64_t BaseOffset,
1844	bool HasBaseReg) {
1845	// Fast-path: zero is always foldable.
1846	if (BaseOffset == `0` && !BaseGV) return true;
1847
1848	// Conservatively, create an address with an immediate and a
1849	// base and a scale.
1850	int64_t Scale = Kind == LSRUse::ICmpZero ? -`1` : `1`;
1851
1852	// Canonicalize a scale of 1 to a base register if the formula doesn't
1853	// already have a base register.
1854	if (!HasBaseReg && Scale == `1`) {
1855	Scale = `0`;
1856	HasBaseReg = true;
1857	}
1858
1859	return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
1860	HasBaseReg, Scale);
1861	}
1862
1863	static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1864	ScalarEvolution &SE, int64_t MinOffset,
1865	int64_t MaxOffset, LSRUse::KindType Kind,
1866	MemAccessTy AccessTy, const SCEV *S,
1867	bool HasBaseReg) {
1868	// Fast-path: zero is always foldable.
1869	if (S->isZero()) return true;
1870
1871	// Conservatively, create an address with an immediate and a
1872	// base and a scale.
1873	int64_t BaseOffset = ExtractImmediate(S, SE);
1874	GlobalValue *BaseGV = ExtractSymbol(S, SE);
1875
1876	// If there's anything else involved, it's not foldable.
1877	if (!S->isZero()) return false;
1878
1879	// Fast-path: zero is always foldable.
1880	if (BaseOffset == `0` && !BaseGV) return true;
1881
1882	// Conservatively, create an address with an immediate and a
1883	// base and a scale.
1884	int64_t Scale = Kind == LSRUse::ICmpZero ? -`1` : `1`;
1885
1886	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1887	BaseOffset, HasBaseReg, Scale);
1888	}
1889
1890	namespace {
1891
1892	/// An individual increment in a Chain of IV increments. Relate an IV user to
1893	/// an expression that computes the IV it uses from the IV used by the previous
1894	/// link in the Chain.
1895	///
1896	/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1897	/// original IVOperand. The head of the chain's IVOperand is only valid during
1898	/// chain collection, before LSR replaces IV users. During chain generation,
1899	/// IncExpr can be used to find the new IVOperand that computes the same
1900	/// expression.
1901	struct IVInc {
1902	Instruction *UserInst;
1903	Value* IVOperand;
1904	const SCEV *IncExpr;
1905
1906	IVInc(Instruction U, Value O, const SCEV *E)
1907	: UserInst(U), IVOperand(O), IncExpr(E) {}
1908	};
1909
1910	// The list of IV increments in program order. We typically add the head of a
1911	// chain without finding subsequent links.
1912	struct IVChain {
1913	SmallVector<IVInc, `1`> Incs;
1914	const SCEV ExprBase = nullptr*;
1915
1916	IVChain() = default;
1917	IVChain(const IVInc &Head, const SCEV *Base)
1918	: Incs (`1`, Head), ExprBase(Base) {}
1919
1920	using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
1921
1922	// Return the first increment in the chain.
1923	const_iterator begin() const {
1924	assert(!Incs.empty());
1925	return std::next(x: Incs.begin());
1926	}
1927	const_iterator end() const {
1928	return Incs.end();
1929	}
1930
1931	// Returns true if this chain contains any increments.
1932	bool hasIncs() const { return Incs.size() >= `2`; }
1933
1934	// Add an IVInc to the end of this chain.
1935	void add(const IVInc &X) { Incs.push_back(Elt: X); }
1936
1937	// Returns the last UserInst in the chain.
1938	Instruction tailUserInst() const* { return Incs.back().UserInst; }
1939
1940	// Returns true if IncExpr can be profitably added to this chain.
1941	bool isProfitableIncrement(const SCEV *OperExpr,
1942	const SCEV *IncExpr,
1943	ScalarEvolution&);
1944	};
1945
1946	/// Helper for CollectChains to track multiple IV increment uses. Distinguish
1947	/// between FarUsers that definitely cross IV increments and NearUsers that may
1948	/// be used between IV increments.
1949	struct ChainUsers {
1950	SmallPtrSet<Instruction*, `4`> FarUsers;
1951	SmallPtrSet<Instruction*, `4`> NearUsers;
1952	};
1953
1954	/// This class holds state for the main loop strength reduction logic.
1955	class LSRInstance {
1956	IVUsers &IU;
1957	ScalarEvolution &SE;
1958	DominatorTree &DT;
1959	LoopInfo &LI;
1960	AssumptionCache &AC;
1961	TargetLibraryInfo &TLI;
1962	const TargetTransformInfo &TTI;
1963	Loop *const L;
1964	MemorySSAUpdater *MSSAU;
1965	TTI::AddressingModeKind AMK;
1966	mutable SCEVExpander Rewriter;
1967	bool Changed = false;
1968
1969	/// This is the insert position that the current loop's induction variable
1970	/// increment should be placed. In simple loops, this is the latch block's
1971	/// terminator. But in more complicated cases, this is a position which will
1972	/// dominate all the in-loop post-increment users.
1973	Instruction IVIncInsertPos = nullptr*;
1974
1975	/// Interesting factors between use strides.
1976	///
1977	/// We explicitly use a SetVector which contains a SmallSet, instead of the
1978	/// default, a SmallDenseSet, because we need to use the full range of
1979	/// int64_ts, and there's currently no good way of doing that with
1980	/// SmallDenseSet.
1981	SetVector<int64_t, SmallVector<int64_t, `8`>, SmallSet<int64_t, `8`>> Factors;
1982
1983	/// The cost of the current SCEV, the best solution by LSR will be dropped if
1984	/// the solution is not profitable.
1985	Cost BaselineCost;
1986
1987	/// Interesting use types, to facilitate truncation reuse.
1988	SmallSetVector<Type *, `4`> Types;
1989
1990	/// The list of interesting uses.
1991	mutable SmallVector<LSRUse, `16`> Uses;
1992
1993	/// Track which uses use which register candidates.
1994	RegUseTracker RegUses;
1995
1996	// Limit the number of chains to avoid quadratic behavior. We don't expect to
1997	// have more than a few IV increment chains in a loop. Missing a Chain falls
1998	// back to normal LSR behavior for those uses.
1999	static const unsigned MaxChains = `8`;
2000
2001	/// IV users can form a chain of IV increments.
2002	SmallVector<IVChain, MaxChains> IVChainVec;
2003
2004	/// IV users that belong to profitable IVChains.
2005	SmallPtrSet<Use*, MaxChains> IVIncSet;
2006
2007	/// Induction variables that were generated and inserted by the SCEV Expander.
2008	SmallVector<llvm::WeakVH, `2`> ScalarEvolutionIVs;
2009
2010	void OptimizeShadowIV();
2011	bool FindIVUserForCond(ICmpInst Cond, IVStrideUse &CondUse);
2012	ICmpInst OptimizeMax(ICmpInst Cond, IVStrideUse* &CondUse);
2013	void OptimizeLoopTermCond();
2014
2015	void ChainInstruction(Instruction UserInst, Instruction IVOper,
2016	SmallVectorImpl<ChainUsers> &ChainUsersVec);
2017	void FinalizeChain(IVChain &Chain);
2018	void CollectChains();
2019	void GenerateIVChain(const IVChain &Chain,
2020	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2021
2022	void CollectInterestingTypesAndFactors();
2023	void CollectFixupsAndInitialFormulae();
2024
2025	// Support for sharing of LSRUses between LSRFixups.
2026	using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2027	UseMapTy UseMap;
2028
2029	bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
2030	LSRUse::KindType Kind, MemAccessTy AccessTy);
2031
2032	std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
2033	MemAccessTy AccessTy);
2034
2035	void DeleteUse(LSRUse &LU, size_t LUIdx);
2036
2037	LSRUse FindUseWithSimilarFormula(const* Formula &F, const LSRUse &OrigLU);
2038
2039	void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2040	void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2041	void CountRegisters(const Formula &F, size_t LUIdx);
2042	bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
2043
2044	void CollectLoopInvariantFixupsAndFormulae();
2045
2046	void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
2047	unsigned Depth = `0`);
2048
2049	void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
2050	const Formula &Base, unsigned Depth,
2051	size_t Idx, bool IsScaledReg = false);
2052	void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
2053	void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2054	const Formula &Base, size_t Idx,
2055	bool IsScaledReg = false);
2056	void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2057	void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2058	const Formula &Base,
2059	const SmallVectorImpl<int64_t> &Worklist,
2060	size_t Idx, bool IsScaledReg = false);
2061	void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2062	void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2063	void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2064	void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
2065	void GenerateCrossUseConstantOffsets();
2066	void GenerateAllReuseFormulae();
2067
2068	void FilterOutUndesirableDedicatedRegisters();
2069
2070	size_t EstimateSearchSpaceComplexity() const;
2071	void NarrowSearchSpaceByDetectingSupersets();
2072	void NarrowSearchSpaceByCollapsingUnrolledCode();
2073	void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2074	void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2075	void NarrowSearchSpaceByFilterPostInc();
2076	void NarrowSearchSpaceByDeletingCostlyFormulas();
2077	void NarrowSearchSpaceByPickingWinnerRegs();
2078	void NarrowSearchSpaceUsingHeuristics();
2079
2080	void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2081	Cost &SolutionCost,
2082	SmallVectorImpl<const Formula *> &Workspace,
2083	const Cost &CurCost,
2084	const SmallPtrSet<const SCEV *, `16`> &CurRegs,
2085	DenseSet<const SCEV > &VisitedRegs) const*;
2086	void Solve(SmallVectorImpl<const Formula > &Solution) const*;
2087
2088	BasicBlock::iterator
2089	HoistInsertPosition(BasicBlock::iterator IP,
2090	const SmallVectorImpl<Instruction > &Inputs) const*;
2091	BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
2092	const LSRFixup &LF,
2093	const LSRUse &LU) const;
2094
2095	Value Expand(const* LSRUse &LU, const LSRFixup &LF, const Formula &F,
2096	BasicBlock::iterator IP,
2097	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2098	void RewriteForPHI(PHINode PN, const* LSRUse &LU, const LSRFixup &LF,
2099	const Formula &F,
2100	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2101	void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2102	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2103	void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
2104
2105	public:
2106	LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2107	LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
2108	TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2109
2110	bool getChanged() const { return Changed; }
2111	const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
2112	return ScalarEvolutionIVs;
2113	}
2114
2115	void print_factors_and_types(raw_ostream &OS) const;
2116	void print_fixups(raw_ostream &OS) const;
2117	void print_uses(raw_ostream &OS) const;
2118	void print(raw_ostream &OS) const;
2119	void dump() const;
2120	};
2121
2122	} // end anonymous namespace
2123
2124	/// If IV is used in a int-to-float cast inside the loop then try to eliminate
2125	/// the cast operation.
2126	void LSRInstance::OptimizeShadowIV() {
2127	const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2128	if (isa<SCEVCouldNotCompute>(Val: BackedgeTakenCount))
2129	return;
2130
2131	for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
2132	UI != E; / empty /) {
2133	IVUsers::const_iterator CandidateUI = UI;
2134	++UI;
2135	Instruction *ShadowUse = CandidateUI ->getUser();
2136	Type DestTy = nullptr*;
2137	bool IsSigned = false;
2138
2139	/ If shadow use is a int->float cast then insert a second IV*
2140	to eliminate this cast.
2141
2142	for (unsigned i = 0; i < n; ++i)
2143	foo((double)i);
2144
2145	is transformed into
2146
2147	double d = 0.0;
2148	for (unsigned i = 0; i < n; ++i, ++d)
2149	foo(d);
2150	*/
2151	if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(Val: CandidateUI ->getUser())) {
2152	IsSigned = false;
2153	DestTy = UCast->getDestTy();
2154	}
2155	else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(Val: CandidateUI ->getUser())) {
2156	IsSigned = true;
2157	DestTy = SCast->getDestTy();
2158	}
2159	if (!DestTy) continue;
2160
2161	// If target does not support DestTy natively then do not apply
2162	// this transformation.
2163	if (!TTI.isTypeLegal(Ty: DestTy)) continue;
2164
2165	PHINode *PH = dyn_cast<PHINode>(Val: ShadowUse->getOperand(i: `0`));
2166	if (!PH) continue;
2167	if (PH->getNumIncomingValues() != `2`) continue;
2168
2169	// If the calculation in integers overflows, the result in FP type will
2170	// differ. So we only can do this transformation if we are guaranteed to not
2171	// deal with overflowing values
2172	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: PH));
2173	if (!AR) continue;
2174	if (IsSigned && !AR->hasNoSignedWrap()) continue;
2175	if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
2176
2177	Type *SrcTy = PH->getType();
2178	int Mantissa = DestTy->getFPMantissaWidth();
2179	if (Mantissa == -`1`) continue;
2180	if ((int)SE.getTypeSizeInBits(Ty: SrcTy) > Mantissa)
2181	continue;
2182
2183	unsigned Entry, Latch;
2184	if (PH->getIncomingBlock(i: `0`) == L->getLoopPreheader()) {
2185	Entry = `0`;
2186	Latch = `1`;
2187	} else {
2188	Entry = `1`;
2189	Latch = `0`;
2190	}
2191
2192	ConstantInt *Init = dyn_cast<ConstantInt>(Val: PH->getIncomingValue(i: Entry));
2193	if (!Init) continue;
2194	Constant *NewInit = ConstantFP::get(Ty: DestTy, V: IsSigned ?
2195	(double)Init->getSExtValue() :
2196	(double)Init->getZExtValue());
2197
2198	BinaryOperator *Incr =
2199	dyn_cast<BinaryOperator>(Val: PH->getIncomingValue(i: Latch));
2200	if (!Incr) continue;
2201	if (Incr->getOpcode() != Instruction::Add
2202	&& Incr->getOpcode() != Instruction::Sub)
2203	continue;
2204
2205	/ Initialize new IV, double d = 0.0 in above example. /
2206	ConstantInt C = nullptr*;
2207	if (Incr->getOperand(i_nocapture: `0`) == PH)
2208	C = dyn_cast<ConstantInt>(Val: Incr->getOperand(i_nocapture: `1`));
2209	else if (Incr->getOperand(i_nocapture: `1`) == PH)
2210	C = dyn_cast<ConstantInt>(Val: Incr->getOperand(i_nocapture: `0`));
2211	else
2212	continue;
2213
2214	if (!C) continue;
2215
2216	// Ignore negative constants, as the code below doesn't handle them
2217	// correctly. TODO: Remove this restriction.
2218	if (!C->getValue().isStrictlyPositive())
2219	continue;
2220
2221	/ Add new PHINode. /
2222	PHINode *NewPH = PHINode::Create(Ty: DestTy, NumReservedValues: `2`, NameStr: "IV.S.", InsertBefore: PH->getIterator());
2223
2224	/ create new increment. '++d' in above example. /
2225	Constant *CFP = ConstantFP::get(Ty: DestTy, V: C->getZExtValue());
2226	BinaryOperator *NewIncr = BinaryOperator::Create(
2227	Op: Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
2228	: Instruction::FSub,
2229	S1: NewPH, S2: CFP, Name: "IV.S.next.", InsertBefore: Incr->getIterator());
2230
2231	NewPH->addIncoming(V: NewInit, BB: PH->getIncomingBlock(i: Entry));
2232	NewPH->addIncoming(V: NewIncr, BB: PH->getIncomingBlock(i: Latch));
2233
2234	/ Remove cast operation /
2235	ShadowUse->replaceAllUsesWith(V: NewPH);
2236	ShadowUse->eraseFromParent();
2237	Changed = true;
2238	break;
2239	}
2240	}
2241
2242	/// If Cond has an operand that is an expression of an IV, set the IV user and
2243	/// stride information and return true, otherwise return false.
2244	bool LSRInstance::FindIVUserForCond(ICmpInst Cond, IVStrideUse &CondUse) {
2245	for (IVStrideUse &U : IU)
2246	if (U.getUser() == Cond) {
2247	// NOTE: we could handle setcc instructions with multiple uses here, but
2248	// InstCombine does it as well for simple uses, it's not clear that it
2249	// occurs enough in real life to handle.
2250	CondUse = &U;
2251	return true;
2252	}
2253	return false;
2254	}
2255
2256	/// Rewrite the loop's terminating condition if it uses a max computation.
2257	///
2258	/// This is a narrow solution to a specific, but acute, problem. For loops
2259	/// like this:
2260	///
2261	/// i = 0;
2262	/// do {
2263	/// p[i] = 0.0;
2264	/// } while (++i < n);
2265	///
2266	/// the trip count isn't just 'n', because 'n' might not be positive. And
2267	/// unfortunately this can come up even for loops where the user didn't use
2268	/// a C do-while loop. For example, seemingly well-behaved top-test loops
2269	/// will commonly be lowered like this:
2270	///
2271	/// if (n > 0) {
2272	/// i = 0;
2273	/// do {
2274	/// p[i] = 0.0;
2275	/// } while (++i < n);
2276	/// }
2277	///
2278	/// and then it's possible for subsequent optimization to obscure the if
2279	/// test in such a way that indvars can't find it.
2280	///
2281	/// When indvars can't find the if test in loops like this, it creates a
2282	/// max expression, which allows it to give the loop a canonical
2283	/// induction variable:
2284	///
2285	/// i = 0;
2286	/// max = n < 1 ? 1 : n;
2287	/// do {
2288	/// p[i] = 0.0;
2289	/// } while (++i != max);
2290	///
2291	/// Canonical induction variables are necessary because the loop passes
2292	/// are designed around them. The most obvious example of this is the
2293	/// LoopInfo analysis, which doesn't remember trip count values. It
2294	/// expects to be able to rediscover the trip count each time it is
2295	/// needed, and it does this using a simple analysis that only succeeds if
2296	/// the loop has a canonical induction variable.
2297	///
2298	/// However, when it comes time to generate code, the maximum operation
2299	/// can be quite costly, especially if it's inside of an outer loop.
2300	///
2301	/// This function solves this problem by detecting this type of loop and
2302	/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
2303	/// the instructions for the maximum computation.
2304	ICmpInst LSRInstance::OptimizeMax(ICmpInst Cond, IVStrideUse* &CondUse) {
2305	// Check that the loop matches the pattern we're looking for.
2306	if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
2307	Cond->getPredicate() != CmpInst::ICMP_NE)
2308	return Cond;
2309
2310	SelectInst *Sel = dyn_cast<SelectInst>(Val: Cond->getOperand(i_nocapture: `1`));
2311	if (!Sel \|\| !Sel->hasOneUse()) return Cond;
2312
2313	const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2314	if (isa<SCEVCouldNotCompute>(Val: BackedgeTakenCount))
2315	return Cond;
2316	const SCEV *One = SE.getConstant(Ty: BackedgeTakenCount->getType(), V: `1`);
2317
2318	// Add one to the backedge-taken count to get the trip count.
2319	const SCEV *IterationCount = SE.getAddExpr(LHS: One, RHS: BackedgeTakenCount);
2320	if (IterationCount != SE.getSCEV(V: Sel)) return Cond;
2321
2322	// Check for a max calculation that matches the pattern. There's no check
2323	// for ICMP_ULE here because the comparison would be with zero, which
2324	// isn't interesting.
2325	CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
2326	const SCEVNAryExpr Max = nullptr*;
2327	if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(Val: BackedgeTakenCount)) {
2328	Pred = ICmpInst::ICMP_SLE;
2329	Max = S;
2330	} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(Val: IterationCount)) {
2331	Pred = ICmpInst::ICMP_SLT;
2332	Max = S;
2333	} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(Val: IterationCount)) {
2334	Pred = ICmpInst::ICMP_ULT;
2335	Max = U;
2336	} else {
2337	// No match; bail.
2338	return Cond;
2339	}
2340
2341	// To handle a max with more than two operands, this optimization would
2342	// require additional checking and setup.
2343	if (Max->getNumOperands() != `2`)
2344	return Cond;
2345
2346	const SCEV *MaxLHS = Max->getOperand(i: `0`);
2347	const SCEV *MaxRHS = Max->getOperand(i: `1`);
2348
2349	// ScalarEvolution canonicalizes constants to the left. For < and >, look
2350	// for a comparison with 1. For <= and >=, a comparison with zero.
2351	if (!MaxLHS \|\|
2352	(ICmpInst::isTrueWhenEqual(predicate: Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2353	return Cond;
2354
2355	// Check the relevant induction variable for conformance to
2356	// the pattern.
2357	const SCEV *IV = SE.getSCEV(V: Cond->getOperand(i_nocapture: `0`));
2358	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: IV);
2359	if (!AR \|\| !AR->isAffine() \|\|
2360	AR->getStart() != One \|\|
2361	AR->getStepRecurrence(SE) != One)
2362	return Cond;
2363
2364	assert(AR->getLoop() == L &&
2365	"Loop condition operand is an addrec in a different loop!");
2366
2367	// Check the right operand of the select, and remember it, as it will
2368	// be used in the new comparison instruction.
2369	Value NewRHS = nullptr*;
2370	if (ICmpInst::isTrueWhenEqual(predicate: Pred)) {
2371	// Look for n+1, and grab n.
2372	if (AddOperator *BO = dyn_cast<AddOperator>(Val: Sel->getOperand(i_nocapture: `1`)))
2373	if (ConstantInt *BO1 = dyn_cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`)))
2374	if (BO1->isOne() && SE.getSCEV(V: BO->getOperand(i_nocapture: `0`)) == MaxRHS)
2375	NewRHS = BO->getOperand(i_nocapture: `0`);
2376	if (AddOperator *BO = dyn_cast<AddOperator>(Val: Sel->getOperand(i_nocapture: `2`)))
2377	if (ConstantInt *BO1 = dyn_cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`)))
2378	if (BO1->isOne() && SE.getSCEV(V: BO->getOperand(i_nocapture: `0`)) == MaxRHS)
2379	NewRHS = BO->getOperand(i_nocapture: `0`);
2380	if (!NewRHS)
2381	return Cond;
2382	} else if (SE.getSCEV(V: Sel->getOperand(i_nocapture: `1`)) == MaxRHS)
2383	NewRHS = Sel->getOperand(i_nocapture: `1`);
2384	else if (SE.getSCEV(V: Sel->getOperand(i_nocapture: `2`)) == MaxRHS)
2385	NewRHS = Sel->getOperand(i_nocapture: `2`);
2386	else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Val: MaxRHS))
2387	NewRHS = SU->getValue();
2388	else
2389	// Max doesn't match expected pattern.
2390	return Cond;
2391
2392	// Determine the new comparison opcode. It may be signed or unsigned,
2393	// and the original comparison may be either equality or inequality.
2394	if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2395	Pred = CmpInst::getInversePredicate(pred: Pred);
2396
2397	// Ok, everything looks ok to change the condition into an SLT or SGE and
2398	// delete the max calculation.
2399	ICmpInst NewCond = new* ICmpInst (Cond->getIterator(), Pred,
2400	Cond->getOperand(i_nocapture: `0`), NewRHS, "scmp");
2401
2402	// Delete the max calculation instructions.
2403	NewCond->setDebugLoc(Cond->getDebugLoc());
2404	Cond->replaceAllUsesWith(V: NewCond);
2405	CondUse->setUser(NewCond);
2406	Instruction *Cmp = cast<Instruction>(Val: Sel->getOperand(i_nocapture: `0`));
2407	Cond->eraseFromParent();
2408	Sel->eraseFromParent();
2409	if (Cmp->use_empty())
2410	Cmp->eraseFromParent();
2411	return NewCond;
2412	}
2413
2414	/// Change loop terminating condition to use the postinc iv when possible.
2415	void
2416	LSRInstance::OptimizeLoopTermCond() {
2417	SmallPtrSet<Instruction *, `4`> PostIncs;
2418
2419	// We need a different set of heuristics for rotated and non-rotated loops.
2420	// If a loop is rotated then the latch is also the backedge, so inserting
2421	// post-inc expressions just before the latch is ideal. To reduce live ranges
2422	// it also makes sense to rewrite terminating conditions to use post-inc
2423	// expressions.
2424	//
2425	// If the loop is not rotated then the latch is not a backedge; the latch
2426	// check is done in the loop head. Adding post-inc expressions before the
2427	// latch will cause overlapping live-ranges of pre-inc and post-inc expressions
2428	// in the loop body. In this case we do not* want to use post-inc expressions*
2429	// in the latch check, and we want to insert post-inc expressions before
2430	// the backedge.
2431	BasicBlock *LatchBlock = L->getLoopLatch();
2432	SmallVector<BasicBlock*, `8`> ExitingBlocks;
2433	L->getExitingBlocks(ExitingBlocks);
2434	if (!llvm::is_contained(Range&: ExitingBlocks, Element: LatchBlock)) {
2435	// The backedge doesn't exit the loop; treat this as a head-tested loop.
2436	IVIncInsertPos = LatchBlock->getTerminator();
2437	return;
2438	}
2439
2440	// Otherwise treat this as a rotated loop.
2441	for (BasicBlock *ExitingBlock : ExitingBlocks) {
2442	// Get the terminating condition for the loop if possible. If we
2443	// can, we want to change it to use a post-incremented version of its
2444	// induction variable, to allow coalescing the live ranges for the IV into
2445	// one register value.
2446
2447	BranchInst *TermBr = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator());
2448	if (!TermBr)
2449	continue;
2450	// FIXME: Overly conservative, termination condition could be an 'or' etc..
2451	if (TermBr->isUnconditional() \|\| !isa<ICmpInst>(Val: TermBr->getCondition()))
2452	continue;
2453
2454	// Search IVUsesByStride to find Cond's IVUse if there is one.
2455	IVStrideUse CondUse = nullptr*;
2456	ICmpInst *Cond = cast<ICmpInst>(Val: TermBr->getCondition());
2457	if (!FindIVUserForCond(Cond, CondUse))
2458	continue;
2459
2460	// If the trip count is computed in terms of a max (due to ScalarEvolution
2461	// being unable to find a sufficient guard, for example), change the loop
2462	// comparison to use SLT or ULT instead of NE.
2463	// One consequence of doing this now is that it disrupts the count-down
2464	// optimization. That's not always a bad thing though, because in such
2465	// cases it may still be worthwhile to avoid a max.
2466	Cond = OptimizeMax(Cond, CondUse);
2467
2468	// If this exiting block dominates the latch block, it may also use
2469	// the post-inc value if it won't be shared with other uses.
2470	// Check for dominance.
2471	if (!DT.dominates(A: ExitingBlock, B: LatchBlock))
2472	continue;
2473
2474	// Conservatively avoid trying to use the post-inc value in non-latch
2475	// exits if there may be pre-inc users in intervening blocks.
2476	if (LatchBlock != ExitingBlock)
2477	for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2478	// Test if the use is reachable from the exiting block. This dominator
2479	// query is a conservative approximation of reachability.
2480	if (&*UI != CondUse &&
2481	!DT.properlyDominates(A: UI ->getUser()->getParent(), B: ExitingBlock)) {
2482	// Conservatively assume there may be reuse if the quotient of their
2483	// strides could be a legal scale.
2484	const SCEV A = IU.getStride(IU: CondUse, L);
2485	const SCEV B = IU.getStride(IU: UI, L);
2486	if (!A \|\| !B) continue;
2487	if (SE.getTypeSizeInBits(Ty: A->getType()) !=
2488	SE.getTypeSizeInBits(Ty: B->getType())) {
2489	if (SE.getTypeSizeInBits(Ty: A->getType()) >
2490	SE.getTypeSizeInBits(Ty: B->getType()))
2491	B = SE.getSignExtendExpr(Op: B, Ty: A->getType());
2492	else
2493	A = SE.getSignExtendExpr(Op: A, Ty: B->getType());
2494	}
2495	if (const SCEVConstant *D =
2496	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: B, RHS: A, SE))) {
2497	const ConstantInt *C = D->getValue();
2498	// Stride of one or negative one can have reuse with non-addresses.
2499	if (C->isOne() \|\| C->isMinusOne())
2500	goto decline_post_inc;
2501	// Avoid weird situations.
2502	if (C->getValue().getSignificantBits() >= `64` \|\|
2503	C->getValue().isMinSignedValue())
2504	goto decline_post_inc;
2505	// Check for possible scaled-address reuse.
2506	if (isAddressUse(TTI, Inst: UI ->getUser(), OperandVal: UI ->getOperandValToReplace())) {
2507	MemAccessTy AccessTy = getAccessType(
2508	TTI, Inst: UI ->getUser(), OperandVal: UI ->getOperandValToReplace());
2509	int64_t Scale = C->getSExtValue();
2510	if (TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, /BaseGV=/nullptr,
2511	/BaseOffset=/`0`,
2512	/HasBaseReg=/true, Scale,
2513	AddrSpace: AccessTy.AddrSpace))
2514	goto decline_post_inc;
2515	Scale = -Scale;
2516	if (TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, /BaseGV=/nullptr,
2517	/BaseOffset=/`0`,
2518	/HasBaseReg=/true, Scale,
2519	AddrSpace: AccessTy.AddrSpace))
2520	goto decline_post_inc;
2521	}
2522	}
2523	}
2524
2525	LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2526	<< *Cond << `'\n'`);
2527
2528	// It's possible for the setcc instruction to be anywhere in the loop, and
2529	// possible for it to have multiple users. If it is not immediately before
2530	// the exiting block branch, move it.
2531	if (Cond->getNextNonDebugInstruction() != TermBr) {
2532	if (Cond->hasOneUse()) {
2533	Cond->moveBefore(MovePos: TermBr);
2534	} else {
2535	// Clone the terminating condition and insert into the loopend.
2536	ICmpInst *OldCond = Cond;
2537	Cond = cast<ICmpInst>(Val: Cond->clone());
2538	Cond->setName(L->getHeader()->getName() + ".termcond");
2539	Cond->insertInto(ParentBB: ExitingBlock, It: TermBr->getIterator());
2540
2541	// Clone the IVUse, as the old use still exists!
2542	CondUse = &IU.AddUser(User: Cond, Operand: CondUse->getOperandValToReplace());
2543	TermBr->replaceUsesOfWith(From: OldCond, To: Cond);
2544	}
2545	}
2546
2547	// If we get to here, we know that we can transform the setcc instruction to
2548	// use the post-incremented version of the IV, allowing us to coalesce the
2549	// live ranges for the IV correctly.
2550	CondUse->transformToPostInc(L);
2551	Changed = true;
2552
2553	PostIncs.insert(Ptr: Cond);
2554	decline_post_inc:;
2555	}
2556
2557	// Determine an insertion point for the loop induction variable increment. It
2558	// must dominate all the post-inc comparisons we just set up, and it must
2559	// dominate the loop latch edge.
2560	IVIncInsertPos = L->getLoopLatch()->getTerminator();
2561	for (Instruction *Inst : PostIncs)
2562	IVIncInsertPos = DT.findNearestCommonDominator(I1: IVIncInsertPos, I2: Inst);
2563	}
2564
2565	/// Determine if the given use can accommodate a fixup at the given offset and
2566	/// other details. If so, update the use and return true.
2567	bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2568	bool HasBaseReg, LSRUse::KindType Kind,
2569	MemAccessTy AccessTy) {
2570	int64_t NewMinOffset = LU.MinOffset;
2571	int64_t NewMaxOffset = LU.MaxOffset;
2572	MemAccessTy NewAccessTy = AccessTy;
2573
2574	// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2575	// something conservative, however this can pessimize in the case that one of
2576	// the uses will have all its uses outside the loop, for example.
2577	if (LU.Kind != Kind)
2578	return false;
2579
2580	// Check for a mismatched access type, and fall back conservatively as needed.
2581	// TODO: Be less conservative when the type is similar and can use the same
2582	// addressing modes.
2583	if (Kind == LSRUse::Address) {
2584	if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2585	NewAccessTy = MemAccessTy::getUnknown(Ctx&: AccessTy.MemTy->getContext(),
2586	AS: AccessTy.AddrSpace);
2587	}
2588	}
2589
2590	// Conservatively assume HasBaseReg is true for now.
2591	if (NewOffset < LU.MinOffset) {
2592	if (!isAlwaysFoldable(TTI, Kind, AccessTy: NewAccessTy, /BaseGV=/nullptr,
2593	BaseOffset: LU.MaxOffset - NewOffset, HasBaseReg))
2594	return false;
2595	NewMinOffset = NewOffset;
2596	} else if (NewOffset > LU.MaxOffset) {
2597	if (!isAlwaysFoldable(TTI, Kind, AccessTy: NewAccessTy, /BaseGV=/nullptr,
2598	BaseOffset: NewOffset - LU.MinOffset, HasBaseReg))
2599	return false;
2600	NewMaxOffset = NewOffset;
2601	}
2602
2603	// Update the use.
2604	LU.MinOffset = NewMinOffset;
2605	LU.MaxOffset = NewMaxOffset;
2606	LU.AccessTy = NewAccessTy;
2607	return true;
2608	}
2609
2610	/// Return an LSRUse index and an offset value for a fixup which needs the given
2611	/// expression, with the given kind and optional access type. Either reuse an
2612	/// existing use or create a new one, as needed.
2613	std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
2614	LSRUse::KindType Kind,
2615	MemAccessTy AccessTy) {
2616	const SCEV *Copy = Expr;
2617	int64_t Offset = ExtractImmediate(S&: Expr, SE);
2618
2619	// Basic uses can't accept any offset, for example.
2620	if (!isAlwaysFoldable(TTI, Kind, AccessTy, /BaseGV=/ nullptr,
2621	BaseOffset: Offset, /HasBaseReg=/ true)) {
2622	Expr = Copy;
2623	Offset = `0`;
2624	}
2625
2626	std::pair<UseMapTy::iterator, bool> P =
2627	UseMap.insert(KV: std::make_pair(x: LSRUse::SCEVUseKindPair (Expr, Kind), y: `0`));
2628	if (!P.second) {
2629	// A use already existed with this base.
2630	size_t LUIdx = P.first ->second;
2631	LSRUse &LU = Uses [LUIdx];
2632	if (reconcileNewOffset(LU, NewOffset: Offset, /HasBaseReg=/true, Kind, AccessTy))
2633	// Reuse this use.
2634	return std::make_pair(x&: LUIdx, y&: Offset);
2635	}
2636
2637	// Create a new use.
2638	size_t LUIdx = Uses.size();
2639	P.first ->second = LUIdx;
2640	Uses.push_back(Elt: LSRUse (Kind, AccessTy));
2641	LSRUse &LU = Uses [LUIdx];
2642
2643	LU.MinOffset = Offset;
2644	LU.MaxOffset = Offset;
2645	return std::make_pair(x&: LUIdx, y&: Offset);
2646	}
2647
2648	/// Delete the given use from the Uses list.
2649	void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2650	if (&LU != &Uses.back())
2651	std::swap(a&: LU, b&: Uses.back());
2652	Uses.pop_back();
2653
2654	// Update RegUses.
2655	RegUses.swapAndDropUse(LUIdx, LastLUIdx: Uses.size());
2656	}
2657
2658	/// Look for a use distinct from OrigLU which is has a formula that has the same
2659	/// registers as the given formula.
2660	LSRUse *
2661	LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2662	const LSRUse &OrigLU) {
2663	// Search all uses for the formula. This could be more clever.
2664	for (LSRUse &LU : Uses) {
2665	// Check whether this use is close enough to OrigLU, to see whether it's
2666	// worthwhile looking through its formulae.
2667	// Ignore ICmpZero uses because they may contain formulae generated by
2668	// GenerateICmpZeroScales, in which case adding fixup offsets may
2669	// be invalid.
2670	if (&LU != &OrigLU &&
2671	LU.Kind != LSRUse::ICmpZero &&
2672	LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2673	LU.WidestFixupType == OrigLU.WidestFixupType &&
2674	LU.HasFormulaWithSameRegs(F: OrigF)) {
2675	// Scan through this use's formulae.
2676	for (const Formula &F : LU.Formulae) {
2677	// Check to see if this formula has the same registers and symbols
2678	// as OrigF.
2679	if (F.BaseRegs == OrigF.BaseRegs &&
2680	F.ScaledReg == OrigF.ScaledReg &&
2681	F.BaseGV == OrigF.BaseGV &&
2682	F.Scale == OrigF.Scale &&
2683	F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2684	if (F.BaseOffset == `0`)
2685	return &LU;
2686	// This is the formula where all the registers and symbols matched;
2687	// there aren't going to be any others. Since we declined it, we
2688	// can skip the rest of the formulae and proceed to the next LSRUse.
2689	break;
2690	}
2691	}
2692	}
2693	}
2694
2695	// Nothing looked good.
2696	return nullptr;
2697	}
2698
2699	void LSRInstance::CollectInterestingTypesAndFactors() {
2700	SmallSetVector<const SCEV *, `4`> Strides;
2701
2702	// Collect interesting types and strides.
2703	SmallVector<const SCEV *, `4`> Worklist;
2704	for (const IVStrideUse &U : IU) {
2705	const SCEV *Expr = IU.getExpr(IU: U);
2706	if (!Expr)
2707	continue;
2708
2709	// Collect interesting types.
2710	Types.insert(X: SE.getEffectiveSCEVType(Ty: Expr->getType()));
2711
2712	// Add strides for mentioned loops.
2713	Worklist.push_back(Elt: Expr);
2714	do {
2715	const SCEV *S = Worklist.pop_back_val();
2716	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2717	if (AR->getLoop() == L)
2718	Strides.insert(X: AR->getStepRecurrence(SE));
2719	Worklist.push_back(Elt: AR->getStart());
2720	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
2721	append_range(C&: Worklist, R: Add->operands());
2722	}
2723	} while (!Worklist.empty());
2724	}
2725
2726	// Compute interesting factors from the set of interesting strides.
2727	for (SmallSetVector<const SCEV *, `4`>::const_iterator
2728	I = Strides.begin(), E = Strides.end(); I != E; ++I)
2729	for (SmallSetVector<const SCEV *, `4`>::const_iterator NewStrideIter =
2730	std::next(x: I); NewStrideIter != E; ++NewStrideIter) {
2731	const SCEV OldStride = I;
2732	const SCEV NewStride = NewStrideIter;
2733
2734	if (SE.getTypeSizeInBits(Ty: OldStride->getType()) !=
2735	SE.getTypeSizeInBits(Ty: NewStride->getType())) {
2736	if (SE.getTypeSizeInBits(Ty: OldStride->getType()) >
2737	SE.getTypeSizeInBits(Ty: NewStride->getType()))
2738	NewStride = SE.getSignExtendExpr(Op: NewStride, Ty: OldStride->getType());
2739	else
2740	OldStride = SE.getSignExtendExpr(Op: OldStride, Ty: NewStride->getType());
2741	}
2742	if (const SCEVConstant *Factor =
2743	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: NewStride, RHS: OldStride,
2744	SE, IgnoreSignificantBits: true))) {
2745	if (Factor->getAPInt().getSignificantBits() <= `64` && !Factor->isZero())
2746	Factors.insert(X: Factor->getAPInt().getSExtValue());
2747	} else if (const SCEVConstant *Factor =
2748	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: OldStride,
2749	RHS: NewStride,
2750	SE, IgnoreSignificantBits: true))) {
2751	if (Factor->getAPInt().getSignificantBits() <= `64` && !Factor->isZero())
2752	Factors.insert(X: Factor->getAPInt().getSExtValue());
2753	}
2754	}
2755
2756	// If all uses use the same type, don't bother looking for truncation-based
2757	// reuse.
2758	if (Types.size() == `1`)
2759	Types.clear();
2760
2761	LLVM_DEBUG(print_factors_and_types(dbgs()));
2762	}
2763
2764	/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
2765	/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
2766	/// IVStrideUses, we could partially skip this.
2767	static User::op_iterator
2768	findIVOperand(User::op_iterator OI, User::op_iterator OE,
2769	Loop *L, ScalarEvolution &SE) {
2770	for(; OI != OE; ++OI) {
2771	if (Instruction Oper = dyn_cast<Instruction>(Val&: OI)) {
2772	if (!SE.isSCEVable(Ty: Oper->getType()))
2773	continue;
2774
2775	if (const SCEVAddRecExpr *AR =
2776	dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: Oper))) {
2777	if (AR->getLoop() == L)
2778	break;
2779	}
2780	}
2781	}
2782	return OI;
2783	}
2784
2785	/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
2786	/// a convenient helper.
2787	static Value getWideOperand(Value Oper) {
2788	if (TruncInst *Trunc = dyn_cast<TruncInst>(Val: Oper))
2789	return Trunc->getOperand(i_nocapture: `0`);
2790	return Oper;
2791	}
2792
2793	/// Return an approximation of this SCEV expression's "base", or NULL for any
2794	/// constant. Returning the expression itself is conservative. Returning a
2795	/// deeper subexpression is more precise and valid as long as it isn't less
2796	/// complex than another subexpression. For expressions involving multiple
2797	/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
2798	/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
2799	/// IVInc==b-a.
2800	///
2801	/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2802	/// SCEVUnknown, we simply return the rightmost SCEV operand.
2803	static const SCEV getExprBase(const* SCEV *S) {
2804	switch (S->getSCEVType()) {
2805	default: // including scUnknown.
2806	return S;
2807	case scConstant:
2808	case scVScale:
2809	return nullptr;
2810	case scTruncate:
2811	return getExprBase(S: cast<SCEVTruncateExpr>(Val: S)->getOperand());
2812	case scZeroExtend:
2813	return getExprBase(S: cast<SCEVZeroExtendExpr>(Val: S)->getOperand());
2814	case scSignExtend:
2815	return getExprBase(S: cast<SCEVSignExtendExpr>(Val: S)->getOperand());
2816	case scAddExpr: {
2817	// Skip over scaled operands (scMulExpr) to follow add operands as long as
2818	// there's nothing more complex.
2819	// FIXME: not sure if we want to recognize negation.
2820	const SCEVAddExpr *Add = cast<SCEVAddExpr>(Val: S);
2821	for (const SCEV *SubExpr : reverse(C: Add->operands())) {
2822	if (SubExpr->getSCEVType() == scAddExpr)
2823	return getExprBase(S: SubExpr);
2824
2825	if (SubExpr->getSCEVType() != scMulExpr)
2826	return SubExpr;
2827	}
2828	return S; // all operands are scaled, be conservative.
2829	}
2830	case scAddRecExpr:
2831	return getExprBase(S: cast<SCEVAddRecExpr>(Val: S)->getStart());
2832	}
2833	llvm_unreachable("Unknown SCEV kind!");
2834	}
2835
2836	/// Return true if the chain increment is profitable to expand into a loop
2837	/// invariant value, which may require its own register. A profitable chain
2838	/// increment will be an offset relative to the same base. We allow such offsets
2839	/// to potentially be used as chain increment as long as it's not obviously
2840	/// expensive to expand using real instructions.
2841	bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
2842	const SCEV *IncExpr,
2843	ScalarEvolution &SE) {
2844	// Aggressively form chains when -stress-ivchain.
2845	if (StressIVChain)
2846	return true;
2847
2848	// Do not replace a constant offset from IV head with a nonconstant IV
2849	// increment.
2850	if (!isa<SCEVConstant>(Val: IncExpr)) {
2851	const SCEV *HeadExpr = SE.getSCEV(V: getWideOperand(Oper: Incs [`0`].IVOperand));
2852	if (isa<SCEVConstant>(Val: SE.getMinusSCEV(LHS: OperExpr, RHS: HeadExpr)))
2853	return false;
2854	}
2855
2856	SmallPtrSet<const SCEV*, `8`> Processed;
2857	return !isHighCostExpansion(S: IncExpr, Processed, SE);
2858	}
2859
2860	/// Return true if the number of registers needed for the chain is estimated to
2861	/// be less than the number required for the individual IV users. First prohibit
2862	/// any IV users that keep the IV live across increments (the Users set should
2863	/// be empty). Next count the number and type of increments in the chain.
2864	///
2865	/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
2866	/// effectively use postinc addressing modes. Only consider it profitable it the
2867	/// increments can be computed in fewer registers when chained.
2868	///
2869	/// TODO: Consider IVInc free if it's already used in another chains.
2870	static bool isProfitableChain(IVChain &Chain,
2871	SmallPtrSetImpl<Instruction *> &Users,
2872	ScalarEvolution &SE,
2873	const TargetTransformInfo &TTI) {
2874	if (StressIVChain)
2875	return true;
2876
2877	if (!Chain.hasIncs())
2878	return false;
2879
2880	if (!Users.empty()) {
2881	LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[`0`].UserInst << " users:\n";
2882	for (Instruction *Inst
2883	: Users) { dbgs() << " " << *Inst << "\n"; });
2884	return false;
2885	}
2886	assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2887
2888	// The chain itself may require a register, so intialize cost to 1.
2889	int cost = `1`;
2890
2891	// A complete chain likely eliminates the need for keeping the original IV in
2892	// a register. LSR does not currently know how to form a complete chain unless
2893	// the header phi already exists.
2894	if (isa<PHINode>(Val: Chain.tailUserInst())
2895	&& SE.getSCEV(V: Chain.tailUserInst()) == Chain.Incs [`0`].IncExpr) {
2896	--cost;
2897	}
2898	const SCEV LastIncExpr = nullptr*;
2899	unsigned NumConstIncrements = `0`;
2900	unsigned NumVarIncrements = `0`;
2901	unsigned NumReusedIncrements = `0`;
2902
2903	if (TTI.isProfitableLSRChainElement(I: Chain.Incs [`0`].UserInst))
2904	return true;
2905
2906	for (const IVInc &Inc : Chain) {
2907	if (TTI.isProfitableLSRChainElement(I: Inc.UserInst))
2908	return true;
2909	if (Inc.IncExpr->isZero())
2910	continue;
2911
2912	// Incrementing by zero or some constant is neutral. We assume constants can
2913	// be folded into an addressing mode or an add's immediate operand.
2914	if (isa<SCEVConstant>(Val: Inc.IncExpr)) {
2915	++NumConstIncrements;
2916	continue;
2917	}
2918
2919	if (Inc.IncExpr == LastIncExpr)
2920	++NumReusedIncrements;
2921	else
2922	++NumVarIncrements;
2923
2924	LastIncExpr = Inc.IncExpr;
2925	}
2926	// An IV chain with a single increment is handled by LSR's postinc
2927	// uses. However, a chain with multiple increments requires keeping the IV's
2928	// value live longer than it needs to be if chained.
2929	if (NumConstIncrements > `1`)
2930	--cost;
2931
2932	// Materializing increment expressions in the preheader that didn't exist in
2933	// the original code may cost a register. For example, sign-extended array
2934	// indices can produce ridiculous increments like this:
2935	// IV + ((sext i32 (2 %s) to i64) + (-1 * (sext i32 %s to i64)))*
2936	cost += NumVarIncrements;
2937
2938	// Reusing variable increments likely saves a register to hold the multiple of
2939	// the stride.
2940	cost -= NumReusedIncrements;
2941
2942	LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[`0`].UserInst << " Cost: " << cost
2943	<< "\n");
2944
2945	return cost < `0`;
2946	}
2947
2948	/// Add this IV user to an existing chain or make it the head of a new chain.
2949	void LSRInstance::ChainInstruction(Instruction UserInst, Instruction IVOper,
2950	SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2951	// When IVs are used as types of varying widths, they are generally converted
2952	// to a wider type with some uses remaining narrow under a (free) trunc.
2953	Value *const NextIV = getWideOperand(Oper: IVOper);
2954	const SCEV *const OperExpr = SE.getSCEV(V: NextIV);
2955	const SCEV *const OperExprBase = getExprBase(S: OperExpr);
2956
2957	// Visit all existing chains. Check if its IVOper can be computed as a
2958	// profitable loop invariant increment from the last link in the Chain.
2959	unsigned ChainIdx = `0`, NChains = IVChainVec.size();
2960	const SCEV LastIncExpr = nullptr*;
2961	for (; ChainIdx < NChains; ++ChainIdx) {
2962	IVChain &Chain = IVChainVec [ChainIdx];
2963
2964	// Prune the solution space aggressively by checking that both IV operands
2965	// are expressions that operate on the same unscaled SCEVUnknown. This
2966	// "base" will be canceled by the subsequent getMinusSCEV call. Checking
2967	// first avoids creating extra SCEV expressions.
2968	if (!StressIVChain && Chain.ExprBase != OperExprBase)
2969	continue;
2970
2971	Value *PrevIV = getWideOperand(Oper: Chain.Incs.back().IVOperand);
2972	if (PrevIV->getType() != NextIV->getType())
2973	continue;
2974
2975	// A phi node terminates a chain.
2976	if (isa<PHINode>(Val: UserInst) && isa<PHINode>(Val: Chain.tailUserInst()))
2977	continue;
2978
2979	// The increment must be loop-invariant so it can be kept in a register.
2980	const SCEV *PrevExpr = SE.getSCEV(V: PrevIV);
2981	const SCEV *IncExpr = SE.getMinusSCEV(LHS: OperExpr, RHS: PrevExpr);
2982	if (isa<SCEVCouldNotCompute>(Val: IncExpr) \|\| !SE.isLoopInvariant(S: IncExpr, L))
2983	continue;
2984
2985	if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2986	LastIncExpr = IncExpr;
2987	break;
2988	}
2989	}
2990	// If we haven't found a chain, create a new one, unless we hit the max. Don't
2991	// bother for phi nodes, because they must be last in the chain.
2992	if (ChainIdx == NChains) {
2993	if (isa<PHINode>(Val: UserInst))
2994	return;
2995	if (NChains >= MaxChains && !StressIVChain) {
2996	LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
2997	return;
2998	}
2999	LastIncExpr = OperExpr;
3000	// IVUsers may have skipped over sign/zero extensions. We don't currently
3001	// attempt to form chains involving extensions unless they can be hoisted
3002	// into this loop's AddRec.
3003	if (!isa<SCEVAddRecExpr>(Val: LastIncExpr))
3004	return;
3005	++NChains;
3006	IVChainVec.push_back(Elt: IVChain (IVInc (UserInst, IVOper, LastIncExpr),
3007	OperExprBase));
3008	ChainUsersVec.resize(N: NChains);
3009	LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
3010	<< ") IV=" << *LastIncExpr << "\n");
3011	} else {
3012	LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
3013	<< ") IV+" << *LastIncExpr << "\n");
3014	// Add this IV user to the end of the chain.
3015	IVChainVec [ChainIdx].add(X: IVInc (UserInst, IVOper, LastIncExpr));
3016	}
3017	IVChain &Chain = IVChainVec [ChainIdx];
3018
3019	SmallPtrSet<Instruction*,`4`> &NearUsers = ChainUsersVec [ChainIdx].NearUsers;
3020	// This chain's NearUsers become FarUsers.
3021	if (!LastIncExpr->isZero()) {
3022	ChainUsersVec [ChainIdx].FarUsers.insert(I: NearUsers.begin(),
3023	E: NearUsers.end());
3024	NearUsers.clear();
3025	}
3026
3027	// All other uses of IVOperand become near uses of the chain.
3028	// We currently ignore intermediate values within SCEV expressions, assuming
3029	// they will eventually be used be the current chain, or can be computed
3030	// from one of the chain increments. To be more precise we could
3031	// transitively follow its user and only add leaf IV users to the set.
3032	for (User *U : IVOper->users()) {
3033	Instruction *OtherUse = dyn_cast<Instruction>(Val: U);
3034	if (!OtherUse)
3035	continue;
3036	// Uses in the chain will no longer be uses if the chain is formed.
3037	// Include the head of the chain in this iteration (not Chain.begin()).
3038	IVChain::const_iterator IncIter = Chain.Incs.begin();
3039	IVChain::const_iterator IncEnd = Chain.Incs.end();
3040	for( ; IncIter != IncEnd; ++IncIter) {
3041	if (IncIter->UserInst == OtherUse)
3042	break;
3043	}
3044	if (IncIter != IncEnd)
3045	continue;
3046
3047	if (SE.isSCEVable(Ty: OtherUse->getType())
3048	&& !isa<SCEVUnknown>(Val: SE.getSCEV(V: OtherUse))
3049	&& IU.isIVUserOrOperand(Inst: OtherUse)) {
3050	continue;
3051	}
3052	NearUsers.insert(Ptr: OtherUse);
3053	}
3054
3055	// Since this user is part of the chain, it's no longer considered a use
3056	// of the chain.
3057	ChainUsersVec [ChainIdx].FarUsers.erase(Ptr: UserInst);
3058	}
3059
3060	/// Populate the vector of Chains.
3061	///
3062	/// This decreases ILP at the architecture level. Targets with ample registers,
3063	/// multiple memory ports, and no register renaming probably don't want
3064	/// this. However, such targets should probably disable LSR altogether.
3065	///
3066	/// The job of LSR is to make a reasonable choice of induction variables across
3067	/// the loop. Subsequent passes can easily "unchain" computation exposing more
3068	/// ILP within the loop* if the target wants it.*
3069	///
3070	/// Finding the best IV chain is potentially a scheduling problem. Since LSR
3071	/// will not reorder memory operations, it will recognize this as a chain, but
3072	/// will generate redundant IV increments. Ideally this would be corrected later
3073	/// by a smart scheduler:
3074	/// = A[i]
3075	/// = A[i+x]
3076	/// A[i] =
3077	/// A[i+x] =
3078	///
3079	/// TODO: Walk the entire domtree within this loop, not just the path to the
3080	/// loop latch. This will discover chains on side paths, but requires
3081	/// maintaining multiple copies of the Chains state.
3082	void LSRInstance::CollectChains() {
3083	LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
3084	SmallVector<ChainUsers, `8`> ChainUsersVec;
3085
3086	SmallVector<BasicBlock *,`8`> LatchPath;
3087	BasicBlock *LoopHeader = L->getHeader();
3088	for (DomTreeNode *Rung = DT.getNode(BB: L->getLoopLatch());
3089	Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3090	LatchPath.push_back(Elt: Rung->getBlock());
3091	}
3092	LatchPath.push_back(Elt: LoopHeader);
3093
3094	// Walk the instruction stream from the loop header to the loop latch.
3095	for (BasicBlock *BB : reverse(C&: LatchPath)) {
3096	for (Instruction &I : *BB) {
3097	// Skip instructions that weren't seen by IVUsers analysis.
3098	if (isa<PHINode>(Val: I) \|\| !IU.isIVUserOrOperand(Inst: &I))
3099	continue;
3100
3101	// Ignore users that are part of a SCEV expression. This way we only
3102	// consider leaf IV Users. This effectively rediscovers a portion of
3103	// IVUsers analysis but in program order this time.
3104	if (SE.isSCEVable(Ty: I.getType()) && !isa<SCEVUnknown>(Val: SE.getSCEV(V: &I)))
3105	continue;
3106
3107	// Remove this instruction from any NearUsers set it may be in.
3108	for (unsigned ChainIdx = `0`, NChains = IVChainVec.size();
3109	ChainIdx < NChains; ++ChainIdx) {
3110	ChainUsersVec [ChainIdx].NearUsers.erase(Ptr: &I);
3111	}
3112	// Search for operands that can be chained.
3113	SmallPtrSet<Instruction*, `4`> UniqueOperands;
3114	User::op_iterator IVOpEnd = I.op_end();
3115	User::op_iterator IVOpIter = findIVOperand(OI: I.op_begin(), OE: IVOpEnd, L, SE);
3116	while (IVOpIter != IVOpEnd) {
3117	Instruction IVOpInst = cast<Instruction>(Val&: IVOpIter);
3118	if (UniqueOperands.insert(Ptr: IVOpInst).second)
3119	ChainInstruction(UserInst: &I, IVOper: IVOpInst, ChainUsersVec);
3120	IVOpIter = findIVOperand(OI: std::next(x: IVOpIter), OE: IVOpEnd, L, SE);
3121	}
3122	} // Continue walking down the instructions.
3123	} // Continue walking down the domtree.
3124	// Visit phi backedges to determine if the chain can generate the IV postinc.
3125	for (PHINode &PN : L->getHeader()->phis()) {
3126	if (!SE.isSCEVable(Ty: PN.getType()))
3127	continue;
3128
3129	Instruction *IncV =
3130	dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: L->getLoopLatch()));
3131	if (IncV)
3132	ChainInstruction(UserInst: &PN, IVOper: IncV, ChainUsersVec);
3133	}
3134	// Remove any unprofitable chains.
3135	unsigned ChainIdx = `0`;
3136	for (unsigned UsersIdx = `0`, NChains = IVChainVec.size();
3137	UsersIdx < NChains; ++UsersIdx) {
3138	if (!isProfitableChain(Chain&: IVChainVec [UsersIdx],
3139	Users&: ChainUsersVec [UsersIdx].FarUsers, SE, TTI))
3140	continue;
3141	// Preserve the chain at UsesIdx.
3142	if (ChainIdx != UsersIdx)
3143	IVChainVec [ChainIdx] = IVChainVec [UsersIdx];
3144	FinalizeChain(Chain&: IVChainVec [ChainIdx]);
3145	++ChainIdx;
3146	}
3147	IVChainVec.resize(N: ChainIdx);
3148	}
3149
3150	void LSRInstance::FinalizeChain(IVChain &Chain) {
3151	assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3152	LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[`0`].UserInst << "\n");
3153
3154	for (const IVInc &Inc : Chain) {
3155	LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
3156	auto UseI = find(Range: Inc.UserInst->operands(), Val: Inc.IVOperand);
3157	assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
3158	IVIncSet.insert(Ptr: UseI);
3159	}
3160	}
3161
3162	/// Return true if the IVInc can be folded into an addressing mode.
3163	static bool canFoldIVIncExpr(const SCEV IncExpr, Instruction UserInst,
3164	Value Operand, const* TargetTransformInfo &TTI) {
3165	const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(Val: IncExpr);
3166	if (!IncConst \|\| !isAddressUse(TTI, Inst: UserInst, OperandVal: Operand))
3167	return false;
3168
3169	if (IncConst->getAPInt().getSignificantBits() > `64`)
3170	return false;
3171
3172	MemAccessTy AccessTy = getAccessType(TTI, Inst: UserInst, OperandVal: Operand);
3173	int64_t IncOffset = IncConst->getValue()->getSExtValue();
3174	if (!isAlwaysFoldable(TTI, Kind: LSRUse::Address, AccessTy, /BaseGV=/nullptr,
3175	BaseOffset: IncOffset, /HasBaseReg=/false))
3176	return false;
3177
3178	return true;
3179	}
3180
3181	/// Generate an add or subtract for each IVInc in a chain to materialize the IV
3182	/// user's operand from the previous IV user's operand.
3183	void LSRInstance::GenerateIVChain(const IVChain &Chain,
3184	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3185	// Find the new IVOperand for the head of the chain. It may have been replaced
3186	// by LSR.
3187	const IVInc &Head = Chain.Incs [`0`];
3188	User::op_iterator IVOpEnd = Head.UserInst->op_end();
3189	// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
3190	User::op_iterator IVOpIter = findIVOperand(OI: Head.UserInst->op_begin(),
3191	OE: IVOpEnd, L, SE);
3192	Value IVSrc = nullptr*;
3193	while (IVOpIter != IVOpEnd) {
3194	IVSrc = getWideOperand(Oper: *IVOpIter);
3195
3196	// If this operand computes the expression that the chain needs, we may use
3197	// it. (Check this after setting IVSrc which is used below.)
3198	//
3199	// Note that if Head.IncExpr is wider than IVSrc, then this phi is too
3200	// narrow for the chain, so we can no longer use it. We do allow using a
3201	// wider phi, assuming the LSR checked for free truncation. In that case we
3202	// should already have a truncate on this operand such that
3203	// getSCEV(IVSrc) == IncExpr.
3204	if (SE.getSCEV(V: *IVOpIter) == Head.IncExpr
3205	\|\| SE.getSCEV(V: IVSrc) == Head.IncExpr) {
3206	break;
3207	}
3208	IVOpIter = findIVOperand(OI: std::next(x: IVOpIter), OE: IVOpEnd, L, SE);
3209	}
3210	if (IVOpIter == IVOpEnd) {
3211	// Gracefully give up on this chain.
3212	LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
3213	return;
3214	}
3215	assert(IVSrc && "Failed to find IV chain source");
3216
3217	LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
3218	Type *IVTy = IVSrc->getType();
3219	Type *IntTy = SE.getEffectiveSCEVType(Ty: IVTy);
3220	const SCEV LeftOverExpr = nullptr*;
3221	for (const IVInc &Inc : Chain) {
3222	Instruction *InsertPt = Inc.UserInst;
3223	if (isa<PHINode>(Val: InsertPt))
3224	InsertPt = L->getLoopLatch()->getTerminator();
3225
3226	// IVOper will replace the current IV User's operand. IVSrc is the IV
3227	// value currently held in a register.
3228	Value *IVOper = IVSrc;
3229	if (!Inc.IncExpr->isZero()) {
3230	// IncExpr was the result of subtraction of two narrow values, so must
3231	// be signed.
3232	const SCEV *IncExpr = SE.getNoopOrSignExtend(V: Inc.IncExpr, Ty: IntTy);
3233	LeftOverExpr = LeftOverExpr ?
3234	SE.getAddExpr(LHS: LeftOverExpr, RHS: IncExpr) : IncExpr;
3235	}
3236	if (LeftOverExpr && !LeftOverExpr->isZero()) {
3237	// Expand the IV increment.
3238	Rewriter.clearPostInc();
3239	Value *IncV = Rewriter.expandCodeFor(SH: LeftOverExpr, Ty: IntTy, I: InsertPt);
3240	const SCEV *IVOperExpr = SE.getAddExpr(LHS: SE.getUnknown(V: IVSrc),
3241	RHS: SE.getUnknown(V: IncV));
3242	IVOper = Rewriter.expandCodeFor(SH: IVOperExpr, Ty: IVTy, I: InsertPt);
3243
3244	// If an IV increment can't be folded, use it as the next IV value.
3245	if (!canFoldIVIncExpr(IncExpr: LeftOverExpr, UserInst: Inc.UserInst, Operand: Inc.IVOperand, TTI)) {
3246	assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
3247	IVSrc = IVOper;
3248	LeftOverExpr = nullptr;
3249	}
3250	}
3251	Type *OperTy = Inc.IVOperand->getType();
3252	if (IVTy != OperTy) {
3253	assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
3254	"cannot extend a chained IV");
3255	IRBuilder<> Builder(InsertPt);
3256	IVOper = Builder.CreateTruncOrBitCast(V: IVOper, DestTy: OperTy, Name: "lsr.chain");
3257	}
3258	Inc.UserInst->replaceUsesOfWith(From: Inc.IVOperand, To: IVOper);
3259	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: Inc.IVOperand))
3260	DeadInsts.emplace_back(Args&: OperandIsInstr);
3261	}
3262	// If LSR created a new, wider phi, we may also replace its postinc. We only
3263	// do this if we also found a wide value for the head of the chain.
3264	if (isa<PHINode>(Val: Chain.tailUserInst())) {
3265	for (PHINode &Phi : L->getHeader()->phis()) {
3266	if (Phi.getType() != IVSrc->getType())
3267	continue;
3268	Instruction *PostIncV = dyn_cast<Instruction>(
3269	Val: Phi.getIncomingValueForBlock(BB: L->getLoopLatch()));
3270	if (!PostIncV \|\| (SE.getSCEV(V: PostIncV) != SE.getSCEV(V: IVSrc)))
3271	continue;
3272	Value *IVOper = IVSrc;
3273	Type *PostIncTy = PostIncV->getType();
3274	if (IVTy != PostIncTy) {
3275	assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
3276	IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
3277	Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
3278	IVOper = Builder.CreatePointerCast(V: IVSrc, DestTy: PostIncTy, Name: "lsr.chain");
3279	}
3280	Phi.replaceUsesOfWith(From: PostIncV, To: IVOper);
3281	DeadInsts.emplace_back(Args&: PostIncV);
3282	}
3283	}
3284	}
3285
3286	void LSRInstance::CollectFixupsAndInitialFormulae() {
3287	BranchInst ExitBranch = nullptr*;
3288	bool SaveCmp = TTI.canSaveCmp(L, BI: &ExitBranch, SE: &SE, LI: &LI, DT: &DT, AC: &AC, LibInfo: &TLI);
3289
3290	// For calculating baseline cost
3291	SmallPtrSet<const SCEV *, `16`> Regs;
3292	DenseSet<const SCEV *> VisitedRegs;
3293	DenseSet<size_t> VisitedLSRUse;
3294
3295	for (const IVStrideUse &U : IU) {
3296	Instruction *UserInst = U.getUser();
3297	// Skip IV users that are part of profitable IV Chains.
3298	User::op_iterator UseI =
3299	find(Range: UserInst->operands(), Val: U.getOperandValToReplace());
3300	assert(UseI != UserInst->op_end() && "cannot find IV operand");
3301	if (IVIncSet.count(Ptr: UseI)) {
3302	LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << `'\n'`);
3303	continue;
3304	}
3305
3306	LSRUse::KindType Kind = LSRUse::Basic;
3307	MemAccessTy AccessTy;
3308	if (isAddressUse(TTI, Inst: UserInst, OperandVal: U.getOperandValToReplace())) {
3309	Kind = LSRUse::Address;
3310	AccessTy = getAccessType(TTI, Inst: UserInst, OperandVal: U.getOperandValToReplace());
3311	}
3312
3313	const SCEV *S = IU.getExpr(IU: U);
3314	if (!S)
3315	continue;
3316	PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
3317
3318	// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
3319	// (N - i == 0), and this allows (N - i) to be the expression that we work
3320	// with rather than just N or i, so we can consider the register
3321	// requirements for both N and i at the same time. Limiting this code to
3322	// equality icmps is not a problem because all interesting loops use
3323	// equality icmps, thanks to IndVarSimplify.
3324	if (ICmpInst *CI = dyn_cast<ICmpInst>(Val: UserInst)) {
3325	// If CI can be saved in some target, like replaced inside hardware loop
3326	// in PowerPC, no need to generate initial formulae for it.
3327	if (SaveCmp && CI == dyn_cast<ICmpInst>(Val: ExitBranch->getCondition()))
3328	continue;
3329	if (CI->isEquality()) {
3330	// Swap the operands if needed to put the OperandValToReplace on the
3331	// left, for consistency.
3332	Value *NV = CI->getOperand(i_nocapture: `1`);
3333	if (NV == U.getOperandValToReplace()) {
3334	CI->setOperand(i_nocapture: `1`, Val_nocapture: CI->getOperand(i_nocapture: `0`));
3335	CI->setOperand(i_nocapture: `0`, Val_nocapture: NV);
3336	NV = CI->getOperand(i_nocapture: `1`);
3337	Changed = true;
3338	}
3339
3340	// x == y --> x - y == 0
3341	const SCEV *N = SE.getSCEV(V: NV);
3342	if (SE.isLoopInvariant(S: N, L) && Rewriter.isSafeToExpand(S: N) &&
3343	(!NV->getType()->isPointerTy() \|\|
3344	SE.getPointerBase(V: N) == SE.getPointerBase(V: S))) {
3345	// S is normalized, so normalize N before folding it into S
3346	// to keep the result normalized.
3347	N = normalizeForPostIncUse(S: N, Loops: TmpPostIncLoops, SE);
3348	if (!N)
3349	continue;
3350	Kind = LSRUse::ICmpZero;
3351	S = SE.getMinusSCEV(LHS: N, RHS: S);
3352	} else if (L->isLoopInvariant(V: NV) &&
3353	(!isa<Instruction>(Val: NV) \|\|
3354	DT.dominates(Def: cast<Instruction>(Val: NV), BB: L->getHeader())) &&
3355	!NV->getType()->isPointerTy()) {
3356	// If we can't generally expand the expression (e.g. it contains
3357	// a divide), but it is already at a loop invariant point before the
3358	// loop, wrap it in an unknown (to prevent the expander from trying
3359	// to re-expand in a potentially unsafe way.) The restriction to
3360	// integer types is required because the unknown hides the base, and
3361	// SCEV can't compute the difference of two unknown pointers.
3362	N = SE.getUnknown(V: NV);
3363	N = normalizeForPostIncUse(S: N, Loops: TmpPostIncLoops, SE);
3364	if (!N)
3365	continue;
3366	Kind = LSRUse::ICmpZero;
3367	S = SE.getMinusSCEV(LHS: N, RHS: S);
3368	assert(!isa<SCEVCouldNotCompute>(S));
3369	}
3370
3371	// -1 and the negations of all interesting strides (except the negation
3372	// of -1) are now also interesting.
3373	for (size_t i = `0`, e = Factors.size(); i != e; ++i)
3374	if (Factors [i] != -`1`)
3375	Factors.insert(X: -(uint64_t)Factors [i]);
3376	Factors.insert(X: -`1`);
3377	}
3378	}
3379
3380	// Get or create an LSRUse.
3381	std::pair<size_t, int64_t> P = getUse(Expr&: S, Kind, AccessTy);
3382	size_t LUIdx = P.first;
3383	int64_t Offset = P.second;
3384	LSRUse &LU = Uses [LUIdx];
3385
3386	// Record the fixup.
3387	LSRFixup &LF = LU.getNewFixup();
3388	LF.UserInst = UserInst;
3389	LF.OperandValToReplace = U.getOperandValToReplace();
3390	LF.PostIncLoops = TmpPostIncLoops;
3391	LF.Offset = Offset;
3392	LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3393
3394	// Create SCEV as Formula for calculating baseline cost
3395	if (!VisitedLSRUse.count(V: LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3396	Formula F;
3397	F.initialMatch(S, L, SE);
3398	BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
3399	VisitedLSRUse.insert(V: LUIdx);
3400	}
3401
3402	if (!LU.WidestFixupType \|\|
3403	SE.getTypeSizeInBits(Ty: LU.WidestFixupType) <
3404	SE.getTypeSizeInBits(Ty: LF.OperandValToReplace->getType()))
3405	LU.WidestFixupType = LF.OperandValToReplace->getType();
3406
3407	// If this is the first use of this LSRUse, give it a formula.
3408	if (LU.Formulae.empty()) {
3409	InsertInitialFormula(S, LU, LUIdx);
3410	CountRegisters(F: LU.Formulae.back(), LUIdx);
3411	}
3412	}
3413
3414	LLVM_DEBUG(print_fixups(dbgs()));
3415	}
3416
3417	/// Insert a formula for the given expression into the given use, separating out
3418	/// loop-variant portions from loop-invariant and loop-computable portions.
3419	void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
3420	size_t LUIdx) {
3421	// Mark uses whose expressions cannot be expanded.
3422	if (!Rewriter.isSafeToExpand(S))
3423	LU.RigidFormula = true;
3424
3425	Formula F;
3426	F.initialMatch(S, L, SE);
3427	bool Inserted = InsertFormula(LU, LUIdx, F);
3428	assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3429	}
3430
3431	/// Insert a simple single-register formula for the given expression into the
3432	/// given use.
3433	void
3434	LSRInstance::InsertSupplementalFormula(const SCEV *S,
3435	LSRUse &LU, size_t LUIdx) {
3436	Formula F;
3437	F.BaseRegs.push_back(Elt: S);
3438	F.HasBaseReg = true;
3439	bool Inserted = InsertFormula(LU, LUIdx, F);
3440	assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3441	}
3442
3443	/// Note which registers are used by the given formula, updating RegUses.
3444	void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3445	if (F.ScaledReg)
3446	RegUses.countRegister(Reg: F.ScaledReg, LUIdx);
3447	for (const SCEV *BaseReg : F.BaseRegs)
3448	RegUses.countRegister(Reg: BaseReg, LUIdx);
3449	}
3450
3451	/// If the given formula has not yet been inserted, add it to the list, and
3452	/// return true. Return false otherwise.
3453	bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3454	// Do not insert formula that we will not be able to expand.
3455	assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3456	"Formula is illegal");
3457
3458	if (!LU.InsertFormula(F, L: *L))
3459	return false;
3460
3461	CountRegisters(F, LUIdx);
3462	return true;
3463	}
3464
3465	/// Check for other uses of loop-invariant values which we're tracking. These
3466	/// other uses will pin these values in registers, making them less profitable
3467	/// for elimination.
3468	/// TODO: This currently misses non-constant addrec step registers.
3469	/// TODO: Should this give more weight to users inside the loop?
3470	void
3471	LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3472	SmallVector<const SCEV *, `8`> Worklist(RegUses.begin(), RegUses.end());
3473	SmallPtrSet<const SCEV *, `32`> Visited;
3474
3475	// Don't collect outside uses if we are favoring postinc - the instructions in
3476	// the loop are more important than the ones outside of it.
3477	if (AMK == TTI::AMK_PostIndexed)
3478	return;
3479
3480	while (!Worklist.empty()) {
3481	const SCEV *S = Worklist.pop_back_val();
3482
3483	// Don't process the same SCEV twice
3484	if (!Visited.insert(Ptr: S).second)
3485	continue;
3486
3487	if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(Val: S))
3488	append_range(C&: Worklist, R: N->operands());
3489	else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(Val: S))
3490	Worklist.push_back(Elt: C->getOperand());
3491	else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
3492	Worklist.push_back(Elt: D->getLHS());
3493	Worklist.push_back(Elt: D->getRHS());
3494	} else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(Val: S)) {
3495	const Value *V = US->getValue();
3496	if (const Instruction *Inst = dyn_cast<Instruction>(Val: V)) {
3497	// Look for instructions defined outside the loop.
3498	if (L->contains(Inst)) continue;
3499	} else if (isa<Constant>(Val: V))
3500	// Constants can be re-materialized.
3501	continue;
3502	for (const Use &U : V->uses()) {
3503	const Instruction *UserInst = dyn_cast<Instruction>(Val: U.getUser());
3504	// Ignore non-instructions.
3505	if (!UserInst)
3506	continue;
3507	// Don't bother if the instruction is an EHPad.
3508	if (UserInst->isEHPad())
3509	continue;
3510	// Ignore instructions in other functions (as can happen with
3511	// Constants).
3512	if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3513	continue;
3514	// Ignore instructions not dominated by the loop.
3515	const BasicBlock *UseBB = !isa<PHINode>(Val: UserInst) ?
3516	UserInst->getParent() :
3517	cast<PHINode>(Val: UserInst)->getIncomingBlock(
3518	i: PHINode::getIncomingValueNumForOperand(i: U.getOperandNo()));
3519	if (!DT.dominates(A: L->getHeader(), B: UseBB))
3520	continue;
3521	// Don't bother if the instruction is in a BB which ends in an EHPad.
3522	if (UseBB->getTerminator()->isEHPad())
3523	continue;
3524
3525	// Ignore cases in which the currently-examined value could come from
3526	// a basic block terminated with an EHPad. This checks all incoming
3527	// blocks of the phi node since it is possible that the same incoming
3528	// value comes from multiple basic blocks, only some of which may end
3529	// in an EHPad. If any of them do, a subsequent rewrite attempt by this
3530	// pass would try to insert instructions into an EHPad, hitting an
3531	// assertion.
3532	if (isa<PHINode>(Val: UserInst)) {
3533	const auto *PhiNode = cast<PHINode>(Val: UserInst);
3534	bool HasIncompatibleEHPTerminatedBlock = false;
3535	llvm::Value *ExpectedValue = U;
3536	for (unsigned int I = `0`; I < PhiNode->getNumIncomingValues(); I++) {
3537	if (PhiNode->getIncomingValue(i: I) == ExpectedValue) {
3538	if (PhiNode->getIncomingBlock(i: I)->getTerminator()->isEHPad()) {
3539	HasIncompatibleEHPTerminatedBlock = true;
3540	break;
3541	}
3542	}
3543	}
3544	if (HasIncompatibleEHPTerminatedBlock) {
3545	continue;
3546	}
3547	}
3548
3549	// Don't bother rewriting PHIs in catchswitch blocks.
3550	if (isa<CatchSwitchInst>(Val: UserInst->getParent()->getTerminator()))
3551	continue;
3552	// Ignore uses which are part of other SCEV expressions, to avoid
3553	// analyzing them multiple times.
3554	if (SE.isSCEVable(Ty: UserInst->getType())) {
3555	const SCEV UserS = SE.getSCEV(V: const_cast<Instruction >(UserInst));
3556	// If the user is a no-op, look through to its uses.
3557	if (!isa<SCEVUnknown>(Val: UserS))
3558	continue;
3559	if (UserS == US) {
3560	Worklist.push_back(
3561	Elt: SE.getUnknown(V: const_cast<Instruction *>(UserInst)));
3562	continue;
3563	}
3564	}
3565	// Ignore icmp instructions which are already being analyzed.
3566	if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Val: UserInst)) {
3567	unsigned OtherIdx = !U.getOperandNo();
3568	Value OtherOp = const_cast<Value >(ICI->getOperand(i_nocapture: OtherIdx));
3569	if (SE.hasComputableLoopEvolution(S: SE.getSCEV(V: OtherOp), L))
3570	continue;
3571	}
3572
3573	std::pair<size_t, int64_t> P = getUse(
3574	Expr&: S, Kind: LSRUse::Basic, AccessTy: MemAccessTy ());
3575	size_t LUIdx = P.first;
3576	int64_t Offset = P.second;
3577	LSRUse &LU = Uses [LUIdx];
3578	LSRFixup &LF = LU.getNewFixup();
3579	LF.UserInst = const_cast<Instruction *>(UserInst);
3580	LF.OperandValToReplace = U;
3581	LF.Offset = Offset;
3582	LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3583	if (!LU.WidestFixupType \|\|
3584	SE.getTypeSizeInBits(Ty: LU.WidestFixupType) <
3585	SE.getTypeSizeInBits(Ty: LF.OperandValToReplace->getType()))
3586	LU.WidestFixupType = LF.OperandValToReplace->getType();
3587	InsertSupplementalFormula(S: US, LU, LUIdx);
3588	CountRegisters(F: LU.Formulae.back(), LUIdx: Uses.size() - `1`);
3589	break;
3590	}
3591	}
3592	}
3593	}
3594
3595	/// Split S into subexpressions which can be pulled out into separate
3596	/// registers. If C is non-null, multiply each subexpression by C.
3597	///
3598	/// Return remainder expression after factoring the subexpressions captured by
3599	/// Ops. If Ops is complete, return NULL.
3600	static const SCEV CollectSubexprs(const* SCEV S, const* SCEVConstant *C,
3601	SmallVectorImpl<const SCEV *> &Ops,
3602	const Loop *L,
3603	ScalarEvolution &SE,
3604	unsigned Depth = `0`) {
3605	// Arbitrarily cap recursion to protect compile time.
3606	if (Depth >= `3`)
3607	return S;
3608
3609	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
3610	// Break out add operands.
3611	for (const SCEV *S : Add->operands()) {
3612	const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth: Depth+`1`);
3613	if (Remainder)
3614	Ops.push_back(Elt: C ? SE.getMulExpr(LHS: C, RHS: Remainder) : Remainder);
3615	}
3616	return nullptr;
3617	} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
3618	// Split a non-zero base out of an addrec.
3619	if (AR->getStart()->isZero() \|\| !AR->isAffine())
3620	return S;
3621
3622	const SCEV *Remainder = CollectSubexprs(S: AR->getStart(),
3623	C, Ops, L, SE, Depth: Depth+`1`);
3624	// Split the non-zero AddRec unless it is part of a nested recurrence that
3625	// does not pertain to this loop.
3626	if (Remainder && (AR->getLoop() == L \|\| !isa<SCEVAddRecExpr>(Val: Remainder))) {
3627	Ops.push_back(Elt: C ? SE.getMulExpr(LHS: C, RHS: Remainder) : Remainder);
3628	Remainder = nullptr;
3629	}
3630	if (Remainder != AR->getStart()) {
3631	if (!Remainder)
3632	Remainder = SE.getConstant(Ty: AR->getType(), V: `0`);
3633	return SE.getAddRecExpr(Start: Remainder,
3634	Step: AR->getStepRecurrence(SE),
3635	L: AR->getLoop(),
3636	//FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3637	Flags: SCEV::FlagAnyWrap);
3638	}
3639	} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: S)) {
3640	// Break (C (a + b + c)) into Ca + Cb + Cc.
3641	if (Mul->getNumOperands() != `2`)
3642	return S;
3643	if (const SCEVConstant *Op0 =
3644	dyn_cast<SCEVConstant>(Val: Mul->getOperand(i: `0`))) {
3645	C = C ? cast<SCEVConstant>(Val: SE.getMulExpr(LHS: C, RHS: Op0)) : Op0;
3646	const SCEV *Remainder =
3647	CollectSubexprs(S: Mul->getOperand(i: `1`), C, Ops, L, SE, Depth: Depth+`1`);
3648	if (Remainder)
3649	Ops.push_back(Elt: SE.getMulExpr(LHS: C, RHS: Remainder));
3650	return nullptr;
3651	}
3652	}
3653	return S;
3654	}
3655
3656	/// Return true if the SCEV represents a value that may end up as a
3657	/// post-increment operation.
3658	static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
3659	LSRUse &LU, const SCEV S, const* Loop *L,
3660	ScalarEvolution &SE) {
3661	if (LU.Kind != LSRUse::Address \|\|
3662	!LU.AccessTy.getType()->isIntOrIntVectorTy())
3663	return false;
3664	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S);
3665	if (!AR)
3666	return false;
3667	const SCEV *LoopStep = AR->getStepRecurrence(SE);
3668	if (!isa<SCEVConstant>(Val: LoopStep))
3669	return false;
3670	// Check if a post-indexed load/store can be used.
3671	if (TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty: AR->getType()) \|\|
3672	TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty: AR->getType())) {
3673	const SCEV *LoopStart = AR->getStart();
3674	if (!isa<SCEVConstant>(Val: LoopStart) && SE.isLoopInvariant(S: LoopStart, L))
3675	return true;
3676	}
3677	return false;
3678	}
3679
3680	/// Helper function for LSRInstance::GenerateReassociations.
3681	void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3682	const Formula &Base,
3683	unsigned Depth, size_t Idx,
3684	bool IsScaledReg) {
3685	const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
3686	// Don't generate reassociations for the base register of a value that
3687	// may generate a post-increment operator. The reason is that the
3688	// reassociations cause extra base+register formula to be created,
3689	// and possibly chosen, but the post-increment is more efficient.
3690	if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, S: BaseReg, L, SE))
3691	return;
3692	SmallVector<const SCEV *, `8`> AddOps;
3693	const SCEV Remainder = CollectSubexprs(S: BaseReg, C: nullptr*, Ops&: AddOps, L, SE);
3694	if (Remainder)
3695	AddOps.push_back(Elt: Remainder);
3696
3697	if (AddOps.size() == `1`)
3698	return;
3699
3700	for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3701	JE = AddOps.end();
3702	J != JE; ++J) {
3703	// Loop-variant "unknown" values are uninteresting; we won't be able to
3704	// do anything meaningful with them.
3705	if (isa<SCEVUnknown>(Val: J) && !SE.isLoopInvariant(S: J, L))
3706	continue;
3707
3708	// Don't pull a constant into a register if the constant could be folded
3709	// into an immediate field.
3710	if (isAlwaysFoldable(TTI, SE, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
3711	AccessTy: LU.AccessTy, S: *J, HasBaseReg: Base.getNumRegs() > `1`))
3712	continue;
3713
3714	// Collect all operands except J.*
3715	SmallVector<const SCEV *, `8`> InnerAddOps(
3716	((const SmallVector<const SCEV *, `8`> &)AddOps).begin(), J);
3717	InnerAddOps.append(in_start: std::next(x: J),
3718	in_end: ((const SmallVector<const SCEV *, `8`> &)AddOps).end());
3719
3720	// Don't leave just a constant behind in a register if the constant could
3721	// be folded into an immediate field.
3722	if (InnerAddOps.size() == `1` &&
3723	isAlwaysFoldable(TTI, SE, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
3724	AccessTy: LU.AccessTy, S: InnerAddOps [`0`], HasBaseReg: Base.getNumRegs() > `1`))
3725	continue;
3726
3727	const SCEV *InnerSum = SE.getAddExpr(Ops&: InnerAddOps);
3728	if (InnerSum->isZero())
3729	continue;
3730	Formula F = Base;
3731
3732	// Add the remaining pieces of the add back into the new formula.
3733	const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(Val: InnerSum);
3734	if (InnerSumSC && SE.getTypeSizeInBits(Ty: InnerSumSC->getType()) <= `64` &&
3735	TTI.isLegalAddImmediate(Imm: (uint64_t)F.UnfoldedOffset +
3736	InnerSumSC->getValue()->getZExtValue())) {
3737	F.UnfoldedOffset =
3738	(uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
3739	if (IsScaledReg)
3740	F.ScaledReg = nullptr;
3741	else
3742	F.BaseRegs.erase(CI: F.BaseRegs.begin() + Idx);
3743	} else if (IsScaledReg)
3744	F.ScaledReg = InnerSum;
3745	else
3746	F.BaseRegs [Idx] = InnerSum;
3747
3748	// Add J as its own register, or an unfolded immediate.
3749	const SCEVConstant SC = dyn_cast<SCEVConstant>(Val: J);
3750	if (SC && SE.getTypeSizeInBits(Ty: SC->getType()) <= `64` &&
3751	TTI.isLegalAddImmediate(Imm: (uint64_t)F.UnfoldedOffset +
3752	SC->getValue()->getZExtValue()))
3753	F.UnfoldedOffset =
3754	(uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3755	else
3756	F.BaseRegs.push_back(Elt: *J);
3757	// We may have changed the number of register in base regs, adjust the
3758	// formula accordingly.
3759	F.canonicalize(L: *L);
3760
3761	if (InsertFormula(LU, LUIdx, F))
3762	// If that formula hadn't been seen before, recurse to find more like
3763	// it.
3764	// Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2)
3765	// Because just Depth is not enough to bound compile time.
3766	// This means that every time AddOps.size() is greater 16^x we will add
3767	// x to Depth.
3768	GenerateReassociations(LU, LUIdx, Base: LU.Formulae.back(),
3769	Depth: Depth + `1` + (Log2_32(Value: AddOps.size()) >> `2`));
3770	}
3771	}
3772
3773	/// Split out subexpressions from adds and the bases of addrecs.
3774	void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3775	Formula Base, unsigned Depth) {
3776	assert(Base.isCanonical(*L) && "Input must be in the canonical form");
3777	// Arbitrarily cap recursion to protect compile time.
3778	if (Depth >= `3`)
3779	return;
3780
3781	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
3782	GenerateReassociationsImpl(LU, LUIdx, Base, Depth, Idx: i);
3783
3784	if (Base.Scale == `1`)
3785	GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3786	/ Idx / -`1`, / IsScaledReg / true);
3787	}
3788
3789	/// Generate a formula consisting of all of the loop-dominating registers added
3790	/// into a single register.
3791	void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3792	Formula Base) {
3793	// This method is only interesting on a plurality of registers.
3794	if (Base.BaseRegs.size() + (Base.Scale == `1`) +
3795	(Base.UnfoldedOffset != `0`) <= `1`)
3796	return;
3797
3798	// Flatten the representation, i.e., reg1 + 1reg2 => reg1 + reg2, before*
3799	// processing the formula.
3800	Base.unscale();
3801	SmallVector<const SCEV *, `4`> Ops;
3802	Formula NewBase = Base;
3803	NewBase.BaseRegs.clear();
3804	Type CombinedIntegerType = nullptr*;
3805	for (const SCEV *BaseReg : Base.BaseRegs) {
3806	if (SE.properlyDominates(S: BaseReg, BB: L->getHeader()) &&
3807	!SE.hasComputableLoopEvolution(S: BaseReg, L)) {
3808	if (!CombinedIntegerType)
3809	CombinedIntegerType = SE.getEffectiveSCEVType(Ty: BaseReg->getType());
3810	Ops.push_back(Elt: BaseReg);
3811	}
3812	else
3813	NewBase.BaseRegs.push_back(Elt: BaseReg);
3814	}
3815
3816	// If no register is relevant, we're done.
3817	if (Ops.size() == `0`)
3818	return;
3819
3820	// Utility function for generating the required variants of the combined
3821	// registers.
3822	auto GenerateFormula = [&](const SCEV *Sum) {
3823	Formula F = NewBase;
3824
3825	// TODO: If Sum is zero, it probably means ScalarEvolution missed an
3826	// opportunity to fold something. For now, just ignore such cases
3827	// rather than proceed with zero in a register.
3828	if (Sum->isZero())
3829	return;
3830
3831	F.BaseRegs.push_back(Elt: Sum);
3832	F.canonicalize(L: *L);
3833	(void)InsertFormula(LU, LUIdx, F);
3834	};
3835
3836	// If we collected at least two registers, generate a formula combining them.
3837	if (Ops.size() > `1`) {
3838	SmallVector<const SCEV , `4`> OpsCopy(Ops); // Don't let SE modify Ops.*
3839	GenerateFormula (SE.getAddExpr(Ops&: OpsCopy));
3840	}
3841
3842	// If we have an unfolded offset, generate a formula combining it with the
3843	// registers collected.
3844	if (NewBase.UnfoldedOffset) {
3845	assert(CombinedIntegerType && "Missing a type for the unfolded offset");
3846	Ops.push_back(Elt: SE.getConstant(Ty: CombinedIntegerType, V: NewBase.UnfoldedOffset,
3847	isSigned: true));
3848	NewBase.UnfoldedOffset = `0`;
3849	GenerateFormula (SE.getAddExpr(Ops));
3850	}
3851	}
3852
3853	/// Helper function for LSRInstance::GenerateSymbolicOffsets.
3854	void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
3855	const Formula &Base, size_t Idx,
3856	bool IsScaledReg) {
3857	const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
3858	GlobalValue *GV = ExtractSymbol(S&: G, SE);
3859	if (G->isZero() \|\| !GV)
3860	return;
3861	Formula F = Base;
3862	F.BaseGV = GV;
3863	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
3864	return;
3865	if (IsScaledReg)
3866	F.ScaledReg = G;
3867	else
3868	F.BaseRegs [Idx] = G;
3869	(void)InsertFormula(LU, LUIdx, F);
3870	}
3871
3872	/// Generate reuse formulae using symbolic offsets.
3873	void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3874	Formula Base) {
3875	// We can't add a symbolic offset if the address already contains one.
3876	if (Base.BaseGV) return;
3877
3878	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
3879	GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, Idx: i);
3880	if (Base.Scale == `1`)
3881	GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, / Idx / -`1`,
3882	/ IsScaledReg / true);
3883	}
3884
3885	/// Helper function for LSRInstance::GenerateConstantOffsets.
3886	void LSRInstance::GenerateConstantOffsetsImpl(
3887	LSRUse &LU, unsigned LUIdx, const Formula &Base,
3888	const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
3889
3890	auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
3891	Formula F = Base;
3892	F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
3893
3894	if (isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F)) {
3895	// Add the offset to the base register.
3896	const SCEV *NewG = SE.getAddExpr(LHS: SE.getConstant(Ty: G->getType(), V: Offset), RHS: G);
3897	// If it cancelled out, drop the base register, otherwise update it.
3898	if (NewG->isZero()) {
3899	if (IsScaledReg) {
3900	F.Scale = `0`;
3901	F.ScaledReg = nullptr;
3902	} else
3903	F.deleteBaseReg(S&: F.BaseRegs [Idx]);
3904	F.canonicalize(L: *L);
3905	} else if (IsScaledReg)
3906	F.ScaledReg = NewG;
3907	else
3908	F.BaseRegs [Idx] = NewG;
3909
3910	(void)InsertFormula(LU, LUIdx, F);
3911	}
3912	};
3913
3914	const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
3915
3916	// With constant offsets and constant steps, we can generate pre-inc
3917	// accesses by having the offset equal the step. So, for access #0 with a
3918	// step of 8, we generate a G - 8 base which would require the first access
3919	// to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
3920	// for itself and hopefully becomes the base for other accesses. This means
3921	// means that a single pre-indexed access can be generated to become the new
3922	// base pointer for each iteration of the loop, resulting in no extra add/sub
3923	// instructions for pointer updating.
3924	if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
3925	if (auto *GAR = dyn_cast<SCEVAddRecExpr>(Val: G)) {
3926	if (auto *StepRec =
3927	dyn_cast<SCEVConstant>(Val: GAR->getStepRecurrence(SE))) {
3928	const APInt &StepInt = StepRec->getAPInt();
3929	int64_t Step = StepInt.isNegative() ?
3930	StepInt.getSExtValue() : StepInt.getZExtValue();
3931
3932	for (int64_t Offset : Worklist) {
3933	Offset -= Step;
3934	GenerateOffset (G, Offset);
3935	}
3936	}
3937	}
3938	}
3939	for (int64_t Offset : Worklist)
3940	GenerateOffset (G, Offset);
3941
3942	int64_t Imm = ExtractImmediate(S&: G, SE);
3943	if (G->isZero() \|\| Imm == `0`)
3944	return;
3945	Formula F = Base;
3946	F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3947	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
3948	return;
3949	if (IsScaledReg) {
3950	F.ScaledReg = G;
3951	} else {
3952	F.BaseRegs [Idx] = G;
3953	// We may generate non canonical Formula if G is a recurrent expr reg
3954	// related with current loop while F.ScaledReg is not.
3955	F.canonicalize(L: *L);
3956	}
3957	(void)InsertFormula(LU, LUIdx, F);
3958	}
3959
3960	/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
3961	void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3962	Formula Base) {
3963	// TODO: For now, just add the min and max offset, because it usually isn't
3964	// worthwhile looking at everything inbetween.
3965	SmallVector<int64_t, `2`> Worklist;
3966	Worklist.push_back(Elt: LU.MinOffset);
3967	if (LU.MaxOffset != LU.MinOffset)
3968	Worklist.push_back(Elt: LU.MaxOffset);
3969
3970	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
3971	GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, Idx: i);
3972	if (Base.Scale == `1`)
3973	GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, / Idx / -`1`,
3974	/ IsScaledReg / true);
3975	}
3976
3977	/// For ICmpZero, check to see if we can scale up the comparison. For example, x
3978	/// == y -> xc == yc.
3979	void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3980	Formula Base) {
3981	if (LU.Kind != LSRUse::ICmpZero) return;
3982
3983	// Determine the integer type for the base formula.
3984	Type *IntTy = Base.getType();
3985	if (!IntTy) return;
3986	if (SE.getTypeSizeInBits(Ty: IntTy) > `64`) return;
3987
3988	// Don't do this if there is more than one offset.
3989	if (LU.MinOffset != LU.MaxOffset) return;
3990
3991	// Check if transformation is valid. It is illegal to multiply pointer.
3992	if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
3993	return;
3994	for (const SCEV *BaseReg : Base.BaseRegs)
3995	if (BaseReg->getType()->isPointerTy())
3996	return;
3997	assert(!Base.BaseGV && "ICmpZero use is not legal!");
3998
3999	// Check each interesting stride.
4000	for (int64_t Factor : Factors) {
4001	// Check that Factor can be represented by IntTy
4002	if (!ConstantInt::isValueValidForType(Ty: IntTy, V: Factor))
4003	continue;
4004	// Check that the multiplication doesn't overflow.
4005	if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -`1`)
4006	continue;
4007	int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
4008	assert(Factor != `0` && "Zero factor not expected!");
4009	if (NewBaseOffset / Factor != Base.BaseOffset)
4010	continue;
4011	// If the offset will be truncated at this use, check that it is in bounds.
4012	if (!IntTy->isPointerTy() &&
4013	!ConstantInt::isValueValidForType(Ty: IntTy, V: NewBaseOffset))
4014	continue;
4015
4016	// Check that multiplying with the use offset doesn't overflow.
4017	int64_t Offset = LU.MinOffset;
4018	if (Offset == std::numeric_limits<int64_t>::min() && Factor == -`1`)
4019	continue;
4020	Offset = (uint64_t)Offset * Factor;
4021	if (Offset / Factor != LU.MinOffset)
4022	continue;
4023	// If the offset will be truncated at this use, check that it is in bounds.
4024	if (!IntTy->isPointerTy() &&
4025	!ConstantInt::isValueValidForType(Ty: IntTy, V: Offset))
4026	continue;
4027
4028	Formula F = Base;
4029	F.BaseOffset = NewBaseOffset;
4030
4031	// Check that this scale is legal.
4032	if (!isLegalUse(TTI, MinOffset: Offset, MaxOffset: Offset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
4033	continue;
4034
4035	// Compensate for the use having MinOffset built into it.
4036	F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
4037
4038	const SCEV *FactorS = SE.getConstant(Ty: IntTy, V: Factor);
4039
4040	// Check that multiplying with each base register doesn't overflow.
4041	for (size_t i = `0`, e = F.BaseRegs.size(); i != e; ++i) {
4042	F.BaseRegs [i] = SE.getMulExpr(LHS: F.BaseRegs [i], RHS: FactorS);
4043	if (getExactSDiv(LHS: F.BaseRegs [i], RHS: FactorS, SE) != Base.BaseRegs [i])
4044	goto next;
4045	}
4046
4047	// Check that multiplying with the scaled register doesn't overflow.
4048	if (F.ScaledReg) {
4049	F.ScaledReg = SE.getMulExpr(LHS: F.ScaledReg, RHS: FactorS);
4050	if (getExactSDiv(LHS: F.ScaledReg, RHS: FactorS, SE) != Base.ScaledReg)
4051	continue;
4052	}
4053
4054	// Check that multiplying with the unfolded offset doesn't overflow.
4055	if (F.UnfoldedOffset != `0`) {
4056	if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
4057	Factor == -`1`)
4058	continue;
4059	F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
4060	if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
4061	continue;
4062	// If the offset will be truncated, check that it is in bounds.
4063	if (!IntTy->isPointerTy() &&
4064	!ConstantInt::isValueValidForType(Ty: IntTy, V: F.UnfoldedOffset))
4065	continue;
4066	}
4067
4068	// If we make it here and it's legal, add it.
4069	(void)InsertFormula(LU, LUIdx, F);
4070	next:;
4071	}
4072	}
4073
4074	/// Generate stride factor reuse formulae by making use of scaled-offset address
4075	/// modes, for example.
4076	void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
4077	// Determine the integer type for the base formula.
4078	Type *IntTy = Base.getType();
4079	if (!IntTy) return;
4080
4081	// If this Formula already has a scaled register, we can't add another one.
4082	// Try to unscale the formula to generate a better scale.
4083	if (Base.Scale != `0` && !Base.unscale())
4084	return;
4085
4086	assert(Base.Scale == `0` && "unscale did not did its job!");
4087
4088	// Check each interesting stride.
4089	for (int64_t Factor : Factors) {
4090	Base.Scale = Factor;
4091	Base.HasBaseReg = Base.BaseRegs.size() > `1`;
4092	// Check whether this scale is going to be legal.
4093	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy,
4094	F: Base)) {
4095	// As a special-case, handle special out-of-loop Basic users specially.
4096	// TODO: Reconsider this special case.
4097	if (LU.Kind == LSRUse::Basic &&
4098	isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LSRUse::Special,
4099	AccessTy: LU.AccessTy, F: Base) &&
4100	LU.AllFixupsOutsideLoop)
4101	LU.Kind = LSRUse::Special;
4102	else
4103	continue;
4104	}
4105	// For an ICmpZero, negating a solitary base register won't lead to
4106	// new solutions.
4107	if (LU.Kind == LSRUse::ICmpZero &&
4108	!Base.HasBaseReg && Base.BaseOffset == `0` && !Base.BaseGV)
4109	continue;
4110	// For each addrec base reg, if its loop is current loop, apply the scale.
4111	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i) {
4112	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Base.BaseRegs [i]);
4113	if (AR && (AR->getLoop() == L \|\| LU.AllFixupsOutsideLoop)) {
4114	const SCEV *FactorS = SE.getConstant(Ty: IntTy, V: Factor);
4115	if (FactorS->isZero())
4116	continue;
4117	// Divide out the factor, ignoring high bits, since we'll be
4118	// scaling the value back up in the end.
4119	if (const SCEV Quotient = getExactSDiv(LHS: AR, RHS: FactorS, SE, IgnoreSignificantBits: true*))
4120	if (!Quotient->isZero()) {
4121	// TODO: This could be optimized to avoid all the copying.
4122	Formula F = Base;
4123	F.ScaledReg = Quotient;
4124	F.deleteBaseReg(S&: F.BaseRegs [i]);
4125	// The canonical representation of 1reg is reg, which is already in*
4126	// Base. In that case, do not try to insert the formula, it will be
4127	// rejected anyway.
4128	if (F.Scale == `1` && (F.BaseRegs.empty() \|\|
4129	(AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
4130	continue;
4131	// If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
4132	// non canonical Formula with ScaledReg's loop not being L.
4133	if (F.Scale == `1` && LU.AllFixupsOutsideLoop)
4134	F.canonicalize(L: *L);
4135	(void)InsertFormula(LU, LUIdx, F);
4136	}
4137	}
4138	}
4139	}
4140	}
4141
4142	/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
4143	/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
4144	/// perform the extension/truncate and normalize again, as the normalized form
4145	/// can result in folds that are not valid in the post-inc use contexts. The
4146	/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
4147	static const SCEV *
4148	getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
4149	const SCEV Expr, Type ToTy,
4150	ScalarEvolution &SE) {
4151	const SCEV Result = nullptr*;
4152	for (auto &L : Loops) {
4153	auto *DenormExpr = denormalizeForPostIncUse(S: Expr, Loops: L, SE);
4154	const SCEV *NewDenormExpr = SE.getAnyExtendExpr(Op: DenormExpr, Ty: ToTy);
4155	const SCEV *New = normalizeForPostIncUse(S: NewDenormExpr, Loops: L, SE);
4156	if (!New \|\| (Result && New != Result))
4157	return nullptr;
4158	Result = New;
4159	}
4160
4161	assert(Result && "failed to create expression");
4162	return Result;
4163	}
4164
4165	/// Generate reuse formulae from different IV types.
4166	void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
4167	// Don't bother truncating symbolic values.
4168	if (Base.BaseGV) return;
4169
4170	// Determine the integer type for the base formula.
4171	Type *DstTy = Base.getType();
4172	if (!DstTy) return;
4173	if (DstTy->isPointerTy())
4174	return;
4175
4176	// It is invalid to extend a pointer type so exit early if ScaledReg or
4177	// any of the BaseRegs are pointers.
4178	if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4179	return;
4180	if (any_of(Range&: Base.BaseRegs,
4181	P: [](const SCEV S) { return* S->getType()->isPointerTy(); }))
4182	return;
4183
4184	SmallVector<PostIncLoopSet> Loops;
4185	for (auto &LF : LU.Fixups)
4186	Loops.push_back(Elt: LF.PostIncLoops);
4187
4188	for (Type *SrcTy : Types) {
4189	if (SrcTy != DstTy && TTI.isTruncateFree(Ty1: SrcTy, Ty2: DstTy)) {
4190	Formula F = Base;
4191
4192	// Sometimes SCEV is able to prove zero during ext transform. It may
4193	// happen if SCEV did not do all possible transforms while creating the
4194	// initial node (maybe due to depth limitations), but it can do them while
4195	// taking ext.
4196	if (F.ScaledReg) {
4197	const SCEV *NewScaledReg =
4198	getAnyExtendConsideringPostIncUses(Loops, Expr: F.ScaledReg, ToTy: SrcTy, SE);
4199	if (!NewScaledReg \|\| NewScaledReg->isZero())
4200	continue;
4201	F.ScaledReg = NewScaledReg;
4202	}
4203	bool HasZeroBaseReg = false;
4204	for (const SCEV *&BaseReg : F.BaseRegs) {
4205	const SCEV *NewBaseReg =
4206	getAnyExtendConsideringPostIncUses(Loops, Expr: BaseReg, ToTy: SrcTy, SE);
4207	if (!NewBaseReg \|\| NewBaseReg->isZero()) {
4208	HasZeroBaseReg = true;
4209	break;
4210	}
4211	BaseReg = NewBaseReg;
4212	}
4213	if (HasZeroBaseReg)
4214	continue;
4215
4216	// TODO: This assumes we've done basic processing on all uses and
4217	// have an idea what the register usage is.
4218	if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4219	continue;
4220
4221	F.canonicalize(L: *L);
4222	(void)InsertFormula(LU, LUIdx, F);
4223	}
4224	}
4225	}
4226
4227	namespace {
4228
4229	/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
4230	/// modifications so that the search phase doesn't have to worry about the data
4231	/// structures moving underneath it.
4232	struct WorkItem {
4233	size_t LUIdx;
4234	int64_t Imm;
4235	const SCEV *OrigReg;
4236
4237	WorkItem(size_t LI, int64_t I, const SCEV *R)
4238	: LUIdx(LI), Imm(I), OrigReg(R) {}
4239
4240	void print(raw_ostream &OS) const;
4241	void dump() const;
4242	};
4243
4244	} // end anonymous namespace
4245
4246	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
4247	void WorkItem::print(raw_ostream &OS) const {
4248	OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4249	<< " , add offset " << Imm;
4250	}
4251
4252	LLVM_DUMP_METHOD void WorkItem::dump() const {
4253	print(OS&: errs()); errs() << `'\n'`;
4254	}
4255	#endif
4256
4257	/// Look for registers which are a constant distance apart and try to form reuse
4258	/// opportunities between them.
4259	void LSRInstance::GenerateCrossUseConstantOffsets() {
4260	// Group the registers by their value without any added constant offset.
4261	using ImmMapTy = std::map<int64_t, const SCEV *>;
4262
4263	DenseMap<const SCEV *, ImmMapTy> Map;
4264	DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4265	SmallVector<const SCEV *, `8`> Sequence;
4266	for (const SCEV *Use : RegUses) {
4267	const SCEV Reg = Use; // Make a copy for ExtractImmediate to modify.*
4268	int64_t Imm = ExtractImmediate(S&: Reg, SE);
4269	auto Pair = Map.insert(KV: std::make_pair(x&: Reg, y: ImmMapTy ()));
4270	if (Pair.second)
4271	Sequence.push_back(Elt: Reg);
4272	Pair.first ->second.insert(x: std::make_pair(x&: Imm, y&: Use));
4273	UsedByIndicesMap [Reg] \|= RegUses.getUsedByIndices(Reg: Use);
4274	}
4275
4276	// Now examine each set of registers with the same base value. Build up
4277	// a list of work to do and do the work in a separate step so that we're
4278	// not adding formulae and register counts while we're searching.
4279	SmallVector<WorkItem, `32`> WorkItems;
4280	SmallSet<std::pair<size_t, int64_t>, `32`> UniqueItems;
4281	for (const SCEV *Reg : Sequence) {
4282	const ImmMapTy &Imms = Map.find(Val: Reg)->second;
4283
4284	// It's not worthwhile looking for reuse if there's only one offset.
4285	if (Imms.size() == `1`)
4286	continue;
4287
4288	LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << `':'`;
4289	for (const auto &Entry
4290	: Imms) dbgs()
4291	<< `' '` << Entry.first;
4292	dbgs() << `'\n'`);
4293
4294	// Examine each offset.
4295	for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4296	J != JE; ++J) {
4297	const SCEV *OrigReg = J ->second;
4298
4299	int64_t JImm = J ->first;
4300	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg: OrigReg);
4301
4302	if (!isa<SCEVConstant>(Val: OrigReg) &&
4303	UsedByIndicesMap [Reg].count() == `1`) {
4304	LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4305	<< `'\n'`);
4306	continue;
4307	}
4308
4309	// Conservatively examine offsets between this orig reg a few selected
4310	// other orig regs.
4311	int64_t First = Imms.begin()->first;
4312	int64_t Last = std::prev(x: Imms.end())->first;
4313	// Compute (First + Last) / 2 without overflow using the fact that
4314	// First + Last = 2 (First + Last) + (First ^ Last).*
4315	int64_t Avg = (First & Last) + ((First ^ Last) >> `1`);
4316	// If the result is negative and First is odd and Last even (or vice versa),
4317	// we rounded towards -inf. Add 1 in that case, to round towards 0.
4318	Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> `63`));
4319	ImmMapTy::const_iterator OtherImms[] = {
4320	Imms.begin(), std::prev(x: Imms.end()),
4321	Imms.lower_bound(x: Avg)};
4322	for (const auto &M : OtherImms) {
4323	if (M == J \|\| M == JE) continue;
4324
4325	// Compute the difference between the two.
4326	int64_t Imm = (uint64_t)JImm - M ->first;
4327	for (unsigned LUIdx : UsedByIndices.set_bits())
4328	// Make a memo of this use, offset, and register tuple.
4329	if (UniqueItems.insert(V: std::make_pair(x&: LUIdx, y&: Imm)).second)
4330	WorkItems.push_back(Elt: WorkItem (LUIdx, Imm, OrigReg));
4331	}
4332	}
4333	}
4334
4335	Map.clear();
4336	Sequence.clear();
4337	UsedByIndicesMap.clear();
4338	UniqueItems.clear();
4339
4340	// Now iterate through the worklist and add new formulae.
4341	for (const WorkItem &WI : WorkItems) {
4342	size_t LUIdx = WI.LUIdx;
4343	LSRUse &LU = Uses [LUIdx];
4344	int64_t Imm = WI.Imm;
4345	const SCEV *OrigReg = WI.OrigReg;
4346
4347	Type *IntTy = SE.getEffectiveSCEVType(Ty: OrigReg->getType());
4348	const SCEV *NegImmS = SE.getSCEV(V: ConstantInt::get(Ty: IntTy, V: -(uint64_t)Imm));
4349	unsigned BitWidth = SE.getTypeSizeInBits(Ty: IntTy);
4350
4351	// TODO: Use a more targeted data structure.
4352	for (size_t L = `0`, LE = LU.Formulae.size(); L != LE; ++L) {
4353	Formula F = LU.Formulae [L];
4354	// FIXME: The code for the scaled and unscaled registers looks
4355	// very similar but slightly different. Investigate if they
4356	// could be merged. That way, we would not have to unscale the
4357	// Formula.
4358	F.unscale();
4359	// Use the immediate in the scaled register.
4360	if (F.ScaledReg == OrigReg) {
4361	int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
4362	// Don't create 50 + reg(-50).
4363	if (F.referencesReg(S: SE.getSCEV(
4364	V: ConstantInt::get(Ty: IntTy, V: -(uint64_t)Offset))))
4365	continue;
4366	Formula NewF = F;
4367	NewF.BaseOffset = Offset;
4368	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy,
4369	F: NewF))
4370	continue;
4371	NewF.ScaledReg = SE.getAddExpr(LHS: NegImmS, RHS: NewF.ScaledReg);
4372
4373	// If the new scale is a constant in a register, and adding the constant
4374	// value to the immediate would produce a value closer to zero than the
4375	// immediate itself, then the formula isn't worthwhile.
4376	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: NewF.ScaledReg))
4377	if (C->getValue()->isNegative() != (NewF.BaseOffset < `0`) &&
4378	(C->getAPInt().abs() * APInt (BitWidth, F.Scale))
4379	.ule(RHS: std::abs(i: NewF.BaseOffset)))
4380	continue;
4381
4382	// OK, looks good.
4383	NewF.canonicalize(L: *this->L);
4384	(void)InsertFormula(LU, LUIdx, F: NewF);
4385	} else {
4386	// Use the immediate in a base register.
4387	for (size_t N = `0`, NE = F.BaseRegs.size(); N != NE; ++N) {
4388	const SCEV *BaseReg = F.BaseRegs [N];
4389	if (BaseReg != OrigReg)
4390	continue;
4391	Formula NewF = F;
4392	NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
4393	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset,
4394	Kind: LU.Kind, AccessTy: LU.AccessTy, F: NewF)) {
4395	if (AMK == TTI::AMK_PostIndexed &&
4396	mayUsePostIncMode(TTI, LU, S: OrigReg, L: this->L, SE))
4397	continue;
4398	if (!TTI.isLegalAddImmediate(Imm: (uint64_t)NewF.UnfoldedOffset + Imm))
4399	continue;
4400	NewF = F;
4401	NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
4402	}
4403	NewF.BaseRegs [N] = SE.getAddExpr(LHS: NegImmS, RHS: BaseReg);
4404
4405	// If the new formula has a constant in a register, and adding the
4406	// constant value to the immediate would produce a value closer to
4407	// zero than the immediate itself, then the formula isn't worthwhile.
4408	for (const SCEV *NewReg : NewF.BaseRegs)
4409	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: NewReg))
4410	if ((C->getAPInt() + NewF.BaseOffset)
4411	.abs()
4412	.slt(RHS: std::abs(i: NewF.BaseOffset)) &&
4413	(C->getAPInt() + NewF.BaseOffset).countr_zero() >=
4414	(unsigned)llvm::countr_zero<uint64_t>(Val: NewF.BaseOffset))
4415	goto skip_formula;
4416
4417	// Ok, looks good.
4418	NewF.canonicalize(L: *this->L);
4419	(void)InsertFormula(LU, LUIdx, F: NewF);
4420	break;
4421	skip_formula:;
4422	}
4423	}
4424	}
4425	}
4426	}
4427
4428	/// Generate formulae for each use.
4429	void
4430	LSRInstance::GenerateAllReuseFormulae() {
4431	// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
4432	// queries are more precise.
4433	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4434	LSRUse &LU = Uses [LUIdx];
4435	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4436	GenerateReassociations(LU, LUIdx, Base: LU.Formulae [i]);
4437	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4438	GenerateCombinations(LU, LUIdx, Base: LU.Formulae [i]);
4439	}
4440	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4441	LSRUse &LU = Uses [LUIdx];
4442	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4443	GenerateSymbolicOffsets(LU, LUIdx, Base: LU.Formulae [i]);
4444	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4445	GenerateConstantOffsets(LU, LUIdx, Base: LU.Formulae [i]);
4446	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4447	GenerateICmpZeroScales(LU, LUIdx, Base: LU.Formulae [i]);
4448	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4449	GenerateScales(LU, LUIdx, Base: LU.Formulae [i]);
4450	}
4451	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4452	LSRUse &LU = Uses [LUIdx];
4453	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4454	GenerateTruncates(LU, LUIdx, Base: LU.Formulae [i]);
4455	}
4456
4457	GenerateCrossUseConstantOffsets();
4458
4459	LLVM_DEBUG(dbgs() << "\n"
4460	"After generating reuse formulae:\n";
4461	print_uses(dbgs()));
4462	}
4463
4464	/// If there are multiple formulae with the same set of registers used
4465	/// by other uses, pick the best one and delete the others.
4466	void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4467	DenseSet<const SCEV *> VisitedRegs;
4468	SmallPtrSet<const SCEV *, `16`> Regs;
4469	SmallPtrSet<const SCEV *, `16`> LoserRegs;
4470	#ifndef NDEBUG
4471	bool ChangedFormulae = false;
4472	#endif
4473
4474	// Collect the best formula for each unique set of shared registers. This
4475	// is reset for each use.
4476	using BestFormulaeTy =
4477	DenseMap<SmallVector<const SCEV *, `4`>, size_t, UniquifierDenseMapInfo>;
4478
4479	BestFormulaeTy BestFormulae;
4480
4481	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4482	LSRUse &LU = Uses [LUIdx];
4483	LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
4484	dbgs() << `'\n'`);
4485
4486	bool Any = false;
4487	for (size_t FIdx = `0`, NumForms = LU.Formulae.size();
4488	FIdx != NumForms; ++FIdx) {
4489	Formula &F = LU.Formulae [FIdx];
4490
4491	// Some formulas are instant losers. For example, they may depend on
4492	// nonexistent AddRecs from other loops. These need to be filtered
4493	// immediately, otherwise heuristics could choose them over others leading
4494	// to an unsatisfactory solution. Passing LoserRegs into RateFormula here
4495	// avoids the need to recompute this information across formulae using the
4496	// same bad AddRec. Passing LoserRegs is also essential unless we remove
4497	// the corresponding bad register from the Regs set.
4498	Cost CostF(L, SE, TTI, AMK);
4499	Regs.clear();
4500	CostF.RateFormula(F, Regs, VisitedRegs, LU, LoserRegs: &LoserRegs);
4501	if (CostF.isLoser()) {
4502	// During initial formula generation, undesirable formulae are generated
4503	// by uses within other loops that have some non-trivial address mode or
4504	// use the postinc form of the IV. LSR needs to provide these formulae
4505	// as the basis of rediscovering the desired formula that uses an AddRec
4506	// corresponding to the existing phi. Once all formulae have been
4507	// generated, these initial losers may be pruned.
4508	LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
4509	dbgs() << "\n");
4510	}
4511	else {
4512	SmallVector<const SCEV *, `4`> Key;
4513	for (const SCEV *Reg : F.BaseRegs) {
4514	if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4515	Key.push_back(Elt: Reg);
4516	}
4517	if (F.ScaledReg &&
4518	RegUses.isRegUsedByUsesOtherThan(Reg: F.ScaledReg, LUIdx))
4519	Key.push_back(Elt: F.ScaledReg);
4520	// Unstable sort by host order ok, because this is only used for
4521	// uniquifying.
4522	llvm::sort(C&: Key);
4523
4524	std::pair<BestFormulaeTy::const_iterator, bool> P =
4525	BestFormulae.insert(KV: std::make_pair(x&: Key, y&: FIdx));
4526	if (P.second)
4527	continue;
4528
4529	Formula &Best = LU.Formulae [P.first ->second];
4530
4531	Cost CostBest(L, SE, TTI, AMK);
4532	Regs.clear();
4533	CostBest.RateFormula(F: Best, Regs, VisitedRegs, LU);
4534	if (CostF.isLess(Other: CostBest))
4535	std::swap(a&: F, b&: Best);
4536	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
4537	dbgs() << "\n"
4538	" in favor of formula ";
4539	Best.print(dbgs()); dbgs() << `'\n'`);
4540	}
4541	#ifndef NDEBUG
4542	ChangedFormulae = true;
4543	#endif
4544	LU.DeleteFormula(F);
4545	--FIdx;
4546	--NumForms;
4547	Any = true;
4548	}
4549
4550	// Now that we've filtered out some formulae, recompute the Regs set.
4551	if (Any)
4552	LU.RecomputeRegs(LUIdx, RegUses);
4553
4554	// Reset this to prepare for the next use.
4555	BestFormulae.clear();
4556	}
4557
4558	LLVM_DEBUG(if (ChangedFormulae) {
4559	dbgs() << "\n"
4560	"After filtering out undesirable candidates:\n";
4561	print_uses(dbgs());
4562	});
4563	}
4564
4565	/// Estimate the worst-case number of solutions the solver might have to
4566	/// consider. It almost never considers this many solutions because it prune the
4567	/// search space, but the pruning isn't always sufficient.
4568	size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4569	size_t Power = `1`;
4570	for (const LSRUse &LU : Uses) {
4571	size_t FSize = LU.Formulae.size();
4572	if (FSize >= ComplexityLimit) {
4573	Power = ComplexityLimit;
4574	break;
4575	}
4576	Power *= FSize;
4577	if (Power >= ComplexityLimit)
4578	break;
4579	}
4580	return Power;
4581	}
4582
4583	/// When one formula uses a superset of the registers of another formula, it
4584	/// won't help reduce register pressure (though it may not necessarily hurt
4585	/// register pressure); remove it to simplify the system.
4586	void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4587	if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4588	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4589
4590	LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4591	"which use a superset of registers used by other "
4592	"formulae.\n");
4593
4594	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4595	LSRUse &LU = Uses [LUIdx];
4596	bool Any = false;
4597	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
4598	Formula &F = LU.Formulae [i];
4599	// Look for a formula with a constant or GV in a register. If the use
4600	// also has a formula with that same value in an immediate field,
4601	// delete the one that uses a register.
4602	for (SmallVectorImpl<const SCEV *>::const_iterator
4603	I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4604	if (const SCEVConstant C = dyn_cast<SCEVConstant>(Val: I)) {
4605	Formula NewF = F;
4606	//FIXME: Formulas should store bitwidth to do wrapping properly.
4607	// See PR41034.
4608	NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
4609	NewF.BaseRegs.erase(CI: NewF.BaseRegs.begin() +
4610	(I - F.BaseRegs.begin()));
4611	if (LU.HasFormulaWithSameRegs(F: NewF)) {
4612	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4613	dbgs() << `'\n'`);
4614	LU.DeleteFormula(F);
4615	--i;
4616	--e;
4617	Any = true;
4618	break;
4619	}
4620	} else if (const SCEVUnknown U = dyn_cast<SCEVUnknown>(Val: I)) {
4621	if (GlobalValue *GV = dyn_cast<GlobalValue>(Val: U->getValue()))
4622	if (!F.BaseGV) {
4623	Formula NewF = F;
4624	NewF.BaseGV = GV;
4625	NewF.BaseRegs.erase(CI: NewF.BaseRegs.begin() +
4626	(I - F.BaseRegs.begin()));
4627	if (LU.HasFormulaWithSameRegs(F: NewF)) {
4628	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4629	dbgs() << `'\n'`);
4630	LU.DeleteFormula(F);
4631	--i;
4632	--e;
4633	Any = true;
4634	break;
4635	}
4636	}
4637	}
4638	}
4639	}
4640	if (Any)
4641	LU.RecomputeRegs(LUIdx, RegUses);
4642	}
4643
4644	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4645	}
4646	}
4647
4648	/// When there are many registers for expressions like A, A+1, A+2, etc.,
4649	/// allocate a single register for them.
4650	void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4651	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4652	return;
4653
4654	LLVM_DEBUG(
4655	dbgs() << "The search space is too complex.\n"
4656	"Narrowing the search space by assuming that uses separated "
4657	"by a constant offset will use the same registers.\n");
4658
4659	// This is especially useful for unrolled loops.
4660
4661	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4662	LSRUse &LU = Uses [LUIdx];
4663	for (const Formula &F : LU.Formulae) {
4664	if (F.BaseOffset == `0` \|\| (F.Scale != `0` && F.Scale != `1`))
4665	continue;
4666
4667	LSRUse *LUThatHas = FindUseWithSimilarFormula(OrigF: F, OrigLU: LU);
4668	if (!LUThatHas)
4669	continue;
4670
4671	if (!reconcileNewOffset(LU&: LUThatHas, NewOffset: F.BaseOffset, /HasBaseReg=/* false,
4672	Kind: LU.Kind, AccessTy: LU.AccessTy))
4673	continue;
4674
4675	LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << `'\n'`);
4676
4677	LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4678
4679	// Transfer the fixups of LU to LUThatHas.
4680	for (LSRFixup &Fixup : LU.Fixups) {
4681	Fixup.Offset += F.BaseOffset;
4682	LUThatHas->pushFixup(f&: Fixup);
4683	LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << `'\n'`);
4684	}
4685
4686	// Delete formulae from the new use which are no longer legal.
4687	bool Any = false;
4688	for (size_t i = `0`, e = LUThatHas->Formulae.size(); i != e; ++i) {
4689	Formula &F = LUThatHas->Formulae [i];
4690	if (!isLegalUse(TTI, MinOffset: LUThatHas->MinOffset, MaxOffset: LUThatHas->MaxOffset,
4691	Kind: LUThatHas->Kind, AccessTy: LUThatHas->AccessTy, F)) {
4692	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << `'\n'`);
4693	LUThatHas->DeleteFormula(F);
4694	--i;
4695	--e;
4696	Any = true;
4697	}
4698	}
4699
4700	if (Any)
4701	LUThatHas->RecomputeRegs(LUIdx: LUThatHas - &Uses.front(), RegUses);
4702
4703	// Delete the old use.
4704	DeleteUse(LU, LUIdx);
4705	--LUIdx;
4706	--NumUses;
4707	break;
4708	}
4709	}
4710
4711	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4712	}
4713
4714	/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4715	/// we've done more filtering, as it may be able to find more formulae to
4716	/// eliminate.
4717	void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4718	if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4719	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4720
4721	LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4722	"undesirable dedicated registers.\n");
4723
4724	FilterOutUndesirableDedicatedRegisters();
4725
4726	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4727	}
4728	}
4729
4730	/// If a LSRUse has multiple formulae with the same ScaledReg and Scale.
4731	/// Pick the best one and delete the others.
4732	/// This narrowing heuristic is to keep as many formulae with different
4733	/// Scale and ScaledReg pair as possible while narrowing the search space.
4734	/// The benefit is that it is more likely to find out a better solution
4735	/// from a formulae set with more Scale and ScaledReg variations than
4736	/// a formulae set with the same Scale and ScaledReg. The picking winner
4737	/// reg heuristic will often keep the formulae with the same Scale and
4738	/// ScaledReg and filter others, and we want to avoid that if possible.
4739	void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
4740	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4741	return;
4742
4743	LLVM_DEBUG(
4744	dbgs() << "The search space is too complex.\n"
4745	"Narrowing the search space by choosing the best Formula "
4746	"from the Formulae with the same Scale and ScaledReg.\n");
4747
4748	// Map the "Scale ScaledReg" pair to the best formula of current LSRUse.*
4749	using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
4750
4751	BestFormulaeTy BestFormulae;
4752	#ifndef NDEBUG
4753	bool ChangedFormulae = false;
4754	#endif
4755	DenseSet<const SCEV *> VisitedRegs;
4756	SmallPtrSet<const SCEV *, `16`> Regs;
4757
4758	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4759	LSRUse &LU = Uses [LUIdx];
4760	LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
4761	dbgs() << `'\n'`);
4762
4763	// Return true if Formula FA is better than Formula FB.
4764	auto IsBetterThan = [&](Formula &FA, Formula &FB) {
4765	// First we will try to choose the Formula with fewer new registers.
4766	// For a register used by current Formula, the more the register is
4767	// shared among LSRUses, the less we increase the register number
4768	// counter of the formula.
4769	size_t FARegNum = `0`;
4770	for (const SCEV *Reg : FA.BaseRegs) {
4771	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4772	FARegNum += (NumUses - UsedByIndices.count() + `1`);
4773	}
4774	size_t FBRegNum = `0`;
4775	for (const SCEV *Reg : FB.BaseRegs) {
4776	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4777	FBRegNum += (NumUses - UsedByIndices.count() + `1`);
4778	}
4779	if (FARegNum != FBRegNum)
4780	return FARegNum < FBRegNum;
4781
4782	// If the new register numbers are the same, choose the Formula with
4783	// less Cost.
4784	Cost CostFA(L, SE, TTI, AMK);
4785	Cost CostFB(L, SE, TTI, AMK);
4786	Regs.clear();
4787	CostFA.RateFormula(F: FA, Regs, VisitedRegs, LU);
4788	Regs.clear();
4789	CostFB.RateFormula(F: FB, Regs, VisitedRegs, LU);
4790	return CostFA.isLess(Other: CostFB);
4791	};
4792
4793	bool Any = false;
4794	for (size_t FIdx = `0`, NumForms = LU.Formulae.size(); FIdx != NumForms;
4795	++FIdx) {
4796	Formula &F = LU.Formulae [FIdx];
4797	if (!F.ScaledReg)
4798	continue;
4799	auto P = BestFormulae.insert(KV: {{F.ScaledReg, F.Scale}, FIdx});
4800	if (P.second)
4801	continue;
4802
4803	Formula &Best = LU.Formulae [P.first ->second];
4804	if (IsBetterThan (F, Best))
4805	std::swap(a&: F, b&: Best);
4806	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
4807	dbgs() << "\n"
4808	" in favor of formula ";
4809	Best.print(dbgs()); dbgs() << `'\n'`);
4810	#ifndef NDEBUG
4811	ChangedFormulae = true;
4812	#endif
4813	LU.DeleteFormula(F);
4814	--FIdx;
4815	--NumForms;
4816	Any = true;
4817	}
4818	if (Any)
4819	LU.RecomputeRegs(LUIdx, RegUses);
4820
4821	// Reset this to prepare for the next use.
4822	BestFormulae.clear();
4823	}
4824
4825	LLVM_DEBUG(if (ChangedFormulae) {
4826	dbgs() << "\n"
4827	"After filtering out undesirable candidates:\n";
4828	print_uses(dbgs());
4829	});
4830	}
4831
4832	/// If we are over the complexity limit, filter out any post-inc prefering
4833	/// variables to only post-inc values.
4834	void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
4835	if (AMK != TTI::AMK_PostIndexed)
4836	return;
4837	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4838	return;
4839
4840	LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
4841	"Narrowing the search space by choosing the lowest "
4842	"register Formula for PostInc Uses.\n");
4843
4844	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4845	LSRUse &LU = Uses [LUIdx];
4846
4847	if (LU.Kind != LSRUse::Address)
4848	continue;
4849	if (!TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty: LU.AccessTy.getType()) &&
4850	!TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty: LU.AccessTy.getType()))
4851	continue;
4852
4853	size_t MinRegs = std::numeric_limits<size_t>::max();
4854	for (const Formula &F : LU.Formulae)
4855	MinRegs = std::min(a: F.getNumRegs(), b: MinRegs);
4856
4857	bool Any = false;
4858	for (size_t FIdx = `0`, NumForms = LU.Formulae.size(); FIdx != NumForms;
4859	++FIdx) {
4860	Formula &F = LU.Formulae [FIdx];
4861	if (F.getNumRegs() > MinRegs) {
4862	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
4863	dbgs() << "\n");
4864	LU.DeleteFormula(F);
4865	--FIdx;
4866	--NumForms;
4867	Any = true;
4868	}
4869	}
4870	if (Any)
4871	LU.RecomputeRegs(LUIdx, RegUses);
4872
4873	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4874	break;
4875	}
4876
4877	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4878	}
4879
/// This function deletes formulas with a high expected register count.
/// Assuming we don't know the value of each formula (all inefficient ones
/// have already been deleted), generate the probability of not selecting
/// each register.
/// For example,
/// Use1:
///  reg(a) + reg({0,+,1})
///  reg(a) + reg({-1,+,1}) + 1
///  reg({a,+,1})
/// Use2:
///  reg(b) + reg({0,+,1})
///  reg(b) + reg({-1,+,1}) + 1
///  reg({b,+,1})
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1})
///  reg(c) + reg({b,+,1})
///
/// Probability of not selecting
///                 Use1   Use2    Use3
/// reg(a)         (1/3) *   1   *   1
/// reg(b)           1   * (1/3) * (1/2)
/// reg({0,+,1})   (2/3) * (2/3) * (1/2)
/// reg({-1,+,1})  (2/3) * (2/3) *   1
/// reg({a,+,1})   (2/3) *   1   *   1
/// reg({b,+,1})     1   * (2/3) * (2/3)
/// reg(c)           1   *   1   *   0
///
/// Now compute the mathematical expectation of the register count for each
/// formula. Note that for each use we exclude the probability of not
/// selecting for that use. For example, for Use1 the probability for reg(a)
/// would be just 1 * 1 (excluding the probability 1/3 of not selecting for
/// Use1).
/// Use1:
///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted
///  reg(a) + reg({-1,+,1}) + 1     1 + 4/9       -- to be deleted
///  reg({a,+,1})                   1
/// Use2:
///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted
///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted
///  reg({b,+,1})                   2/3
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
///  reg(c) + reg({b,+,1})          1 + 2/3
4922	void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
4923	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4924	return;
4925	// Ok, we have too many of formulae on our hands to conveniently handle.
4926	// Use a rough heuristic to thin out the list.
4927
4928	// Set of Regs wich will be 100% used in final solution.
4929	// Used in each formula of a solution (in example above this is reg(c)).
4930	// We can skip them in calculations.
4931	SmallPtrSet<const SCEV *, `4`> UniqRegs;
4932	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4933
4934	// Map each register to probability of not selecting
4935	DenseMap <const SCEV , float*> RegNumMap;
4936	for (const SCEV *Reg : RegUses) {
4937	if (UniqRegs.count(Ptr: Reg))
4938	continue;
4939	float PNotSel = `1`;
4940	for (const LSRUse &LU : Uses) {
4941	if (!LU.Regs.count(Ptr: Reg))
4942	continue;
4943	float P = LU.getNotSelectedProbability(Reg);
4944	if (P != `0.0`)
4945	PNotSel *= P;
4946	else
4947	UniqRegs.insert(Ptr: Reg);
4948	}
4949	RegNumMap.insert(KV: std::make_pair(x&: Reg, y&: PNotSel));
4950	}
4951
4952	LLVM_DEBUG(
4953	dbgs() << "Narrowing the search space by deleting costly formulas\n");
4954
4955	// Delete formulas where registers number expectation is high.
4956	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4957	LSRUse &LU = Uses [LUIdx];
4958	// If nothing to delete - continue.
4959	if (LU.Formulae.size() < `2`)
4960	continue;
4961	// This is temporary solution to test performance. Float should be
4962	// replaced with round independent type (based on integers) to avoid
4963	// different results for different target builds.
4964	float FMinRegNum = LU.Formulae [`0`].getNumRegs();
4965	float FMinARegNum = LU.Formulae [`0`].getNumRegs();
4966	size_t MinIdx = `0`;
4967	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
4968	Formula &F = LU.Formulae [i];
4969	float FRegNum = `0`;
4970	float FARegNum = `0`;
4971	for (const SCEV *BaseReg : F.BaseRegs) {
4972	if (UniqRegs.count(Ptr: BaseReg))
4973	continue;
4974	FRegNum += RegNumMap [BaseReg] / LU.getNotSelectedProbability(Reg: BaseReg);
4975	if (isa<SCEVAddRecExpr>(Val: BaseReg))
4976	FARegNum +=
4977	RegNumMap [BaseReg] / LU.getNotSelectedProbability(Reg: BaseReg);
4978	}
4979	if (const SCEV *ScaledReg = F.ScaledReg) {
4980	if (!UniqRegs.count(Ptr: ScaledReg)) {
4981	FRegNum +=
4982	RegNumMap [ScaledReg] / LU.getNotSelectedProbability(Reg: ScaledReg);
4983	if (isa<SCEVAddRecExpr>(Val: ScaledReg))
4984	FARegNum +=
4985	RegNumMap [ScaledReg] / LU.getNotSelectedProbability(Reg: ScaledReg);
4986	}
4987	}
4988	if (FMinRegNum > FRegNum \|\|
4989	(FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
4990	FMinRegNum = FRegNum;
4991	FMinARegNum = FARegNum;
4992	MinIdx = i;
4993	}
4994	}
4995	LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
4996	dbgs() << " with min reg num " << FMinRegNum << `'\n'`);
4997	if (MinIdx != `0`)
4998	std::swap(a&: LU.Formulae [MinIdx], b&: LU.Formulae [`0`]);
4999	while (LU.Formulae.size() != `1`) {
5000	LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
5001	dbgs() << `'\n'`);
5002	LU.Formulae.pop_back();
5003	}
5004	LU.RecomputeRegs(LUIdx, RegUses);
5005	assert(LU.Formulae.size() == `1` && "Should be exactly 1 min regs formula");
5006	Formula &F = LU.Formulae [`0`];
5007	LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << `'\n'`);
5008	// When we choose the formula, the regs become unique.
5009	UniqRegs.insert(I: F.BaseRegs.begin(), E: F.BaseRegs.end());
5010	if (F.ScaledReg)
5011	UniqRegs.insert(Ptr: F.ScaledReg);
5012	}
5013	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5014	}
5015
5016	// Check if Best and Reg are SCEVs separated by a constant amount C, and if so
5017	// would the addressing offset +C would be legal where the negative offset -C is
5018	// not.
5019	static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
5020	ScalarEvolution &SE, const SCEV *Best,
5021	const SCEV *Reg,
5022	MemAccessTy AccessType) {
5023	if (Best->getType() != Reg->getType() \|\|
5024	(isa<SCEVAddRecExpr>(Val: Best) && isa<SCEVAddRecExpr>(Val: Reg) &&
5025	cast<SCEVAddRecExpr>(Val: Best)->getLoop() !=
5026	cast<SCEVAddRecExpr>(Val: Reg)->getLoop()))
5027	return false;
5028	const auto *Diff = dyn_cast<SCEVConstant>(Val: SE.getMinusSCEV(LHS: Best, RHS: Reg));
5029	if (!Diff)
5030	return false;
5031
5032	return TTI.isLegalAddressingMode(
5033	Ty: AccessType.MemTy, /BaseGV=/nullptr,
5034	/BaseOffset=/Diff->getAPInt().getSExtValue(),
5035	/HasBaseReg=/true, /Scale=/`0`, AddrSpace: AccessType.AddrSpace) &&
5036	!TTI.isLegalAddressingMode(
5037	Ty: AccessType.MemTy, /BaseGV=/nullptr,
5038	/BaseOffset=/-Diff->getAPInt().getSExtValue(),
5039	/HasBaseReg=/true, /Scale=/`0`, AddrSpace: AccessType.AddrSpace);
5040	}
5041
5042	/// Pick a register which seems likely to be profitable, and then in any use
5043	/// which has any reference to that register, delete all formulae which do not
5044	/// reference that register.
5045	void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5046	// With all other options exhausted, loop until the system is simple
5047	// enough to handle.
5048	SmallPtrSet<const SCEV *, `4`> Taken;
5049	while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
5050	// Ok, we have too many of formulae on our hands to conveniently handle.
5051	// Use a rough heuristic to thin out the list.
5052	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5053
5054	// Pick the register which is used by the most LSRUses, which is likely
5055	// to be a good reuse register candidate.
5056	const SCEV Best = nullptr*;
5057	unsigned BestNum = `0`;
5058	for (const SCEV *Reg : RegUses) {
5059	if (Taken.count(Ptr: Reg))
5060	continue;
5061	if (!Best) {
5062	Best = Reg;
5063	BestNum = RegUses.getUsedByIndices(Reg).count();
5064	} else {
5065	unsigned Count = RegUses.getUsedByIndices(Reg).count();
5066	if (Count > BestNum) {
5067	Best = Reg;
5068	BestNum = Count;
5069	}
5070
5071	// If the scores are the same, but the Reg is simpler for the target
5072	// (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
5073	// handle +C but not -C), opt for the simpler formula.
5074	if (Count == BestNum) {
5075	int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5076	if (LUIdx >= `0` && Uses [LUIdx].Kind == LSRUse::Address &&
5077	IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
5078	AccessType: Uses [LUIdx].AccessTy)) {
5079	Best = Reg;
5080	BestNum = Count;
5081	}
5082	}
5083	}
5084	}
5085	assert(Best && "Failed to find best LSRUse candidate");
5086
5087	LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
5088	<< " will yield profitable reuse.\n");
5089	Taken.insert(Ptr: Best);
5090
5091	// In any use with formulae which references this register, delete formulae
5092	// which don't reference it.
5093	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5094	LSRUse &LU = Uses [LUIdx];
5095	if (!LU.Regs.count(Ptr: Best)) continue;
5096
5097	bool Any = false;
5098	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
5099	Formula &F = LU.Formulae [i];
5100	if (!F.referencesReg(S: Best)) {
5101	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << `'\n'`);
5102	LU.DeleteFormula(F);
5103	--e;
5104	--i;
5105	Any = true;
5106	assert(e != `0` && "Use has no formulae left! Is Regs inconsistent?");
5107	continue;
5108	}
5109	}
5110
5111	if (Any)
5112	LU.RecomputeRegs(LUIdx, RegUses);
5113	}
5114
5115	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5116	}
5117	}
5118
5119	/// If there are an extraordinary number of formulae to choose from, use some
5120	/// rough heuristics to prune down the number of formulae. This keeps the main
5121	/// solver from taking an extraordinary amount of time in some worst-case
5122	/// scenarios.
5123	void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5124	NarrowSearchSpaceByDetectingSupersets();
5125	NarrowSearchSpaceByCollapsingUnrolledCode();
5126	NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5127	if (FilterSameScaledReg)
5128	NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5129	NarrowSearchSpaceByFilterPostInc();
5130	if (LSRExpNarrow)
5131	NarrowSearchSpaceByDeletingCostlyFormulas();
5132	else
5133	NarrowSearchSpaceByPickingWinnerRegs();
5134	}
5135
5136	/// This is the recursive solver.
5137	void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5138	Cost &SolutionCost,
5139	SmallVectorImpl<const Formula *> &Workspace,
5140	const Cost &CurCost,
5141	const SmallPtrSet<const SCEV *, `16`> &CurRegs,
5142	DenseSet<const SCEV > &VisitedRegs) const* {
5143	// Some ideas:
5144	// - prune more:
5145	// - use more aggressive filtering
5146	// - sort the formula so that the most profitable solutions are found first
5147	// - sort the uses too
5148	// - search faster:
5149	// - don't compute a cost, and then compare. compare while computing a cost
5150	// and bail early.
5151	// - track register sets with SmallBitVector
5152
5153	const LSRUse &LU = Uses [Workspace.size()];
5154
5155	// If this use references any register that's already a part of the
5156	// in-progress solution, consider it a requirement that a formula must
5157	// reference that register in order to be considered. This prunes out
5158	// unprofitable searching.
5159	SmallSetVector<const SCEV *, `4`> ReqRegs;
5160	for (const SCEV *S : CurRegs)
5161	if (LU.Regs.count(Ptr: S))
5162	ReqRegs.insert(X: S);
5163
5164	SmallPtrSet<const SCEV *, `16`> NewRegs;
5165	Cost NewCost(L, SE, TTI, AMK);
5166	for (const Formula &F : LU.Formulae) {
5167	// Ignore formulae which may not be ideal in terms of register reuse of
5168	// ReqRegs. The formula should use all required registers before
5169	// introducing new ones.
5170	// This can sometimes (notably when trying to favour postinc) lead to
5171	// sub-optimial decisions. There it is best left to the cost modelling to
5172	// get correct.
5173	if (AMK != TTI::AMK_PostIndexed \|\| LU.Kind != LSRUse::Address) {
5174	int NumReqRegsToFind = std::min(a: F.getNumRegs(), b: ReqRegs.size());
5175	for (const SCEV *Reg : ReqRegs) {
5176	if ((F.ScaledReg && F.ScaledReg == Reg) \|\|
5177	is_contained(Range: F.BaseRegs, Element: Reg)) {
5178	--NumReqRegsToFind;
5179	if (NumReqRegsToFind == `0`)
5180	break;
5181	}
5182	}
5183	if (NumReqRegsToFind != `0`) {
5184	// If none of the formulae satisfied the required registers, then we could
5185	// clear ReqRegs and try again. Currently, we simply give up in this case.
5186	continue;
5187	}
5188	}
5189
5190	// Evaluate the cost of the current formula. If it's already worse than
5191	// the current best, prune the search at that point.
5192	NewCost = CurCost;
5193	NewRegs = CurRegs;
5194	NewCost.RateFormula(F, Regs&: NewRegs, VisitedRegs, LU);
5195	if (NewCost.isLess(Other: SolutionCost)) {
5196	Workspace.push_back(Elt: &F);
5197	if (Workspace.size() != Uses.size()) {
5198	SolveRecurse(Solution, SolutionCost, Workspace, CurCost: NewCost,
5199	CurRegs: NewRegs, VisitedRegs);
5200	if (F.getNumRegs() == `1` && Workspace.size() == `1`)
5201	VisitedRegs.insert(V: F.ScaledReg ? F.ScaledReg : F.BaseRegs [`0`]);
5202	} else {
5203	LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
5204	dbgs() << ".\nRegs:\n";
5205	for (const SCEV *S : NewRegs) dbgs()
5206	<< "- " << *S << "\n";
5207	dbgs() << `'\n'`);
5208
5209	SolutionCost = NewCost;
5210	Solution = Workspace;
5211	}
5212	Workspace.pop_back();
5213	}
5214	}
5215	}
5216
5217	/// Choose one formula from each use. Return the results in the given Solution
5218	/// vector.
5219	void LSRInstance::Solve(SmallVectorImpl<const Formula > &Solution) const* {
5220	SmallVector<const Formula *, `8`> Workspace;
5221	Cost SolutionCost(L, SE, TTI, AMK);
5222	SolutionCost.Lose();
5223	Cost CurCost(L, SE, TTI, AMK);
5224	SmallPtrSet<const SCEV *, `16`> CurRegs;
5225	DenseSet<const SCEV *> VisitedRegs;
5226	Workspace.reserve(N: Uses.size());
5227
5228	// SolveRecurse does all the work.
5229	SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5230	CurRegs, VisitedRegs);
5231	if (Solution.empty()) {
5232	LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
5233	return;
5234	}
5235
5236	// Ok, we've now made all our decisions.
5237	LLVM_DEBUG(dbgs() << "\n"
5238	"The chosen solution requires ";
5239	SolutionCost.print(dbgs()); dbgs() << ":\n";
5240	for (size_t i = `0`, e = Uses.size(); i != e; ++i) {
5241	dbgs() << " ";
5242	Uses[i].print(dbgs());
5243	dbgs() << "\n"
5244	" ";
5245	Solution[i]->print(dbgs());
5246	dbgs() << `'\n'`;
5247	});
5248
5249	assert(Solution.size() == Uses.size() && "Malformed solution!");
5250
5251	if (BaselineCost.isLess(Other: SolutionCost)) {
5252	LLVM_DEBUG(dbgs() << "The baseline solution requires ";
5253	BaselineCost.print(dbgs()); dbgs() << "\n");
5254	if (!AllowDropSolutionIfLessProfitable)
5255	LLVM_DEBUG(
5256	dbgs() << "Baseline is more profitable than chosen solution, "
5257	"add option 'lsr-drop-solution' to drop LSR solution.\n");
5258	else {
5259	LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
5260	"solution, dropping LSR solution.\n";);
5261	Solution.clear();
5262	}
5263	}
5264	}
5265
5266	/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as
5267	/// we can go while still being dominated by the input positions. This helps
5268	/// canonicalize the insert position, which encourages sharing.
5269	BasicBlock::iterator
5270	LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
5271	const SmallVectorImpl<Instruction *> &Inputs)
5272	const {
5273	Instruction Tentative = &IP;
5274	while (true) {
5275	bool AllDominate = true;
5276	Instruction BetterPos = nullptr*;
5277	// Don't bother attempting to insert before a catchswitch, their basic block
5278	// cannot have other non-PHI instructions.
5279	if (isa<CatchSwitchInst>(Val: Tentative))
5280	return IP;
5281
5282	for (Instruction *Inst : Inputs) {
5283	if (Inst == Tentative \|\| !DT.dominates(Def: Inst, User: Tentative)) {
5284	AllDominate = false;
5285	break;
5286	}
5287	// Attempt to find an insert position in the middle of the block,
5288	// instead of at the end, so that it can be used for other expansions.
5289	if (Tentative->getParent() == Inst->getParent() &&
5290	(!BetterPos \|\| !DT.dominates(Def: Inst, User: BetterPos)))
5291	BetterPos = &*std::next(x: BasicBlock::iterator (Inst));
5292	}
5293	if (!AllDominate)
5294	break;
5295	if (BetterPos)
5296	IP = BetterPos->getIterator();
5297	else
5298	IP = Tentative->getIterator();
5299
5300	const Loop *IPLoop = LI.getLoopFor(BB: IP ->getParent());
5301	unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : `0`;
5302
5303	BasicBlock *IDom;
5304	for (DomTreeNode *Rung = DT.getNode(BB: IP ->getParent()); ; ) {
5305	if (!Rung) return IP;
5306	Rung = Rung->getIDom();
5307	if (!Rung) return IP;
5308	IDom = Rung->getBlock();
5309
5310	// Don't climb into a loop though.
5311	const Loop *IDomLoop = LI.getLoopFor(BB: IDom);
5312	unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : `0`;
5313	if (IDomDepth <= IPLoopDepth &&
5314	(IDomDepth != IPLoopDepth \|\| IDomLoop == IPLoop))
5315	break;
5316	}
5317
5318	Tentative = IDom->getTerminator();
5319	}
5320
5321	return IP;
5322	}
5323
5324	/// Determine an input position which will be dominated by the operands and
5325	/// which will dominate the result.
5326	BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
5327	BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
5328	// Collect some instructions which must be dominated by the
5329	// expanding replacement. These must be dominated by any operands that
5330	// will be required in the expansion.
5331	SmallVector<Instruction *, `4`> Inputs;
5332	if (Instruction *I = dyn_cast<Instruction>(Val: LF.OperandValToReplace))
5333	Inputs.push_back(Elt: I);
5334	if (LU.Kind == LSRUse::ICmpZero)
5335	if (Instruction *I =
5336	dyn_cast<Instruction>(Val: cast<ICmpInst>(Val: LF.UserInst)->getOperand(i_nocapture: `1`)))
5337	Inputs.push_back(Elt: I);
5338	if (LF.PostIncLoops.count(Ptr: L)) {
5339	if (LF.isUseFullyOutsideLoop(L))
5340	Inputs.push_back(Elt: L->getLoopLatch()->getTerminator());
5341	else
5342	Inputs.push_back(Elt: IVIncInsertPos);
5343	}
5344	// The expansion must also be dominated by the increment positions of any
5345	// loops it for which it is using post-inc mode.
5346	for (const Loop *PIL : LF.PostIncLoops) {
5347	if (PIL == L) continue;
5348
5349	// Be dominated by the loop exit.
5350	SmallVector<BasicBlock *, `4`> ExitingBlocks;
5351	PIL->getExitingBlocks(ExitingBlocks);
5352	if (!ExitingBlocks.empty()) {
5353	BasicBlock *BB = ExitingBlocks [`0`];
5354	for (unsigned i = `1`, e = ExitingBlocks.size(); i != e; ++i)
5355	BB = DT.findNearestCommonDominator(A: BB, B: ExitingBlocks [i]);
5356	Inputs.push_back(Elt: BB->getTerminator());
5357	}
5358	}
5359
5360	assert(!isa<PHINode>(LowestIP) && !LowestIP ->isEHPad()
5361	&& !isa<DbgInfoIntrinsic>(LowestIP) &&
5362	"Insertion point must be a normal instruction");
5363
5364	// Then, climb up the immediate dominator tree as far as we can go while
5365	// still being dominated by the input positions.
5366	BasicBlock::iterator IP = HoistInsertPosition(IP: LowestIP, Inputs);
5367
5368	// Don't insert instructions before PHI nodes.
5369	while (isa<PHINode>(Val: IP)) ++IP;
5370
5371	// Ignore landingpad instructions.
5372	while (IP ->isEHPad()) ++IP;
5373
5374	// Ignore debug intrinsics.
5375	while (isa<DbgInfoIntrinsic>(Val: IP)) ++IP;
5376
5377	// Set IP below instructions recently inserted by SCEVExpander. This keeps the
5378	// IP consistent across expansions and allows the previously inserted
5379	// instructions to be reused by subsequent expansion.
5380	while (Rewriter.isInsertedInstruction(I: &*IP) && IP != LowestIP)
5381	++IP;
5382
5383	return IP;
5384	}
5385
5386	/// Emit instructions for the leading candidate expression for this LSRUse (this
5387	/// is called "expanding").
5388	Value LSRInstance::Expand(const* LSRUse &LU, const LSRFixup &LF,
5389	const Formula &F, BasicBlock::iterator IP,
5390	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5391	if (LU.RigidFormula)
5392	return LF.OperandValToReplace;
5393
5394	// Determine an input position which will be dominated by the operands and
5395	// which will dominate the result.
5396	IP = AdjustInsertPositionForExpand(LowestIP: IP, LF, LU);
5397	Rewriter.setInsertPoint(&*IP);
5398
5399	// Inform the Rewriter if we have a post-increment use, so that it can
5400	// perform an advantageous expansion.
5401	Rewriter.setPostInc(LF.PostIncLoops);
5402
5403	// This is the type that the user actually needs.
5404	Type *OpTy = LF.OperandValToReplace->getType();
5405	// This will be the type that we'll initially expand to.
5406	Type *Ty = F.getType();
5407	if (!Ty)
5408	// No type known; just expand directly to the ultimate type.
5409	Ty = OpTy;
5410	else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(Ty: OpTy))
5411	// Expand directly to the ultimate type if it's the right size.
5412	Ty = OpTy;
5413	// This is the type to do integer arithmetic in.
5414	Type *IntTy = SE.getEffectiveSCEVType(Ty);
5415
5416	// Build up a list of operands to add together to form the full base.
5417	SmallVector<const SCEV *, `8`> Ops;
5418
5419	// Expand the BaseRegs portion.
5420	for (const SCEV *Reg : F.BaseRegs) {
5421	assert(!Reg->isZero() && "Zero allocated in a base register!");
5422
5423	// If we're expanding for a post-inc user, make the post-inc adjustment.
5424	Reg = denormalizeForPostIncUse(S: Reg, Loops: LF.PostIncLoops, SE);
5425	Ops.push_back(Elt: SE.getUnknown(V: Rewriter.expandCodeFor(SH: Reg, Ty: nullptr)));
5426	}
5427
5428	// Expand the ScaledReg portion.
5429	Value ICmpScaledV = nullptr*;
5430	if (F.Scale != `0`) {
5431	const SCEV *ScaledS = F.ScaledReg;
5432
5433	// If we're expanding for a post-inc user, make the post-inc adjustment.
5434	PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
5435	ScaledS = denormalizeForPostIncUse(S: ScaledS, Loops, SE);
5436
5437	if (LU.Kind == LSRUse::ICmpZero) {
5438	// Expand ScaleReg as if it was part of the base regs.
5439	if (F.Scale == `1`)
5440	Ops.push_back(
5441	Elt: SE.getUnknown(V: Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr)));
5442	else {
5443	// An interesting way of "folding" with an icmp is to use a negated
5444	// scale, which we'll implement by inserting it into the other operand
5445	// of the icmp.
5446	assert(F.Scale == -`1` &&
5447	"The only scale supported by ICmpZero uses is -1!");
5448	ICmpScaledV = Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr);
5449	}
5450	} else {
5451	// Otherwise just expand the scaled register and an explicit scale,
5452	// which is expected to be matched as part of the address.
5453
5454	// Flush the operand list to suppress SCEVExpander hoisting address modes.
5455	// Unless the addressing mode will not be folded.
5456	if (!Ops.empty() && LU.Kind == LSRUse::Address &&
5457	isAMCompletelyFolded(TTI, LU, F)) {
5458	Value FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty: nullptr*);
5459	Ops.clear();
5460	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5461	}
5462	ScaledS = SE.getUnknown(V: Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr));
5463	if (F.Scale != `1`)
5464	ScaledS =
5465	SE.getMulExpr(LHS: ScaledS, RHS: SE.getConstant(Ty: ScaledS->getType(), V: F.Scale));
5466	Ops.push_back(Elt: ScaledS);
5467	}
5468	}
5469
5470	// Expand the GV portion.
5471	if (F.BaseGV) {
5472	// Flush the operand list to suppress SCEVExpander hoisting.
5473	if (!Ops.empty()) {
5474	Value *FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty: IntTy);
5475	Ops.clear();
5476	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5477	}
5478	Ops.push_back(Elt: SE.getUnknown(V: F.BaseGV));
5479	}
5480
5481	// Flush the operand list to suppress SCEVExpander hoisting of both folded and
5482	// unfolded offsets. LSR assumes they both live next to their uses.
5483	if (!Ops.empty()) {
5484	Value *FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty);
5485	Ops.clear();
5486	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5487	}
5488
5489	// Expand the immediate portion.
5490	int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
5491	if (Offset != `0`) {
5492	if (LU.Kind == LSRUse::ICmpZero) {
5493	// The other interesting way of "folding" with an ICmpZero is to use a
5494	// negated immediate.
5495	if (!ICmpScaledV)
5496	ICmpScaledV = ConstantInt::get(Ty: IntTy, V: -(uint64_t)Offset);
5497	else {
5498	Ops.push_back(Elt: SE.getUnknown(V: ICmpScaledV));
5499	ICmpScaledV = ConstantInt::get(Ty: IntTy, V: Offset);
5500	}
5501	} else {
5502	// Just add the immediate values. These again are expected to be matched
5503	// as part of the address.
5504	Ops.push_back(Elt: SE.getUnknown(V: ConstantInt::getSigned(Ty: IntTy, V: Offset)));
5505	}
5506	}
5507
5508	// Expand the unfolded offset portion.
5509	int64_t UnfoldedOffset = F.UnfoldedOffset;
5510	if (UnfoldedOffset != `0`) {
5511	// Just add the immediate values.
5512	Ops.push_back(Elt: SE.getUnknown(V: ConstantInt::getSigned(Ty: IntTy,
5513	V: UnfoldedOffset)));
5514	}
5515
5516	// Emit instructions summing all the operands.
5517	const SCEV *FullS = Ops.empty() ?
5518	SE.getConstant(Ty: IntTy, V: `0`) :
5519	SE.getAddExpr(Ops);
5520	Value *FullV = Rewriter.expandCodeFor(SH: FullS, Ty);
5521
5522	// We're done expanding now, so reset the rewriter.
5523	Rewriter.clearPostInc();
5524
5525	// An ICmpZero Formula represents an ICmp which we're handling as a
5526	// comparison against zero. Now that we've expanded an expression for that
5527	// form, update the ICmp's other operand.
5528	if (LU.Kind == LSRUse::ICmpZero) {
5529	ICmpInst *CI = cast<ICmpInst>(Val: LF.UserInst);
5530	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: `1`)))
5531	DeadInsts.emplace_back(Args&: OperandIsInstr);
5532	assert(!F.BaseGV && "ICmp does not support folding a global value and "
5533	"a scale at the same time!");
5534	if (F.Scale == -`1`) {
5535	if (ICmpScaledV->getType() != OpTy) {
5536	Instruction *Cast = CastInst::Create(
5537	CastInst::getCastOpcode(Val: ICmpScaledV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false),
5538	S: ICmpScaledV, Ty: OpTy, Name: "tmp", InsertBefore: CI->getIterator());
5539	ICmpScaledV = Cast;
5540	}
5541	CI->setOperand(i_nocapture: `1`, Val_nocapture: ICmpScaledV);
5542	} else {
5543	// A scale of 1 means that the scale has been expanded as part of the
5544	// base regs.
5545	assert((F.Scale == `0` \|\| F.Scale == `1`) &&
5546	"ICmp does not support folding a global value and "
5547	"a scale at the same time!");
5548	Constant *C = ConstantInt::getSigned(Ty: SE.getEffectiveSCEVType(Ty: OpTy),
5549	V: -(uint64_t)Offset);
5550	if (C->getType() != OpTy) {
5551	C = ConstantFoldCastOperand(
5552	Opcode: CastInst::getCastOpcode(Val: C, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false), C, DestTy: OpTy,
5553	DL: CI->getModule()->getDataLayout());
5554	assert(C && "Cast of ConstantInt should have folded");
5555	}
5556
5557	CI->setOperand(i_nocapture: `1`, Val_nocapture: C);
5558	}
5559	}
5560
5561	return FullV;
5562	}
5563
5564	/// Helper for Rewrite. PHI nodes are special because the use of their operands
5565	/// effectively happens in their predecessor blocks, so the expression may need
5566	/// to be expanded in multiple places.
5567	void LSRInstance::RewriteForPHI(
5568	PHINode PN, const* LSRUse &LU, const LSRFixup &LF, const Formula &F,
5569	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5570	DenseMap<BasicBlock , Value > Inserted;
5571
5572	// Inserting instructions in the loop and using them as PHI's input could
5573	// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
5574	// corresponding incoming block is not loop exiting). So collect all such
5575	// instructions to form LCSSA for them later.
5576	SmallVector<Instruction *, `4`> InsertedNonLCSSAInsts;
5577
5578	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i)
5579	if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
5580	bool needUpdateFixups = false;
5581	BasicBlock *BB = PN->getIncomingBlock(i);
5582
5583	// If this is a critical edge, split the edge so that we do not insert
5584	// the code on all predecessor/successor paths. We do this unless this
5585	// is the canonical backedge for this loop, which complicates post-inc
5586	// users.
5587	if (e != `1` && BB->getTerminator()->getNumSuccessors() > `1` &&
5588	!isa<IndirectBrInst>(Val: BB->getTerminator()) &&
5589	!isa<CatchSwitchInst>(Val: BB->getTerminator())) {
5590	BasicBlock *Parent = PN->getParent();
5591	Loop *PNLoop = LI.getLoopFor(BB: Parent);
5592	if (!PNLoop \|\| Parent != PNLoop->getHeader()) {
5593	// Split the critical edge.
5594	BasicBlock NewBB = nullptr*;
5595	if (!Parent->isLandingPad()) {
5596	NewBB =
5597	SplitCriticalEdge(Src: BB, Dst: Parent,
5598	Options: CriticalEdgeSplittingOptions (&DT, &LI, MSSAU)
5599	.setMergeIdenticalEdges()
5600	.setKeepOneInputPHIs());
5601	} else {
5602	SmallVector<BasicBlock*, `2`> NewBBs;
5603	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5604	SplitLandingPadPredecessors(OrigBB: Parent, Preds: BB, Suffix: "", Suffix2: "", NewBBs, DTU: &DTU, LI: &LI);
5605	NewBB = NewBBs [`0`];
5606	}
5607	// If NewBB==NULL, then SplitCriticalEdge refused to split because all
5608	// phi predecessors are identical. The simple thing to do is skip
5609	// splitting in this case rather than complicate the API.
5610	if (NewBB) {
5611	// If PN is outside of the loop and BB is in the loop, we want to
5612	// move the block to be immediately before the PHI block, not
5613	// immediately after BB.
5614	if (L->contains(BB) && !L->contains(Inst: PN))
5615	NewBB->moveBefore(MovePos: PN->getParent());
5616
5617	// Splitting the edge can reduce the number of PHI entries we have.
5618	e = PN->getNumIncomingValues();
5619	BB = NewBB;
5620	i = PN->getBasicBlockIndex(BB);
5621
5622	needUpdateFixups = true;
5623	}
5624	}
5625	}
5626
5627	std::pair<DenseMap<BasicBlock , Value >::iterator, bool> Pair =
5628	Inserted.insert(KV: std::make_pair(x&: BB, y: static_cast<Value >(nullptr*)));
5629	if (!Pair.second)
5630	PN->setIncomingValue(i, V: Pair.first ->second);
5631	else {
5632	Value *FullV =
5633	Expand(LU, LF, F, IP: BB->getTerminator()->getIterator(), DeadInsts);
5634
5635	// If this is reuse-by-noop-cast, insert the noop cast.
5636	Type *OpTy = LF.OperandValToReplace->getType();
5637	if (FullV->getType() != OpTy)
5638	FullV = CastInst::Create(
5639	CastInst::getCastOpcode(Val: FullV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false), S: FullV,
5640	Ty: LF.OperandValToReplace->getType(), Name: "tmp",
5641	InsertBefore: BB->getTerminator()->getIterator());
5642
5643	// If the incoming block for this value is not in the loop, it means the
5644	// current PHI is not in a loop exit, so we must create a LCSSA PHI for
5645	// the inserted value.
5646	if (auto *I = dyn_cast<Instruction>(Val: FullV))
5647	if (L->contains(Inst: I) && !L->contains(BB))
5648	InsertedNonLCSSAInsts.push_back(Elt: I);
5649
5650	PN->setIncomingValue(i, V: FullV);
5651	Pair.first ->second = FullV;
5652	}
5653
5654	// If LSR splits critical edge and phi node has other pending
5655	// fixup operands, we need to update those pending fixups. Otherwise
5656	// formulae will not be implemented completely and some instructions
5657	// will not be eliminated.
5658	if (needUpdateFixups) {
5659	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
5660	for (LSRFixup &Fixup : Uses [LUIdx].Fixups)
5661	// If fixup is supposed to rewrite some operand in the phi
5662	// that was just updated, it may be already moved to
5663	// another phi node. Such fixup requires update.
5664	if (Fixup.UserInst == PN) {
5665	// Check if the operand we try to replace still exists in the
5666	// original phi.
5667	bool foundInOriginalPHI = false;
5668	for (const auto &val : PN->incoming_values())
5669	if (val == Fixup.OperandValToReplace) {
5670	foundInOriginalPHI = true;
5671	break;
5672	}
5673
5674	// If fixup operand found in original PHI - nothing to do.
5675	if (foundInOriginalPHI)
5676	continue;
5677
5678	// Otherwise it might be moved to another PHI and requires update.
5679	// If fixup operand not found in any of the incoming blocks that
5680	// means we have already rewritten it - nothing to do.
5681	for (const auto &Block : PN->blocks())
5682	for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(Val: I);
5683	++I) {
5684	PHINode *NewPN = cast<PHINode>(Val&: I);
5685	for (const auto &val : NewPN->incoming_values())
5686	if (val == Fixup.OperandValToReplace)
5687	Fixup.UserInst = NewPN;
5688	}
5689	}
5690	}
5691	}
5692
5693	formLCSSAForInstructions(Worklist&: InsertedNonLCSSAInsts, DT, LI, SE: &SE);
5694	}
5695
5696	/// Emit instructions for the leading candidate expression for this LSRUse (this
5697	/// is called "expanding"), and update the UserInst to reference the newly
5698	/// expanded value.
5699	void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
5700	const Formula &F,
5701	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5702	// First, find an insertion point that dominates UserInst. For PHI nodes,
5703	// find the nearest block which dominates all the relevant uses.
5704	if (PHINode *PN = dyn_cast<PHINode>(Val: LF.UserInst)) {
5705	RewriteForPHI(PN, LU, LF, F, DeadInsts);
5706	} else {
5707	Value *FullV = Expand(LU, LF, F, IP: LF.UserInst->getIterator(), DeadInsts);
5708
5709	// If this is reuse-by-noop-cast, insert the noop cast.
5710	Type *OpTy = LF.OperandValToReplace->getType();
5711	if (FullV->getType() != OpTy) {
5712	Instruction *Cast =
5713	CastInst::Create(CastInst::getCastOpcode(Val: FullV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false),
5714	S: FullV, Ty: OpTy, Name: "tmp", InsertBefore: LF.UserInst->getIterator());
5715	FullV = Cast;
5716	}
5717
5718	// Update the user. ICmpZero is handled specially here (for now) because
5719	// Expand may have updated one of the operands of the icmp already, and
5720	// its new value may happen to be equal to LF.OperandValToReplace, in
5721	// which case doing replaceUsesOfWith leads to replacing both operands
5722	// with the same value. TODO: Reorganize this.
5723	if (LU.Kind == LSRUse::ICmpZero)
5724	LF.UserInst->setOperand(i: `0`, Val: FullV);
5725	else
5726	LF.UserInst->replaceUsesOfWith(From: LF.OperandValToReplace, To: FullV);
5727	}
5728
5729	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: LF.OperandValToReplace))
5730	DeadInsts.emplace_back(Args&: OperandIsInstr);
5731	}
5732
5733	// Trying to hoist the IVInc to loop header if all IVInc users are in
5734	// the loop header. It will help backend to generate post index load/store
5735	// when the latch block is different from loop header block.
5736	static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
5737	const LSRUse &LU, Instruction *IVIncInsertPos,
5738	Loop *L) {
5739	if (LU.Kind != LSRUse::Address)
5740	return false;
5741
5742	// For now this code do the conservative optimization, only work for
5743	// the header block. Later we can hoist the IVInc to the block post
5744	// dominate all users.
5745	BasicBlock *LHeader = L->getHeader();
5746	if (IVIncInsertPos->getParent() == LHeader)
5747	return false;
5748
5749	if (!Fixup.OperandValToReplace \|\|
5750	any_of(Range: Fixup.OperandValToReplace->users(), P: [&LHeader](User *U) {
5751	Instruction *UI = cast<Instruction>(Val: U);
5752	return UI->getParent() != LHeader;
5753	}))
5754	return false;
5755
5756	Instruction *I = Fixup.UserInst;
5757	Type *Ty = I->getType();
5758	return Ty->isIntegerTy() &&
5759	((isa<LoadInst>(Val: I) && TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty)) \|\|
5760	(isa<StoreInst>(Val: I) && TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty)));
5761	}
5762
5763	/// Rewrite all the fixup locations with new values, following the chosen
5764	/// solution.
5765	void LSRInstance::ImplementSolution(
5766	const SmallVectorImpl<const Formula *> &Solution) {
5767	// Keep track of instructions we may have made dead, so that
5768	// we can remove them after we are done working.
5769	SmallVector<WeakTrackingVH, `16`> DeadInsts;
5770
5771	// Mark phi nodes that terminate chains so the expander tries to reuse them.
5772	for (const IVChain &Chain : IVChainVec) {
5773	if (PHINode *PN = dyn_cast<PHINode>(Val: Chain.tailUserInst()))
5774	Rewriter.setChainedPhi(PN);
5775	}
5776
5777	// Expand the new value definitions and update the users.
5778	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
5779	for (const LSRFixup &Fixup : Uses [LUIdx].Fixups) {
5780	Instruction *InsertPos =
5781	canHoistIVInc(TTI, Fixup, LU: Uses [LUIdx], IVIncInsertPos, L)
5782	? L->getHeader()->getTerminator()
5783	: IVIncInsertPos;
5784	Rewriter.setIVIncInsertPos(L, Pos: InsertPos);
5785	Rewrite(LU: Uses [LUIdx], LF: Fixup, F: *Solution [LUIdx], DeadInsts);
5786	Changed = true;
5787	}
5788
5789	for (const IVChain &Chain : IVChainVec) {
5790	GenerateIVChain(Chain, DeadInsts);
5791	Changed = true;
5792	}
5793
5794	for (const WeakVH &IV : Rewriter.getInsertedIVs())
5795	if (IV && dyn_cast<Instruction>(Val: &*IV)->getParent())
5796	ScalarEvolutionIVs.push_back(Elt: IV);
5797
5798	// Clean up after ourselves. This must be done before deleting any
5799	// instructions.
5800	Rewriter.clear();
5801
5802	Changed \|= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
5803	TLI: &TLI, MSSAU);
5804
5805	// In our cost analysis above, we assume that each addrec consumes exactly
5806	// one register, and arrange to have increments inserted just before the
5807	// latch to maximimize the chance this is true. However, if we reused
5808	// existing IVs, we now need to move the increments to match our
5809	// expectations. Otherwise, our cost modeling results in us having a
5810	// chosen a non-optimal result for the actual schedule. (And yes, this
5811	// scheduling decision does impact later codegen.)
5812	for (PHINode &PN : L->getHeader()->phis()) {
5813	BinaryOperator BO = nullptr*;
5814	Value Start = nullptr, Step = nullptr;
5815	if (!matchSimpleRecurrence(P: &PN, BO, Start, Step))
5816	continue;
5817
5818	switch (BO->getOpcode()) {
5819	case Instruction::Sub:
5820	if (BO->getOperand(i_nocapture: `0`) != &PN)
5821	// sub is non-commutative - match handling elsewhere in LSR
5822	continue;
5823	break;
5824	case Instruction::Add:
5825	break;
5826	default:
5827	continue;
5828	};
5829
5830	if (!isa<Constant>(Val: Step))
5831	// If not a constant step, might increase register pressure
5832	// (We assume constants have been canonicalized to RHS)
5833	continue;
5834
5835	if (BO->getParent() == IVIncInsertPos->getParent())
5836	// Only bother moving across blocks. Isel can handle block local case.
5837	continue;
5838
5839	// Can we legally schedule inc at the desired point?
5840	if (!llvm::all_of(Range: BO->uses(),
5841	P: [&](Use &U) {return DT.dominates(Def: IVIncInsertPos, U);}))
5842	continue;
5843	BO->moveBefore(MovePos: IVIncInsertPos);
5844	Changed = true;
5845	}
5846
5847
5848	}
5849
5850	LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
5851	DominatorTree &DT, LoopInfo &LI,
5852	const TargetTransformInfo &TTI, AssumptionCache &AC,
5853	TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
5854	: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
5855	MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > `0`
5856	? PreferredAddresingMode
5857	: TTI.getPreferredAddressingMode(L, SE: &SE)),
5858	Rewriter (SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false),
5859	BaselineCost (L, SE, TTI, AMK) {
5860	// If LoopSimplify form is not available, stay out of trouble.
5861	if (!L->isLoopSimplifyForm())
5862	return;
5863
5864	// If there's no interesting work to be done, bail early.
5865	if (IU.empty()) return;
5866
5867	// If there's too much analysis to be done, bail early. We won't be able to
5868	// model the problem anyway.
5869	unsigned NumUsers = `0`;
5870	for (const IVStrideUse &U : IU) {
5871	if (++NumUsers > MaxIVUsers) {
5872	(void)U;
5873	LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
5874	<< "\n");
5875	return;
5876	}
5877	// Bail out if we have a PHI on an EHPad that gets a value from a
5878	// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
5879	// no good place to stick any instructions.
5880	if (auto *PN = dyn_cast<PHINode>(Val: U.getUser())) {
5881	auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
5882	if (isa<FuncletPadInst>(Val: FirstNonPHI) \|\|
5883	isa<CatchSwitchInst>(Val: FirstNonPHI))
5884	for (BasicBlock *PredBB : PN->blocks())
5885	if (isa<CatchSwitchInst>(Val: PredBB->getFirstNonPHI()))
5886	return;
5887	}
5888	}
5889
5890	LLVM_DEBUG(dbgs() << "\nLSR on loop ";
5891	L->getHeader()->printAsOperand(dbgs(), /PrintType=/false);
5892	dbgs() << ":\n");
5893
5894	// Configure SCEVExpander already now, so the correct mode is used for
5895	// isSafeToExpand() checks.
5896	#ifndef NDEBUG
5897	Rewriter.setDebugType(DEBUG_TYPE);
5898	#endif
5899	Rewriter.disableCanonicalMode();
5900	Rewriter.enableLSRMode();
5901
5902	// First, perform some low-level loop optimizations.
5903	OptimizeShadowIV();
5904	OptimizeLoopTermCond();
5905
5906	// If loop preparation eliminates all interesting IV users, bail.
5907	if (IU.empty()) return;
5908
5909	// Skip nested loops until we can model them better with formulae.
5910	if (!L->isInnermost()) {
5911	LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
5912	return;
5913	}
5914
5915	// Start collecting data and preparing for the solver.
5916	// If number of registers is not the major cost, we cannot benefit from the
5917	// current profitable chain optimization which is based on number of
5918	// registers.
5919	// FIXME: add profitable chain optimization for other kinds major cost, for
5920	// example number of instructions.
5921	if (TTI.isNumRegsMajorCostOfLSR() \|\| StressIVChain)
5922	CollectChains();
5923	CollectInterestingTypesAndFactors();
5924	CollectFixupsAndInitialFormulae();
5925	CollectLoopInvariantFixupsAndFormulae();
5926
5927	if (Uses.empty())
5928	return;
5929
5930	LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
5931	print_uses(dbgs()));
5932
5933	// Now use the reuse data to generate a bunch of interesting ways
5934	// to formulate the values needed for the uses.
5935	GenerateAllReuseFormulae();
5936
5937	FilterOutUndesirableDedicatedRegisters();
5938	NarrowSearchSpaceUsingHeuristics();
5939
5940	SmallVector<const Formula *, `8`> Solution;
5941	Solve(Solution);
5942
5943	// Release memory that is no longer needed.
5944	Factors.clear();
5945	Types.clear();
5946	RegUses.clear();
5947
5948	if (Solution.empty())
5949	return;
5950
5951	#ifndef NDEBUG
5952	// Formulae should be legal.
5953	for (const LSRUse &LU : Uses) {
5954	for (const Formula &F : LU.Formulae)
5955	assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
5956	F) && "Illegal formula generated!");
5957	};
5958	#endif
5959
5960	// Now that we've decided what we want, make it so.
5961	ImplementSolution(Solution);
5962	}
5963
5964	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
5965	void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
5966	if (Factors.empty() && Types.empty()) return;
5967
5968	OS << "LSR has identified the following interesting factors and types: ";
5969	bool First = true;
5970
5971	for (int64_t Factor : Factors) {
5972	if (!First) OS << ", ";
5973	First = false;
5974	OS << `'*'` << Factor;
5975	}
5976
5977	for (Type *Ty : Types) {
5978	if (!First) OS << ", ";
5979	First = false;
5980	OS << `'('` << *Ty << `')'`;
5981	}
5982	OS << `'\n'`;
5983	}
5984
5985	void LSRInstance::print_fixups(raw_ostream &OS) const {
5986	OS << "LSR is examining the following fixup sites:\n";
5987	for (const LSRUse &LU : Uses)
5988	for (const LSRFixup &LF : LU.Fixups) {
5989	dbgs() << " ";
5990	LF.print(OS);
5991	OS << `'\n'`;
5992	}
5993	}
5994
5995	void LSRInstance::print_uses(raw_ostream &OS) const {
5996	OS << "LSR is examining the following uses:\n";
5997	for (const LSRUse &LU : Uses) {
5998	dbgs() << " ";
5999	LU.print(OS);
6000	OS << `'\n'`;
6001	for (const Formula &F : LU.Formulae) {
6002	OS << " ";
6003	F.print(OS);
6004	OS << `'\n'`;
6005	}
6006	}
6007	}
6008
6009	void LSRInstance::print(raw_ostream &OS) const {
6010	print_factors_and_types(OS);
6011	print_fixups(OS);
6012	print_uses(OS);
6013	}
6014
6015	LLVM_DUMP_METHOD void LSRInstance::dump() const {
6016	print(OS&: errs()); errs() << `'\n'`;
6017	}
6018	#endif
6019
6020	namespace {
6021
6022	class LoopStrengthReduce : public LoopPass {
6023	public:
6024	static char ID; // Pass ID, replacement for typeid
6025
6026	LoopStrengthReduce();
6027
6028	private:
6029	bool runOnLoop(Loop *L, LPPassManager &LPM) override;
6030	void getAnalysisUsage(AnalysisUsage &AU) const override;
6031	};
6032
6033	} // end anonymous namespace
6034
6035	LoopStrengthReduce::LoopStrengthReduce() : LoopPass (ID) {
6036	initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
6037	}
6038
6039	void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
6040	// We split critical edges, so we change the CFG. However, we do update
6041	// many analyses if they are around.
6042	AU.addPreservedID(ID&: LoopSimplifyID);
6043
6044	AU.addRequired<LoopInfoWrapperPass>();
6045	AU.addPreserved<LoopInfoWrapperPass>();
6046	AU.addRequiredID(ID&: LoopSimplifyID);
6047	AU.addRequired<DominatorTreeWrapperPass>();
6048	AU.addPreserved<DominatorTreeWrapperPass>();
6049	AU.addRequired<ScalarEvolutionWrapperPass>();
6050	AU.addPreserved<ScalarEvolutionWrapperPass>();
6051	AU.addRequired<AssumptionCacheTracker>();
6052	AU.addRequired<TargetLibraryInfoWrapperPass>();
6053	// Requiring LoopSimplify a second time here prevents IVUsers from running
6054	// twice, since LoopSimplify was invalidated by running ScalarEvolution.
6055	AU.addRequiredID(ID&: LoopSimplifyID);
6056	AU.addRequired<IVUsersWrapperPass>();
6057	AU.addPreserved<IVUsersWrapperPass>();
6058	AU.addRequired<TargetTransformInfoWrapperPass>();
6059	AU.addPreserved<MemorySSAWrapperPass>();
6060	}
6061
6062	namespace {
6063
6064	/// Enables more convenient iteration over a DWARF expression vector.
6065	static iterator_range<llvm::DIExpression::expr_op_iterator>
6066	ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6067	llvm::DIExpression::expr_op_iterator Begin =
6068	llvm::DIExpression::expr_op_iterator (Expr.begin());
6069	llvm::DIExpression::expr_op_iterator End =
6070	llvm::DIExpression::expr_op_iterator (Expr.end());
6071	return {Begin, End};
6072	}
6073
6074	struct SCEVDbgValueBuilder {
6075	SCEVDbgValueBuilder() = default;
6076	SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
6077
6078	void clone(const SCEVDbgValueBuilder &Base) {
6079	LocationOps = Base.LocationOps;
6080	Expr = Base.Expr;
6081	}
6082
6083	void clear() {
6084	LocationOps.clear();
6085	Expr.clear();
6086	}
6087
6088	/// The DIExpression as we translate the SCEV.
6089	SmallVector<uint64_t, `6`> Expr;
6090	/// The location ops of the DIExpression.
6091	SmallVector<Value *, `2`> LocationOps;
6092
6093	void pushOperator(uint64_t Op) { Expr.push_back(Elt: Op); }
6094	void pushUInt(uint64_t Operand) { Expr.push_back(Elt: Operand); }
6095
6096	/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
6097	/// in the set of values referenced by the expression.
6098	void pushLocation(llvm::Value *V) {
6099	Expr.push_back(Elt: llvm::dwarf::DW_OP_LLVM_arg);
6100	auto *It = llvm::find(Range&: LocationOps, Val: V);
6101	unsigned ArgIndex = `0`;
6102	if (It != LocationOps.end()) {
6103	ArgIndex = std::distance(first: LocationOps.begin(), last: It);
6104	} else {
6105	ArgIndex = LocationOps.size();
6106	LocationOps.push_back(Elt: V);
6107	}
6108	Expr.push_back(Elt: ArgIndex);
6109	}
6110
6111	void pushValue(const SCEVUnknown *U) {
6112	llvm::Value *V = cast<SCEVUnknown>(Val: U)->getValue();
6113	pushLocation(V);
6114	}
6115
6116	bool pushConst(const SCEVConstant *C) {
6117	if (C->getAPInt().getSignificantBits() > `64`)
6118	return false;
6119	Expr.push_back(Elt: llvm::dwarf::DW_OP_consts);
6120	Expr.push_back(Elt: C->getAPInt().getSExtValue());
6121	return true;
6122	}
6123
6124	// Iterating the expression as DWARF ops is convenient when updating
6125	// DWARF_OP_LLVM_args.
6126	iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
6127	return ToDwarfOpIter(Expr);
6128	}
6129
6130	/// Several SCEV types are sequences of the same arithmetic operator applied
6131	/// to constants and values that may be extended or truncated.
6132	bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6133	uint64_t DwarfOp) {
6134	assert((isa<llvm::SCEVAddExpr>(CommExpr) \|\| isa<SCEVMulExpr>(CommExpr)) &&
6135	"Expected arithmetic SCEV type");
6136	bool Success = true;
6137	unsigned EmitOperator = `0`;
6138	for (const auto &Op : CommExpr->operands()) {
6139	Success &= pushSCEV(S: Op);
6140
6141	if (EmitOperator >= `1`)
6142	pushOperator(Op: DwarfOp);
6143	++EmitOperator;
6144	}
6145	return Success;
6146	}
6147
6148	// TODO: Identify and omit noop casts.
6149	bool pushCast(const llvm::SCEVCastExpr C, bool* IsSigned) {
6150	const llvm::SCEV *Inner = C->getOperand(i: `0`);
6151	const llvm::Type *Type = C->getType();
6152	uint64_t ToWidth = Type->getIntegerBitWidth();
6153	bool Success = pushSCEV(S: Inner);
6154	uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
6155	IsSigned ? llvm::dwarf::DW_ATE_signed
6156	: llvm::dwarf::DW_ATE_unsigned};
6157	for (const auto &Op : CastOps)
6158	pushOperator(Op);
6159	return Success;
6160	}
6161
6162	// TODO: MinMax - although these haven't been encountered in the test suite.
6163	bool pushSCEV(const llvm::SCEV *S) {
6164	bool Success = true;
6165	if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(Val: S)) {
6166	Success &= pushConst(C: StartInt);
6167
6168	} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S)) {
6169	if (!U->getValue())
6170	return false;
6171	pushLocation(V: U->getValue());
6172
6173	} else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(Val: S)) {
6174	Success &= pushArithmeticExpr(CommExpr: MulRec, DwarfOp: llvm::dwarf::DW_OP_mul);
6175
6176	} else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(Val: S)) {
6177	Success &= pushSCEV(S: UDiv->getLHS());
6178	Success &= pushSCEV(S: UDiv->getRHS());
6179	pushOperator(Op: llvm::dwarf::DW_OP_div);
6180
6181	} else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(Val: S)) {
6182	// Assert if a new and unknown SCEVCastEXpr type is encountered.
6183	assert((isa<SCEVZeroExtendExpr>(Cast) \|\| isa<SCEVTruncateExpr>(Cast) \|\|
6184	isa<SCEVPtrToIntExpr>(Cast) \|\| isa<SCEVSignExtendExpr>(Cast)) &&
6185	"Unexpected cast type in SCEV.");
6186	Success &= pushCast(C: Cast, IsSigned: (isa<SCEVSignExtendExpr>(Val: Cast)));
6187
6188	} else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(Val: S)) {
6189	Success &= pushArithmeticExpr(CommExpr: AddExpr, DwarfOp: llvm::dwarf::DW_OP_plus);
6190
6191	} else if (isa<SCEVAddRecExpr>(Val: S)) {
6192	// Nested SCEVAddRecExpr are generated by nested loops and are currently
6193	// unsupported.
6194	return false;
6195
6196	} else {
6197	return false;
6198	}
6199	return Success;
6200	}
6201
6202	/// Return true if the combination of arithmetic operator and underlying
6203	/// SCEV constant value is an identity function.
6204	bool isIdentityFunction(uint64_t Op, const SCEV *S) {
6205	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: S)) {
6206	if (C->getAPInt().getSignificantBits() > `64`)
6207	return false;
6208	int64_t I = C->getAPInt().getSExtValue();
6209	switch (Op) {
6210	case llvm::dwarf::DW_OP_plus:
6211	case llvm::dwarf::DW_OP_minus:
6212	return I == `0`;
6213	case llvm::dwarf::DW_OP_mul:
6214	case llvm::dwarf::DW_OP_div:
6215	return I == `1`;
6216	}
6217	}
6218	return false;
6219	}
6220
6221	/// Convert a SCEV of a value to a DIExpression that is pushed onto the
6222	/// builder's expression stack. The stack should already contain an
6223	/// expression for the iteration count, so that it can be multiplied by
6224	/// the stride and added to the start.
6225	/// Components of the expression are omitted if they are an identity function.
6226	/// Chain (non-affine) SCEVs are not supported.
6227	bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6228	assert(SAR.isAffine() && "Expected affine SCEV");
6229	// TODO: Is this check needed?
6230	if (isa<SCEVAddRecExpr>(Val: SAR.getStart()))
6231	return false;
6232
6233	const SCEV *Start = SAR.getStart();
6234	const SCEV *Stride = SAR.getStepRecurrence(SE);
6235
6236	// Skip pushing arithmetic noops.
6237	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_mul, S: Stride)) {
6238	if (!pushSCEV(S: Stride))
6239	return false;
6240	pushOperator(Op: llvm::dwarf::DW_OP_mul);
6241	}
6242	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_plus, S: Start)) {
6243	if (!pushSCEV(S: Start))
6244	return false;
6245	pushOperator(Op: llvm::dwarf::DW_OP_plus);
6246	}
6247	return true;
6248	}
6249
6250	/// Create an expression that is an offset from a value (usually the IV).
6251	void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
6252	pushLocation(V: OffsetValue);
6253	DIExpression::appendOffset(Ops&: Expr, Offset);
6254	LLVM_DEBUG(
6255	dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
6256	<< std::to_string(Offset) << "\n");
6257	}
6258
6259	/// Combine a translation of the SCEV and the IV to create an expression that
6260	/// recovers a location's value.
6261	/// returns true if an expression was created.
6262	bool createIterCountExpr(const SCEV *S,
6263	const SCEVDbgValueBuilder &IterationCount,
6264	ScalarEvolution &SE) {
6265	// SCEVs for SSA values are most frquently of the form
6266	// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
6267	// This is because %a is a PHI node that is not the IV. However, these
6268	// SCEVs have not been observed to result in debuginfo-lossy optimisations,
6269	// so its not expected this point will be reached.
6270	if (!isa<SCEVAddRecExpr>(Val: S))
6271	return false;
6272
6273	LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
6274	<< `'\n'`);
6275
6276	const auto *Rec = cast<SCEVAddRecExpr>(Val: S);
6277	if (!Rec->isAffine())
6278	return false;
6279
6280	if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6281	return false;
6282
6283	// Initialise a new builder with the iteration count expression. In
6284	// combination with the value's SCEV this enables recovery.
6285	clone(Base: IterationCount);
6286	if (!SCEVToValueExpr(SAR: *Rec, SE))
6287	return false;
6288
6289	return true;
6290	}
6291
6292	/// Convert a SCEV of a value to a DIExpression that is pushed onto the
6293	/// builder's expression stack. The stack should already contain an
6294	/// expression for the iteration count, so that it can be multiplied by
6295	/// the stride and added to the start.
6296	/// Components of the expression are omitted if they are an identity function.
6297	bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
6298	ScalarEvolution &SE) {
6299	assert(SAR.isAffine() && "Expected affine SCEV");
6300	if (isa<SCEVAddRecExpr>(Val: SAR.getStart())) {
6301	LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
6302	<< SAR << `'\n'`);
6303	return false;
6304	}
6305	const SCEV *Start = SAR.getStart();
6306	const SCEV *Stride = SAR.getStepRecurrence(SE);
6307
6308	// Skip pushing arithmetic noops.
6309	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_minus, S: Start)) {
6310	if (!pushSCEV(S: Start))
6311	return false;
6312	pushOperator(Op: llvm::dwarf::DW_OP_minus);
6313	}
6314	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_div, S: Stride)) {
6315	if (!pushSCEV(S: Stride))
6316	return false;
6317	pushOperator(Op: llvm::dwarf::DW_OP_div);
6318	}
6319	return true;
6320	}
6321
6322	// Append the current expression and locations to a location list and an
6323	// expression list. Modify the DW_OP_LLVM_arg indexes to account for
6324	// the locations already present in the destination list.
6325	void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6326	SmallVectorImpl<Value *> &DestLocations) {
6327	assert(!DestLocations.empty() &&
6328	"Expected the locations vector to contain the IV");
6329	// The DWARF_OP_LLVM_arg arguments of the expression being appended must be
6330	// modified to account for the locations already in the destination vector.
6331	// All builders contain the IV as the first location op.
6332	assert(!LocationOps.empty() &&
6333	"Expected the location ops to contain the IV.");
6334	// DestIndexMap[n] contains the index in DestLocations for the nth
6335	// location in this SCEVDbgValueBuilder.
6336	SmallVector<uint64_t, `2`> DestIndexMap;
6337	for (const auto &Op : LocationOps) {
6338	auto It = find(Range&: DestLocations, Val: Op);
6339	if (It != DestLocations.end()) {
6340	// Location already exists in DestLocations, reuse existing ArgIndex.
6341	DestIndexMap.push_back(Elt: std::distance(first: DestLocations.begin(), last: It));
6342	continue;
6343	}
6344	// Location is not in DestLocations, add it.
6345	DestIndexMap.push_back(Elt: DestLocations.size());
6346	DestLocations.push_back(Elt: Op);
6347	}
6348
6349	for (const auto &Op : expr_ops()) {
6350	if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6351	Op.appendToVector(V&: DestExpr);
6352	continue;
6353	}
6354
6355	DestExpr.push_back(Elt: dwarf::DW_OP_LLVM_arg);
6356	// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
6357	// DestIndexMap[n] contains its new index in DestLocations.
6358	uint64_t NewIndex = DestIndexMap [Op.getArg(I: `0`)];
6359	DestExpr.push_back(Elt: NewIndex);
6360	}
6361	}
6362	};
6363
6364	/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
6365	/// and DIExpression.
6366	struct DVIRecoveryRec {
6367	DVIRecoveryRec(DbgValueInst *DbgValue)
6368	: DbgRef (DbgValue), Expr(DbgValue->getExpression()),
6369	HadLocationArgList(false) {}
6370	DVIRecoveryRec(DbgVariableRecord *DVR)
6371	: DbgRef (DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
6372
6373	PointerUnion<DbgValueInst , DbgVariableRecord > DbgRef;
6374	DIExpression *Expr;
6375	bool HadLocationArgList;
6376	SmallVector<WeakVH, `2`> LocationOps;
6377	SmallVector<const llvm::SCEV *, `2`> SCEVs;
6378	SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, `2`> RecoveryExprs;
6379
6380	void clear() {
6381	for (auto &RE : RecoveryExprs)
6382	RE.reset();
6383	RecoveryExprs.clear();
6384	}
6385
6386	~DVIRecoveryRec() { clear(); }
6387	};
6388	} // namespace
6389
6390	/// Returns the total number of DW_OP_llvm_arg operands in the expression.
6391	/// This helps in determining if a DIArglist is necessary or can be omitted from
6392	/// the dbg.value.
6393	static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
6394	auto expr_ops = ToDwarfOpIter(Expr);
6395	unsigned Count = `0`;
6396	for (auto Op : expr_ops)
6397	if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
6398	Count++;
6399	return Count;
6400	}
6401
6402	/// Overwrites DVI with the location and Ops as the DIExpression. This will
6403	/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
6404	/// because a DIArglist is not created for the first argument of the dbg.value.
6405	template <typename T>
6406	static void updateDVIWithLocation(T &DbgVal, Value *Location,
6407	SmallVectorImpl<uint64_t> &Ops) {
6408	assert(numLLVMArgOps(Ops) == `0` && "Expected expression that does not "
6409	"contain any DW_OP_llvm_arg operands.");
6410	DbgVal.setRawLocation(ValueAsMetadata::get(V: Location));
6411	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6412	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6413	}
6414
6415	/// Overwrite DVI with locations placed into a DIArglist.
6416	template <typename T>
6417	static void updateDVIWithLocations(T &DbgVal,
6418	SmallVectorImpl<Value *> &Locations,
6419	SmallVectorImpl<uint64_t> &Ops) {
6420	assert(numLLVMArgOps(Ops) != `0` &&
6421	"Expected expression that references DIArglist locations using "
6422	"DW_OP_llvm_arg operands.");
6423	SmallVector<ValueAsMetadata *, `3`> MetadataLocs;
6424	for (Value *V : Locations)
6425	MetadataLocs.push_back(Elt: ValueAsMetadata::get(V));
6426	auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6427	DbgVal.setRawLocation(llvm::DIArgList::get(Context&: DbgVal.getContext(), Args: ValArrayRef));
6428	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6429	}
6430
6431	/// Write the new expression and new location ops for the dbg.value. If possible
6432	/// reduce the szie of the dbg.value intrinsic by omitting DIArglist. This
6433	/// can be omitted if:
6434	/// 1. There is only a single location, refenced by a single DW_OP_llvm_arg.
6435	/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
6436	static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
6437	SmallVectorImpl<Value *> &NewLocationOps,
6438	SmallVectorImpl<uint64_t> &NewExpr) {
6439	auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
6440	unsigned NumLLVMArgs = numLLVMArgOps(Expr&: NewExpr);
6441	if (NumLLVMArgs == `0`) {
6442	// Location assumed to be on the stack.
6443	updateDVIWithLocation(*DbgVal, NewLocationOps [`0`], NewExpr);
6444	} else if (NumLLVMArgs == `1` && NewExpr [`0`] == dwarf::DW_OP_LLVM_arg) {
6445	// There is only a single DW_OP_llvm_arg at the start of the expression,
6446	// so it can be omitted along with DIArglist.
6447	assert(NewExpr[`1`] == `0` &&
6448	"Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6449	llvm::SmallVector<uint64_t, `6`> ShortenedOps(llvm::drop_begin(RangeOrContainer&: NewExpr, N: `2`));
6450	updateDVIWithLocation(*DbgVal, NewLocationOps [`0`], ShortenedOps);
6451	} else {
6452	// Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
6453	updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
6454	}
6455
6456	// If the DIExpression was previously empty then add the stack terminator.
6457	// Non-empty expressions have only had elements inserted into them and so
6458	// the terminator should already be present e.g. stack_value or fragment.
6459	DIExpression *SalvageExpr = DbgVal->getExpression();
6460	if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
6461	SalvageExpr =
6462	DIExpression::append(Expr: SalvageExpr, Ops: {dwarf::DW_OP_stack_value});
6463	DbgVal->setExpression(SalvageExpr);
6464	}
6465	};
6466	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef))
6467	UpdateDbgValueInstImpl (cast<DbgValueInst *>(Val&: DVIRec.DbgRef));
6468	else
6469	UpdateDbgValueInstImpl (cast<DbgVariableRecord *>(Val&: DVIRec.DbgRef));
6470	}
6471
6472	/// Cached location ops may be erased during LSR, in which case a poison is
6473	/// required when restoring from the cache. The type of that location is no
6474	/// longer available, so just use int8. The poison will be replaced by one or
6475	/// more locations later when a SCEVDbgValueBuilder selects alternative
6476	/// locations to use for the salvage.
6477	static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
6478	return (VH) ? VH : PoisonValue::get(T: llvm::Type::getInt8Ty(C));
6479	}
6480
6481	/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
6482	static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
6483	auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
6484	LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
6485	<< "scev-salvage: post-LSR: " << *DbgVal << `'\n'`);
6486	assert(DVIRec.Expr && "Expected an expression");
6487	DbgVal->setExpression(DVIRec.Expr);
6488
6489	// Even a single location-op may be inside a DIArgList and referenced with
6490	// DW_OP_LLVM_arg, which is valid only with a DIArgList.
6491	if (!DVIRec.HadLocationArgList) {
6492	assert(DVIRec.LocationOps.size() == `1` &&
6493	"Unexpected number of location ops.");
6494	// LSR's unsuccessful salvage attempt may have added DIArgList, which in
6495	// this case was not present before, so force the location back to a
6496	// single uncontained Value.
6497	Value *CachedValue =
6498	getValueOrPoison(DVIRec.LocationOps [`0`], DbgVal->getContext());
6499	DbgVal->setRawLocation(ValueAsMetadata::get(V: CachedValue));
6500	} else {
6501	SmallVector<ValueAsMetadata *, `3`> MetadataLocs;
6502	for (WeakVH VH : DVIRec.LocationOps) {
6503	Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
6504	MetadataLocs.push_back(Elt: ValueAsMetadata::get(V: CachedValue));
6505	}
6506	auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6507	DbgVal->setRawLocation(
6508	llvm::DIArgList::get(Context&: DbgVal->getContext(), Args: ValArrayRef));
6509	}
6510	LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << `'\n'`);
6511	};
6512	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef))
6513	RestorePreTransformStateImpl (cast<DbgValueInst *>(Val&: DVIRec.DbgRef));
6514	else
6515	RestorePreTransformStateImpl (cast<DbgVariableRecord *>(Val&: DVIRec.DbgRef));
6516	}
6517
6518	static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
6519	llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
6520	const SCEV *SCEVInductionVar,
6521	SCEVDbgValueBuilder IterCountExpr) {
6522
6523	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef)
6524	? !cast<DbgValueInst *>(Val&: DVIRec.DbgRef)->isKillLocation()
6525	: !cast<DbgVariableRecord *>(Val&: DVIRec.DbgRef)->isKillLocation())
6526	return false;
6527
6528	// LSR may have caused several changes to the dbg.value in the failed salvage
6529	// attempt. So restore the DIExpression, the location ops and also the
6530	// location ops format, which is always DIArglist for multiple ops, but only
6531	// sometimes for a single op.
6532	restorePreTransformState(DVIRec);
6533
6534	// LocationOpIndexMap[i] will store the post-LSR location index of
6535	// the non-optimised out location at pre-LSR index i.
6536	SmallVector<int64_t, `2`> LocationOpIndexMap;
6537	LocationOpIndexMap.assign(NumElts: DVIRec.LocationOps.size(), Elt: -`1`);
6538	SmallVector<Value *, `2`> NewLocationOps;
6539	NewLocationOps.push_back(Elt: LSRInductionVar);
6540
6541	for (unsigned i = `0`; i < DVIRec.LocationOps.size(); i++) {
6542	WeakVH VH = DVIRec.LocationOps [i];
6543	// Place the locations not optimised out in the list first, avoiding
6544	// inserts later. The map is used to update the DIExpression's
6545	// DW_OP_LLVM_arg arguments as the expression is updated.
6546	if (VH && !isa<UndefValue>(Val: VH)) {
6547	NewLocationOps.push_back(Elt: VH);
6548	LocationOpIndexMap [i] = NewLocationOps.size() - `1`;
6549	LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
6550	<< " now at index " << LocationOpIndexMap[i] << "\n");
6551	continue;
6552	}
6553
6554	// It's possible that a value referred to in the SCEV may have been
6555	// optimised out by LSR.
6556	if (SE.containsErasedValue(S: DVIRec.SCEVs [i]) \|\|
6557	SE.containsUndefs(S: DVIRec.SCEVs [i])) {
6558	LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
6559	<< " refers to a location that is now undef or erased. "
6560	"Salvage abandoned.\n");
6561	return false;
6562	}
6563
6564	LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
6565	<< " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
6566
6567	DVIRec.RecoveryExprs [i] = std::make_unique<SCEVDbgValueBuilder>();
6568	SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs [i].get();
6569
6570	// Create an offset-based salvage expression if possible, as it requires
6571	// less DWARF ops than an iteration count-based expression.
6572	if (std::optional<APInt> Offset =
6573	SE.computeConstantDifference(LHS: DVIRec.SCEVs [i], RHS: SCEVInductionVar)) {
6574	if (Offset ->getSignificantBits() <= `64`)
6575	SalvageExpr->createOffsetExpr(Offset: Offset ->getSExtValue(), OffsetValue: LSRInductionVar);
6576	} else if (!SalvageExpr->createIterCountExpr(S: DVIRec.SCEVs [i], IterationCount: IterCountExpr,
6577	SE))
6578	return false;
6579	}
6580
6581	// Merge the DbgValueBuilder generated expressions and the original
6582	// DIExpression, place the result into an new vector.
6583	SmallVector<uint64_t, `3`> NewExpr;
6584	if (DVIRec.Expr->getNumElements() == `0`) {
6585	assert(DVIRec.RecoveryExprs.size() == `1` &&
6586	"Expected only a single recovery expression for an empty "
6587	"DIExpression.");
6588	assert(DVIRec.RecoveryExprs[`0`] &&
6589	"Expected a SCEVDbgSalvageBuilder for location 0");
6590	SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs [`0`].get();
6591	B->appendToVectors(DestExpr&: NewExpr, DestLocations&: NewLocationOps);
6592	}
6593	for (const auto &Op : DVIRec.Expr->expr_ops()) {
6594	// Most Ops needn't be updated.
6595	if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6596	Op.appendToVector(V&: NewExpr);
6597	continue;
6598	}
6599
6600	uint64_t LocationArgIndex = Op.getArg(I: `0`);
6601	SCEVDbgValueBuilder *DbgBuilder =
6602	DVIRec.RecoveryExprs [LocationArgIndex].get();
6603	// The location doesn't have s SCEVDbgValueBuilder, so LSR did not
6604	// optimise it away. So just translate the argument to the updated
6605	// location index.
6606	if (!DbgBuilder) {
6607	NewExpr.push_back(Elt: dwarf::DW_OP_LLVM_arg);
6608	assert(LocationOpIndexMap[Op.getArg(`0`)] != -`1` &&
6609	"Expected a positive index for the location-op position.");
6610	NewExpr.push_back(Elt: LocationOpIndexMap [Op.getArg(I: `0`)]);
6611	continue;
6612	}
6613	// The location has a recovery expression.
6614	DbgBuilder->appendToVectors(DestExpr&: NewExpr, DestLocations&: NewLocationOps);
6615	}
6616
6617	UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
6618	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef))
6619	LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
6620	<< cast<DbgValueInst >(DVIRec.DbgRef) << "\n");
6621	else
6622	LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
6623	<< cast<DbgVariableRecord >(DVIRec.DbgRef) << "\n");
6624	return true;
6625	}
6626
6627	/// Obtain an expression for the iteration count, then attempt to salvage the
6628	/// dbg.value intrinsics.
6629	static void DbgRewriteSalvageableDVIs(
6630	llvm::Loop L, ScalarEvolution &SE, llvm::PHINode LSRInductionVar,
6631	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> &DVIToUpdate) {
6632	if (DVIToUpdate.empty())
6633	return;
6634
6635	const llvm::SCEV *SCEVInductionVar = SE.getSCEV(V: LSRInductionVar);
6636	assert(SCEVInductionVar &&
6637	"Anticipated a SCEV for the post-LSR induction variable");
6638
6639	if (const SCEVAddRecExpr *IVAddRec =
6640	dyn_cast<SCEVAddRecExpr>(Val: SCEVInductionVar)) {
6641	if (!IVAddRec->isAffine())
6642	return;
6643
6644	// Prevent translation using excessive resources.
6645	if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6646	return;
6647
6648	// The iteration count is required to recover location values.
6649	SCEVDbgValueBuilder IterCountExpr;
6650	IterCountExpr.pushLocation(V: LSRInductionVar);
6651	if (!IterCountExpr.SCEVToIterCountExpr(SAR: *IVAddRec, SE))
6652	return;
6653
6654	LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6655	<< `'\n'`);
6656
6657	for (auto &DVIRec : DVIToUpdate) {
6658	SalvageDVI(L, SE, LSRInductionVar, DVIRec&: *DVIRec, SCEVInductionVar,
6659	IterCountExpr);
6660	}
6661	}
6662	}
6663
6664	/// Identify and cache salvageable DVI locations and expressions along with the
6665	/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
6666	/// cacheing and salvaging.
6667	static void DbgGatherSalvagableDVI(
6668	Loop *L, ScalarEvolution &SE,
6669	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> &SalvageableDVISCEVs,
6670	SmallSet<AssertingVH<DbgValueInst>, `2`> &DVIHandles) {
6671	for (const auto &B : L->getBlocks()) {
6672	for (auto &I : *B) {
6673	auto ProcessDbgValue = [&](auto DbgVal) -> bool* {
6674	// Ensure that if any location op is undef that the dbg.vlue is not
6675	// cached.
6676	if (DbgVal->isKillLocation())
6677	return false;
6678
6679	// Check that the location op SCEVs are suitable for translation to
6680	// DIExpression.
6681	const auto &HasTranslatableLocationOps =
6682	[&](const auto DbgValToTranslate) -> bool* {
6683	for (const auto LocOp : DbgValToTranslate->location_ops()) {
6684	if (!LocOp)
6685	return false;
6686
6687	if (!SE.isSCEVable(Ty: LocOp->getType()))
6688	return false;
6689
6690	const SCEV *S = SE.getSCEV(V: LocOp);
6691	if (SE.containsUndefs(S))
6692	return false;
6693	}
6694	return true;
6695	};
6696
6697	if (!HasTranslatableLocationOps(DbgVal))
6698	return false;
6699
6700	std::unique_ptr<DVIRecoveryRec> NewRec =
6701	std::make_unique<DVIRecoveryRec>(DbgVal);
6702	// Each location Op may need a SCEVDbgValueBuilder in order to recover
6703	// it. Pre-allocating a vector will enable quick lookups of the builder
6704	// later during the salvage.
6705	NewRec ->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
6706	for (const auto LocOp : DbgVal->location_ops()) {
6707	NewRec ->SCEVs.push_back(Elt: SE.getSCEV(V: LocOp));
6708	NewRec ->LocationOps.push_back(LocOp);
6709	NewRec ->HadLocationArgList = DbgVal->hasArgList();
6710	}
6711	SalvageableDVISCEVs.push_back(Elt: std::move(NewRec));
6712	return true;
6713	};
6714	for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange())) {
6715	if (DVR.isDbgValue() \|\| DVR.isDbgAssign())
6716	ProcessDbgValue (&DVR);
6717	}
6718	auto DVI = dyn_cast<DbgValueInst>(Val: &I);
6719	if (!DVI)
6720	continue;
6721	if (ProcessDbgValue (DVI))
6722	DVIHandles.insert(V: DVI);
6723	}
6724	}
6725	}
6726
6727	/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
6728	/// any PHi from the loop header is usable, but may have less chance of
6729	/// surviving subsequent transforms.
6730	static llvm::PHINode GetInductionVariable(const* Loop &L, ScalarEvolution &SE,
6731	const LSRInstance &LSR) {
6732
6733	auto IsSuitableIV = [&](PHINode *P) {
6734	if (!SE.isSCEVable(Ty: P->getType()))
6735	return false;
6736	if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: P)))
6737	return Rec->isAffine() && !SE.containsUndefs(S: SE.getSCEV(V: P));
6738	return false;
6739	};
6740
6741	// For now, just pick the first IV that was generated and inserted by
6742	// ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
6743	// by subsequent transforms.
6744	for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
6745	if (!IV)
6746	continue;
6747
6748	// There should only be PHI node IVs.
6749	PHINode P = cast<PHINode>(Val: &IV);
6750
6751	if (IsSuitableIV (P))
6752	return P;
6753	}
6754
6755	for (PHINode &P : L.getHeader()->phis()) {
6756	if (IsSuitableIV (&P))
6757	return &P;
6758	}
6759	return nullptr;
6760	}
6761
6762	static std::optional<std::tuple<PHINode , PHINode , const SCEV , bool*>>
6763	canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
6764	const LoopInfo &LI, const TargetTransformInfo &TTI) {
6765	if (!L->isInnermost()) {
6766	LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
6767	return std::nullopt;
6768	}
6769	// Only inspect on simple loop structure
6770	if (!L->isLoopSimplifyForm()) {
6771	LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
6772	return std::nullopt;
6773	}
6774
6775	if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
6776	LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
6777	return std::nullopt;
6778	}
6779
6780	BasicBlock *LoopLatch = L->getLoopLatch();
6781	BranchInst *BI = dyn_cast<BranchInst>(Val: LoopLatch->getTerminator());
6782	if (!BI \|\| BI->isUnconditional())
6783	return std::nullopt;
6784	auto *TermCond = dyn_cast<ICmpInst>(Val: BI->getCondition());
6785	if (!TermCond) {
6786	LLVM_DEBUG(
6787	dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
6788	return std::nullopt;
6789	}
6790	if (!TermCond->hasOneUse()) {
6791	LLVM_DEBUG(
6792	dbgs()
6793	<< "Cannot replace terminating condition with more than one use\n");
6794	return std::nullopt;
6795	}
6796
6797	BinaryOperator *LHS = dyn_cast<BinaryOperator>(Val: TermCond->getOperand(i_nocapture: `0`));
6798	Value *RHS = TermCond->getOperand(i_nocapture: `1`);
6799	if (!LHS \|\| !L->isLoopInvariant(V: RHS))
6800	// We could pattern match the inverse form of the icmp, but that is
6801	// non-canonical, and this pass is running very* late in the pipeline.*
6802	return std::nullopt;
6803
6804	// Find the IV used by the current exit condition.
6805	PHINode *ToFold;
6806	Value ToFoldStart, ToFoldStep;
6807	if (!matchSimpleRecurrence(I: LHS, P&: ToFold, Start&: ToFoldStart, Step&: ToFoldStep))
6808	return std::nullopt;
6809
6810	// Ensure the simple recurrence is a part of the current loop.
6811	if (ToFold->getParent() != L->getHeader())
6812	return std::nullopt;
6813
6814	// If that IV isn't dead after we rewrite the exit condition in terms of
6815	// another IV, there's no point in doing the transform.
6816	if (!isAlmostDeadIV(IV: ToFold, LatchBlock: LoopLatch, Cond: TermCond))
6817	return std::nullopt;
6818
6819	// Inserting instructions in the preheader has a runtime cost, scale
6820	// the allowed cost with the loops trip count as best we can.
6821	const unsigned ExpansionBudget = [&]() {
6822	unsigned Budget = `2` * SCEVCheapExpansionBudget;
6823	if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
6824	return std::min(a: Budget, b: SmallTC);
6825	if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
6826	return std::min(a: Budget, b: *SmallTC);
6827	// Unknown trip count, assume long running by default.
6828	return Budget;
6829	}();
6830
6831	const SCEV *BECount = SE.getBackedgeTakenCount(L);
6832	const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
6833	SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
6834
6835	PHINode ToHelpFold = nullptr*;
6836	const SCEV TermValueS = nullptr*;
6837	bool MustDropPoison = false;
6838	auto InsertPt = L->getLoopPreheader()->getTerminator();
6839	for (PHINode &PN : L->getHeader()->phis()) {
6840	if (ToFold == &PN)
6841	continue;
6842
6843	if (!SE.isSCEVable(Ty: PN.getType())) {
6844	LLVM_DEBUG(dbgs() << "IV of phi '" << PN
6845	<< "' is not SCEV-able, not qualified for the "
6846	"terminating condition folding.\n");
6847	continue;
6848	}
6849	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: &PN));
6850	// Only speculate on affine AddRec
6851	if (!AddRec \|\| !AddRec->isAffine()) {
6852	LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
6853	<< "' is not an affine add recursion, not qualified "
6854	"for the terminating condition folding.\n");
6855	continue;
6856	}
6857
6858	// Check that we can compute the value of AddRec on the exiting iteration
6859	// without soundness problems. evaluateAtIteration internally needs
6860	// to multiply the stride of the iteration number - which may wrap around.
6861	// The issue here is subtle because computing the result accounting for
6862	// wrap is insufficient. In order to use the result in an exit test, we
6863	// must also know that AddRec doesn't take the same value on any previous
6864	// iteration. The simplest case to consider is a candidate IV which is
6865	// narrower than the trip count (and thus original IV), but this can
6866	// also happen due to non-unit strides on the candidate IVs.
6867	if (!AddRec->hasNoSelfWrap() \|\|
6868	!SE.isKnownNonZero(S: AddRec->getStepRecurrence(SE)))
6869	continue;
6870
6871	const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
6872	const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(It: BECount, SE);
6873	if (!Expander.isSafeToExpand(S: TermValueSLocal)) {
6874	LLVM_DEBUG(
6875	dbgs() << "Is not safe to expand terminating value for phi node" << PN
6876	<< "\n");
6877	continue;
6878	}
6879
6880	if (Expander.isHighCostExpansion(Exprs: TermValueSLocal, L, Budget: ExpansionBudget,
6881	TTI: &TTI, At: InsertPt)) {
6882	LLVM_DEBUG(
6883	dbgs() << "Is too expensive to expand terminating value for phi node"
6884	<< PN << "\n");
6885	continue;
6886	}
6887
6888	// The candidate IV may have been otherwise dead and poison from the
6889	// very first iteration. If we can't disprove that, we can't use the IV.
6890	if (!mustExecuteUBIfPoisonOnPathTo(Root: &PN, OnPathTo: LoopLatch->getTerminator(), DT: &DT)) {
6891	LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
6892	<< PN << "\n");
6893	continue;
6894	}
6895
6896	// The candidate IV may become poison on the last iteration. If this
6897	// value is not branched on, this is a well defined program. We're
6898	// about to add a new use to this IV, and we have to ensure we don't
6899	// insert UB which didn't previously exist.
6900	bool MustDropPoisonLocal = false;
6901	Instruction *PostIncV =
6902	cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: LoopLatch));
6903	if (!mustExecuteUBIfPoisonOnPathTo(Root: PostIncV, OnPathTo: LoopLatch->getTerminator(),
6904	DT: &DT)) {
6905	LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
6906	<< PN << "\n");
6907
6908	// If this is a complex recurrance with multiple instructions computing
6909	// the backedge value, we might need to strip poison flags from all of
6910	// them.
6911	if (PostIncV->getOperand(i: `0`) != &PN)
6912	continue;
6913
6914	// In order to perform the transform, we need to drop the poison generating
6915	// flags on this instruction (if any).
6916	MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
6917	}
6918
6919	// We pick the last legal alternate IV. We could expore choosing an optimal
6920	// alternate IV if we had a decent heuristic to do so.
6921	ToHelpFold = &PN;
6922	TermValueS = TermValueSLocal;
6923	MustDropPoison = MustDropPoisonLocal;
6924	}
6925
6926	LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
6927	<< "Cannot find other AddRec IV to help folding\n";);
6928
6929	LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
6930	<< "\nFound loop that can fold terminating condition\n"
6931	<< " BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
6932	<< " TermCond: " << *TermCond << "\n"
6933	<< " BrandInst: " << *BI << "\n"
6934	<< " ToFold: " << *ToFold << "\n"
6935	<< " ToHelpFold: " << *ToHelpFold << "\n");
6936
6937	if (!ToFold \|\| !ToHelpFold)
6938	return std::nullopt;
6939	return std::make_tuple(args&: ToFold, args&: ToHelpFold, args&: TermValueS, args&: MustDropPoison);
6940	}
6941
6942	static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6943	DominatorTree &DT, LoopInfo &LI,
6944	const TargetTransformInfo &TTI,
6945	AssumptionCache &AC, TargetLibraryInfo &TLI,
6946	MemorySSA *MSSA) {
6947
6948	// Debug preservation - before we start removing anything identify which DVI
6949	// meet the salvageable criteria and store their DIExpression and SCEVs.
6950	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> SalvageableDVIRecords;
6951	SmallSet<AssertingVH<DbgValueInst>, `2`> DVIHandles;
6952	DbgGatherSalvagableDVI(L, SE, SalvageableDVISCEVs&: SalvageableDVIRecords, DVIHandles);
6953
6954	bool Changed = false;
6955	std::unique_ptr<MemorySSAUpdater> MSSAU;
6956	if (MSSA)
6957	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
6958
6959	// Run the main LSR transformation.
6960	const LSRInstance &Reducer =
6961	LSRInstance (L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
6962	Changed \|= Reducer.getChanged();
6963
6964	// Remove any extra phis created by processing inner loops.
6965	Changed \|= DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
6966	if (EnablePhiElim && L->isLoopSimplifyForm()) {
6967	SmallVector<WeakTrackingVH, `16`> DeadInsts;
6968	const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
6969	SCEVExpander Rewriter(SE, DL, "lsr", false);
6970	#ifndef NDEBUG
6971	Rewriter.setDebugType(DEBUG_TYPE);
6972	#endif
6973	unsigned numFolded = Rewriter.replaceCongruentIVs(L, DT: &DT, DeadInsts, TTI: &TTI);
6974	Rewriter.clear();
6975	if (numFolded) {
6976	Changed = true;
6977	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI: &TLI,
6978	MSSAU: MSSAU.get());
6979	DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
6980	}
6981	}
6982	// LSR may at times remove all uses of an induction variable from a loop.
6983	// The only remaining use is the PHI in the exit block.
6984	// When this is the case, if the exit value of the IV can be calculated using
6985	// SCEV, we can replace the exit block PHI with the final value of the IV and
6986	// skip the updates in each loop iteration.
6987	if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
6988	SmallVector<WeakTrackingVH, `16`> DeadInsts;
6989	const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
6990	SCEVExpander Rewriter(SE, DL, "lsr", true);
6991	int Rewrites = rewriteLoopExitValues(L, LI: &LI, TLI: &TLI, SE: &SE, TTI: &TTI, Rewriter, DT: &DT,
6992	ReplaceExitValue: UnusedIndVarInLoop, DeadInsts);
6993	Rewriter.clear();
6994	if (Rewrites) {
6995	Changed = true;
6996	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI: &TLI,
6997	MSSAU: MSSAU.get());
6998	DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
6999	}
7000	}
7001
7002	const bool EnableFormTerm = [&] {
7003	switch (AllowTerminatingConditionFoldingAfterLSR) {
7004	case cl::BOU_TRUE:
7005	return true;
7006	case cl::BOU_FALSE:
7007	return false;
7008	case cl::BOU_UNSET:
7009	return TTI.shouldFoldTerminatingConditionAfterLSR();
7010	}
7011	llvm_unreachable("Unhandled cl::boolOrDefault enum");
7012	}();
7013
7014	if (EnableFormTerm) {
7015	if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) {
7016	auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
7017
7018	Changed = true;
7019	NumTermFold ++;
7020
7021	BasicBlock *LoopPreheader = L->getLoopPreheader();
7022	BasicBlock *LoopLatch = L->getLoopLatch();
7023
7024	(void)ToFold;
7025	LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
7026	<< *ToFold << "\n"
7027	<< "New term-cond phi-node:\n"
7028	<< *ToHelpFold << "\n");
7029
7030	Value *StartValue = ToHelpFold->getIncomingValueForBlock(BB: LoopPreheader);
7031	(void)StartValue;
7032	Value *LoopValue = ToHelpFold->getIncomingValueForBlock(BB: LoopLatch);
7033
7034	// See comment in canFoldTermCondOfLoop on why this is sufficient.
7035	if (MustDrop)
7036	cast<Instruction>(Val: LoopValue)->dropPoisonGeneratingFlags();
7037
7038	// SCEVExpander for both use in preheader and latch
7039	const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
7040	SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
7041
7042	assert(Expander.isSafeToExpand(TermValueS) &&
7043	"Terminating value was checked safe in canFoldTerminatingCondition");
7044
7045	// Create new terminating value at loop preheader
7046	Value *TermValue = Expander.expandCodeFor(SH: TermValueS, Ty: ToHelpFold->getType(),
7047	I: LoopPreheader->getTerminator());
7048
7049	LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
7050	<< *StartValue << "\n"
7051	<< "Terminating value of new term-cond phi-node:\n"
7052	<< *TermValue << "\n");
7053
7054	// Create new terminating condition at loop latch
7055	BranchInst *BI = cast<BranchInst>(Val: LoopLatch->getTerminator());
7056	ICmpInst *OldTermCond = cast<ICmpInst>(Val: BI->getCondition());
7057	IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
7058	Value *NewTermCond =
7059	LatchBuilder.CreateICmp(P: CmpInst::ICMP_EQ, LHS: LoopValue, RHS: TermValue,
7060	Name: "lsr_fold_term_cond.replaced_term_cond");
7061	// Swap successors to exit loop body if IV equals to new TermValue
7062	if (BI->getSuccessor(i: `0`) == L->getHeader())
7063	BI->swapSuccessors();
7064
7065	LLVM_DEBUG(dbgs() << "Old term-cond:\n"
7066	<< *OldTermCond << "\n"
7067	<< "New term-cond:\n" << *NewTermCond << "\n");
7068
7069	BI->setCondition(NewTermCond);
7070
7071	Expander.clear();
7072	OldTermCond->eraseFromParent();
7073	DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
7074	}
7075	}
7076
7077	if (SalvageableDVIRecords.empty())
7078	return Changed;
7079
7080	// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
7081	// expressions composed using the derived iteration count.
7082	// TODO: Allow for multiple IV references for nested AddRecSCEVs
7083	for (const auto &L : LI) {
7084	if (llvm::PHINode IV = GetInductionVariable(L: L, SE, LSR: Reducer))
7085	DbgRewriteSalvageableDVIs(L, SE, LSRInductionVar: IV, DVIToUpdate&: SalvageableDVIRecords);
7086	else {
7087	LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
7088	"could not be identified.\n");
7089	}
7090	}
7091
7092	for (auto &Rec : SalvageableDVIRecords)
7093	Rec ->clear();
7094	SalvageableDVIRecords.clear();
7095	DVIHandles.clear();
7096	return Changed;
7097	}
7098
7099	bool LoopStrengthReduce::runOnLoop(Loop L, LPPassManager & /LPM/*) {
7100	if (skipLoop(L))
7101	return false;
7102
7103	auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7104	auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7105	auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7106	auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7107	const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7108	F: *L->getHeader()->getParent());
7109	auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7110	F&: *L->getHeader()->getParent());
7111	auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7112	F: *L->getHeader()->getParent());
7113	auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7114	MemorySSA MSSA = nullptr*;
7115	if (MSSAAnalysis)
7116	MSSA = &MSSAAnalysis->getMSSA();
7117	return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
7118	}
7119
7120	PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
7121	LoopStandardAnalysisResults &AR,
7122	LPMUpdater &) {
7123	if (!ReduceLoopStrength(L: &L, IU&: AM.getResult<IVUsersAnalysis>(IR&: L, ExtraArgs&: AR), SE&: AR.SE,
7124	DT&: AR.DT, LI&: AR.LI, TTI: AR.TTI, AC&: AR.AC, TLI&: AR.TLI, MSSA: AR.MSSA))
7125	return PreservedAnalyses::all();
7126
7127	auto PA = getLoopPassPreservedAnalyses();
7128	if (AR.MSSA)
7129	PA.preserve<MemorySSAAnalysis>();
7130	return PA;
7131	}
7132
7133	char LoopStrengthReduce::ID = `0`;
7134
7135	INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
7136	"Loop Strength Reduction", false, false)
7137	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
7138	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
7139	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
7140	INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
7141	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
7142	INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
7143	INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
7144	"Loop Strength Reduction", false, false)
7145
7146	Pass llvm::createLoopStrengthReducePass() { return* new LoopStrengthReduce (); }
7147

// Source: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp