//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
#else
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
#endif

using namespace llvm;

cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));

using namespace PatternMatch;

PoisonFlags::PoisonFlags(const Instruction *I) {
  NUW = false;
  NSW = false;
  Exact = false;
  Disjoint = false;
  NNeg = false;
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
    NUW = OBO->hasNoUnsignedWrap();
    NSW = OBO->hasNoSignedWrap();
  }
  if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
    Exact = PEO->isExact();
  if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
    Disjoint = PDI->isDisjoint();
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
    NNeg = PNI->hasNonNeg();
  if (auto *TI = dyn_cast<TruncInst>(I)) {
    NUW = TI->hasNoUnsignedWrap();
    NSW = TI->hasNoSignedWrap();
  }
}
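
// Illustrative note: for an instruction such as `%a = add nuw i64 %x, %y`,
// the constructor above records NUW = true and leaves the other flags false;
// apply() below then re-installs exactly the recorded flags on a replacement
// instruction of a compatible kind.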

void PoisonFlags::apply(Instruction *I) {
  if (isa<OverflowingBinaryOperator>(I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
  if (isa<PossiblyExactOperator>(I))
    I->setIsExact(Exact);
  if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
    PDI->setIsDisjoint(Disjoint);
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
    PNI->setNonNeg(NNeg);
  if (isa<TruncInst>(I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
}

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
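///
/// For example (illustrative), when asked for a ptrtoint of %p to i64, this
/// can return an already-emitted `%c = ptrtoint ptr %p to i64` that sits at
/// or before IP, instead of emitting a second, identical cast.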
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Value *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users()) {
    if (U->getType() != Ty)
      continue;
    CastInst *CI = dyn_cast<CastInst>(U);
    if (!CI || CI->getOpcode() != Op)
      continue;

    // Found a suitable cast that is at IP or comes before IP. Use it. Note
    // that the cast must also properly dominate the Builder's insertion point.
    if (IP->getParent() == CI->getParent() && &*BIP != CI &&
        (&*IP == CI || CI->comesBefore(&*IP))) {
      Ret = CI;
      break;
    }
  }

  // Create a new cast.
  if (!Ret) {
    SCEVInsertPointGuard Guard(Builder, this);
    Builder.SetInsertPoint(&*IP);
    Ret = Builder.CreateCast(Op, V, Ty, V->getName());
  }

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(!isa<Instruction>(Ret) ||
         SE.DT.dominates(cast<Instruction>(Ret), &*BIP));

  return Ret;
}

BasicBlock::iterator
SCEVExpander::findInsertPointAfter(Instruction *I,
                                   Instruction *MustDominate) const {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getParent()->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  // Adjust insert point to be after instructions inserted by the expander, so
  // we can re-use already inserted instructions. Avoid skipping past the
  // original \p MustDominate, in case it is an inserted instruction.
  while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
    ++IP;

  return IP;
}

BasicBlock::iterator
SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return IP;
  }

  // Cast the instruction immediately after the instruction.
  if (Instruction *I = dyn_cast<Instruction>(V))
    return findInsertPointAfter(I, &*Builder.GetInsertPoint());

  // Otherwise, this must be some kind of a constant,
  // so let's plop this cast into the function's entry block.
  assert(isa<Constant>(V) &&
         "Expected the cast argument to be a global/constant");
  return Builder.GetInsertBlock()
      ->getParent()
      ->getEntryBlock()
      .getFirstInsertionPt();
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
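///
/// "Noop" here means the cast only reinterprets bits, per the asserts below:
/// a bitcast, or a ptrtoint/inttoptr between a pointer and an integer of the
/// same size (illustrative: i64 <-> ptr on a 64-bit target).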
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // inttoptr only works for integral pointers. For non-integral pointers, we
  // can create a GEP on null with the integral value as index. Note that
  // it is safe to use GEP of null instead of inttoptr here, because only
  // expressions already based on a GEP of null should be converted to pointers
  // during expansion.
  if (Op == Instruction::IntToPtr) {
    auto *PtrTy = cast<PointerType>(Ty);
    if (DL.isNonIntegralPointerType(PtrTy))
      return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
  }
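
  // E.g. (illustrative), the non-integral case above turns
  // `inttoptr i64 %v to ptr addrspace(N)` into
  // `getelementptr i8, ptr addrspace(N) null, i64 %v`.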
  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
              SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
              SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Try to reuse existing cast, or insert one.
  return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation, and hoisting
/// to an outer loop when the opportunity is there and it is safe.
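///
/// For example (illustrative), if an `add i64 %a, %b` with matching wrap
/// flags already appears a few instructions above the insertion point, that
/// instruction is returned instead of emitting a duplicate add.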
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS,
                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
        return Res;

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
        // Ensure that no-wrap flags match.
        if (isa<OverflowingBinaryOperator>(I)) {
          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
            return true;
          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
            return true;
        }
        // Conservatively, do not reuse any instruction that has the exact
        // flag set.
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  if (IsSafeToHoist) {
    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }
  }

  // If we haven't found this binop, insert it.
  // TODO: Use the Builder, which will make CreateBinOp below fold with
  // InstSimplifyFolder.
  Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  if (Flags & SCEV::FlagNUW)
    BO->setHasNoUnsignedWrap();
  if (Flags & SCEV::FlagNSW)
    BO->setHasNoSignedWrap();

  return BO;
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
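/// For example (illustrative), the pointer expression (%base + 4*%i) is
/// emitted as `getelementptr i8, ptr %base, i64 %off` with %off = 4*%i,
/// rather than as a ptrtoint/add/inttoptr sequence.
///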
Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
  assert(!isa<Instruction>(V) ||
         SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

  Value *Idx = expand(Offset);

  // Fold a GEP with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(V))
    if (Constant *CRHS = dyn_cast<Constant>(Idx))
      return Builder.CreatePtrAdd(CLHS, CRHS);

  // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;
      if (IP->getOpcode() == Instruction::GetElementPtr &&
          IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
          cast<GEPOperator>(&*IP)->getSourceElementType() ==
              Builder.getInt8Ty())
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader->getTerminator());
  }

  // Emit a GEP.
  return Builder.CreatePtrAdd(V, Idx, "scevgep");
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant
/// for SCEV expansion. If they are nested, this is the most nested. If they
/// are neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  switch (S->getSCEVType()) {
  case scConstant:
  case scVScale:
    return nullptr; // A constant has no relevant loops.
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
  case scPtrToInt:
  case scAddExpr:
  case scMulExpr:
  case scUDivExpr:
  case scAddRecExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : S->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[S] = L;
  }
  case scUnknown: {
    const SCEVUnknown *U = cast<SCEVUnknown>(S);
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies
  // on to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
      continue;
    }

    assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
    if (isa<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown that is not an instruction, peek
        // through it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expand(SE.getNegativeSCEV(Op));
      Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
                        /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple add.
      Value *W = expand(Op);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum))
        std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
                        /*IsSafeToHoist*/ true);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = S->getType();

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
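  // For example, with N = 11 = 1 + 2 + 8, X pow 11 is emitted as
  // (X pow 1) * (X pow 2) * (X pow 8), where each power of X is obtained from
  // the previous one by squaring.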
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8, etc., and include those
    // that are needed in the result.
    Value *P = expand(I->second);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
                      /*IsSafeToHoist*/ true);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P,
                                      SCEV::FlagAnyWrap,
                                      /*IsSafeToHoist*/ true)
                        : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        auto NWFlags = S->getNoWrapFlags();
        // Clear the nsw flag if the shl will produce a poison value.
        if (RHS->logBase2() == RHS->getBitWidth() - 1)
          NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
                           /*IsSafeToHoist*/ true);
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
                           /*IsSafeToHoist*/ true);
      }
    }
  }

  return Prod;
}

Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Value *LHS = expand(S->getLHS());
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
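    // A udiv by a power of two becomes a shift: e.g. (udiv %x, 8) is emitted
    // as (lshr %x, 3).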
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(SC->getType(), RHS.logBase2()),
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  }

  Value *RHS = expand(S->getRHS());
  return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
                     /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (Use &Op : llvm::drop_begin(IncV->operands()))
      if (Instruction *OInst = dyn_cast<Instruction>(Op))
        if (!SE.DT.dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  if (IncV == PN)
    return true;

  return isNormalAddRecExprPHI(PN, IncV, L);
}

/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of
/// the simple patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP. If the pattern isn't recognized, return NULL.
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
  case Instruction::Add:
  case Instruction::Sub: {
    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
    if (!OInst || SE.DT.dominates(OInst, InsertPos))
      return dyn_cast<Instruction>(IncV->getOperand(0));
    return nullptr;
  }
  case Instruction::BitCast:
    return dyn_cast<Instruction>(IncV->getOperand(0));
  case Instruction::GetElementPtr:
    for (Use &U : llvm::drop_begin(IncV->operands())) {
      if (isa<Constant>(U))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(U)) {
        if (!SE.DT.dominates(OInst, InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // GEPs produced by SCEVExpander use i8 element type.
      if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(IncV->getOperand(0));
  }
}

/// If the insert point of the current builder or any of the builders on the
/// stack of saved builders has 'I' as its insert point, update it to point to
/// the instruction after 'I'. This is intended to be used when the instruction
/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
/// different block, the inconsistent insert point (with a mismatched
/// Instruction and Block) can lead to an instruction being inserted in a block
/// other than its parent.
void SCEVExpander::fixupInsertPoints(Instruction *I) {
  BasicBlock::iterator It(*I);
  BasicBlock::iterator NewInsertPt = std::next(It);
  if (Builder.GetInsertPoint() == It)
    Builder.SetInsertPoint(&*NewInsertPt);
  for (auto *InsertPtGuard : InsertPointGuards)
    if (InsertPtGuard->GetInsertPoint() == It)
      InsertPtGuard->SetInsertPoint(NewInsertPt);
}

/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
                              bool RecomputePoisonFlags) {
  auto FixupPoisonFlags = [this](Instruction *I) {
    // Drop flags that are potentially inferred from the old context and infer
    // flags in the new context.
    rememberFlags(I);
    I->dropPoisonGeneratingFlags();
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
      if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
        auto *BO = cast<BinaryOperator>(I);
        BO->setHasNoUnsignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
        BO->setHasNoSignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
      }
  };

  if (SE.DT.dominates(IncV, InsertPos)) {
    if (RecomputePoisonFlags)
      FixupPoisonFlags(IncV);
    return true;
  }

  // InsertPos must itself dominate IncV so that IncV's new position satisfies
  // its existing users.
  if (isa<PHINode>(InsertPos) ||
      !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
    return false;

  if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
    return false;

  // Check that the chain of IV operands leading back to Phi can be hoisted.
  SmallVector<Instruction*, 4> IVIncs;
  for(;;) {
    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
    if (!Oper)
      return false;
    // IncV is safe to hoist.
    IVIncs.push_back(IncV);
    IncV = Oper;
    if (SE.DT.dominates(IncV, InsertPos))
      break;
  }
  for (Instruction *I : llvm::reverse(IVIncs)) {
    fixupInsertPoints(I);
    I->moveBefore(InsertPos);
    if (RecomputePoisonFlags)
      FixupPoisonFlags(I);
  }
  return true;
}

bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
                                                  PHINode *WidePhi,
                                                  Instruction *OrigInc,
                                                  Instruction *WideInc) {
  return match(OrigInc, m_c_BinOp(m_Specific(OrigPhi), m_Value())) &&
         match(WideInc, m_c_BinOp(m_Specific(WidePhi), m_Value())) &&
         OrigInc->getOpcode() == WideInc->getOpcode();
}

/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP.
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                           const Loop *L) {
  for(Instruction *IVOper = IncV;
      (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
                                /*allowScale=*/false));) {
    if (IVOper == PN)
      return true;
  }
  return false;
}

/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                                 bool useSubtract) {
  Value *IncV;
  // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  if (PN->getType()->isPointerTy()) {
    // TODO: Change name to IVName.iv.next.
    IncV = Builder.CreatePtrAdd(PN, StepV, "scevgep");
  } else {
    IncV = useSubtract ?
      Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
      Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
  }
  return IncV;
}

/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
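///
/// For example (illustrative), a request for {R,+,-1} can be served by an
/// available phi for {0,+,1} by computing R - phi (the InvertStep case), and
/// an i32 request can reuse a wider i64 phi after a trunc.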
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
                                    const SCEVAddRecExpr *Phi,
                                    const SCEVAddRecExpr *Requested,
                                    bool &InvertStep) {
  // We can't transform to match a pointer PHI.
  Type *PhiTy = Phi->getType();
  Type *RequestedTy = Requested->getType();
  if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
    return false;

  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
    return false;

  // Try truncate it if necessary.
  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  if (!Phi)
    return false;

  // Check whether truncation will help.
  if (Phi == Requested) {
    InvertStep = false;
    return true;
  }

  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
  if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
    InvertStep = true;
    return true;
  }

  return false;
}

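/// Return true if the increment of \p AR is known not to wrap in the signed
/// sense. This widens to twice the bit width and asks SCEV whether
/// sext(AR + Step) equals sext(AR) + sext(Step); if the extended forms agree,
/// the narrow addition cannot have signed-wrapped.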
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
                                            SE.getSignExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
                                            SE.getZeroExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L, Type *&TruncTy,
                                        bool &InvertStep) {
  assert((!IVIncInsertLoop || IVIncInsertPos) &&
         "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    PHINode *AddRecPhiMatch = nullptr;
    Instruction *IncV = nullptr;
    TruncTy = nullptr;
    InvertStep = false;

    // Only try partially matching scevs that need truncation and/or
    // step-inversion if we know this loop is outside the current loop.
    bool TryNonMatchingSCEV =
        IVIncInsertLoop &&
        SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());

    for (PHINode &PN : L->getHeader()->phis()) {
      if (!SE.isSCEVable(PN.getType()))
        continue;

      // We should not look for an incomplete PHI. Getting SCEV for an
      // incomplete PHI has no meaning at all.
      if (!PN.isComplete()) {
        SCEV_DEBUG_WITH_TYPE(
            DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
        continue;
      }

      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
      if (!PhiSCEV)
        continue;

      bool IsMatchingSCEV = PhiSCEV == Normalized;
      // We only handle truncation and inversion of phi recurrences for the
      // expanded expression if the expanded expression's loop dominates the
      // loop we insert to. Check now, so we can bail out early.
      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
        continue;

      // TODO: this possibly can be reworked to avoid this cast at all.
      Instruction *TempIncV =
          dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
        // Record the phi node. But don't stop; we might find an exact match
        // later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = Normalized->getType();
      }
    }

    if (AddRecPhiMatch) {
      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(IncV);
      // Those values were not actually inserted but re-used.
      ReusedValues.insert(AddRecPhiMatch);
      ReusedValues.insert(IncV);
      return AddRecPhiMatch;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV =
      expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());

  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that
  // PHI reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  Type *ExpandTy = Normalized->getType();
  // If the stride is negative, insert a sub instead of an add for the
  // increment (unless it's a constant, because subtracts of constants are
  // canonicalized to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());

  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  // we actually do emit an addition. It does not apply if we emit a
  // subtraction.
  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  PHINode *PN =
      Builder.CreatePHI(ExpandTy, pred_size(Header), Twine(IVName) + ".iv");

  // Create the step instructions and populate the PHI.
  for (BasicBlock *Pred : predecessors(Header)) {
    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert
    // the instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, useSubtract);

    if (isa<OverflowingBinaryOperator>(IncV)) {
      if (IncrementIsNUW)
        cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
      if (IncrementIsNSW)
        cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
    }
    PN->addIncoming(IncV, Pred);
  }

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
  // effective when we are able to use an IV inserted here, so record it.
  InsertedValues.insert(PN);
  InsertedIVs.push_back(PN);
  return PN;
}

Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized = cast<SCEVAddRecExpr>(
        normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
  }

  [[maybe_unused]] const SCEV *Start = Normalized->getStart();
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  assert(SE.properlyDominates(Start, L->getHeader()) &&
         "Start does not properly dominate loop header");
  assert(SE.dominates(Step, L->getHeader()) &&
         "Step does not dominate loop header");

  // In some cases, we decide to reuse an existing phi node but need to
  // truncate it and/or invert the step.
  Type *TruncTy = nullptr;
  bool InvertStep = false;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // We might be introducing a new use of the post-inc IV that is not poison
    // safe, in which case we should drop poison generating flags. Only keep
    // those flags for which SCEV has proven that they always hold.
    if (isa<OverflowingBinaryOperator>(Result)) {
      auto *I = cast<Instruction>(Result);
      if (!S->hasNoUnsignedWrap())
        I->setHasNoUnsignedWrap(false);
      if (!S->hasNoSignedWrap())
        I->setHasNoSignedWrap(false);
    }

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Result) &&
        !SE.DT.dominates(cast<Instruction>(Result),
                         &*Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside whose operand is replaced during
      // expansion with the value of the postinc user. Without fundamentally
      // changing the way postinc users are tracked, the only remedy is
      // inserting an extra IV increment. StepV might fold into PostLoopOffset,
      // but hopefully expandCodeFor handles that.
      bool useSubtract =
          !S->getType()->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(Step);
      Value *StepV;
      {
        // Expand the step somewhere that dominates the loop header.
        SCEVInsertPointGuard Guard(Builder, this);
        StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
      }
      Result = expandIVInc(PN, StepV, L, useSubtract);
    }
  }

  // We have decided to reuse an induction variable of a dominating loop. Apply
  // truncation and/or inversion of the step.
  if (TruncTy) {
    // Truncate the result.
    if (TruncTy != Result->getType())
      Result = Builder.CreateTrunc(Result, TruncTy);

    // Invert the result.
    if (InvertStep)
      Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
  }

  return Result;
}

Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  // In canonical mode we compute the addrec as an expression of a canonical IV
  // using evaluateAtIteration and expand the resulting SCEV expression. This
  // way we avoid introducing new IVs to carry on the computation of the addrec
  // throughout the loop.
  //
  // For nested addrecs evaluateAtIteration might need a canonical IV of a
  // type wider than the addrec itself. Emitting a canonical IV of the
  // proper type might produce non-legal types, for example expanding an i64
  // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
  // back to non-canonical mode for nested addrecs.
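  //
  // For example, in canonical mode an affine addrec {A,+,B} is emitted as
  // A + i*B in terms of the canonical IV i, rather than as its own phi.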
  if (!CanonicalMode || (S->getNumOperands() > 2))
    return expandAddRecExprLiterally(S);

  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  const Loop *L = S->getLoop();

  // First check for an existing canonical IV in a suitable type.
  PHINode *CanonicalIV = nullptr;
  if (PHINode *PN = L->getCanonicalInductionVariable())
    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
      CanonicalIV = PN;

  // Rewrite an AddRec in terms of the canonical induction variable, if
  // its type is more narrow.
  if (CanonicalIV &&
      SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
      !S->getType()->isPointerTy()) {
    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
      NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                       S->getNoWrapFlags(SCEV::FlagNW)));
    BasicBlock::iterator NewInsertPt =
        findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
    V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
    return V;
  }

  // {X,+,F} --> X + {0,+,F}
  if (!S->getStart()->isZero()) {
    if (isa<PointerType>(S->getType())) {
      Value *StartV = expand(SE.getPointerBase(S));
      return expandAddToGEP(SE.removePointerBase(S), StartV);
    }

    SmallVector<const SCEV *, 4> NewOps(S->operands());
    NewOps[0] = SE.getConstant(Ty, 0);
    const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
                                        S->getNoWrapFlags(SCEV::FlagNW));

    // Just do a normal add. Pre-expand the operands to suppress folding.
    //
    // The LHS and RHS values are factored out of the expand call to make the
    // output independent of the argument evaluation order.
    const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
    const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
    return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
    CanonicalIV->insertBefore(Header->begin());
    rememberInstruction(CanonicalIV);

    SmallSet<BasicBlock *, 4> PredSeen;
    Constant *One = ConstantInt::get(Ty, 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (!PredSeen.insert(HP).second) {
        // There must be an incoming value for each predecessor, even the
        // duplicates!
        CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
        continue;
      }

      if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add = BinaryOperator::CreateAdd(
            CanonicalIV, One, "indvar.next",
            HP->getTerminator()->getIterator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(Add);
        CanonicalIV->addIncoming(Add, HP);
      } else {
        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
  if (S->isAffine() && S->getOperand(1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F

  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine()) // {0,+,F} --> i*F
    return
      expand(SE.getTruncateOrNoop(
          SE.getMulExpr(SE.getUnknown(CanonicalIV),
                        SE.getNoopOrAnyExtend(S->getOperand(1),
                                              CanonicalIV->getType())),
          Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);

  // Truncate the result down to the original type, if needed.
  const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  return expand(T);
}

Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
  Value *V = expand(S->getOperand());
  return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
                           GetOptimalInsertionPointForCastOf(V));
}

Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateTrunc(V, S->getType());
}

Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateZExt(V, S->getType(), "",
                            SE.isKnownNonNegative(S->getOperand()));
}

Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateSExt(V, S->getType());
}

Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
                                      Intrinsic::ID IntrinID, Twine Name,
                                      bool IsSequential) {
  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
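  // For a sequential min/max (umin_seq), poison from a later operand must not
  // leak through the select chain when an earlier operand already determines
  // the result, so every operand except operand 0 is frozen below.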
1353 if (IsSequential)
1354 LHS = Builder.CreateFreeze(V: LHS);
1355 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1356 Value *RHS = expand(S: S->getOperand(i));
1357 if (IsSequential && i != 0)
1358 RHS = Builder.CreateFreeze(V: RHS);
1359 Value *Sel;
1360 if (Ty->isIntegerTy())
1361 Sel = Builder.CreateIntrinsic(ID: IntrinID, Types: {Ty}, Args: {LHS, RHS},
1362 /*FMFSource=*/nullptr, Name);
1363 else {
1364 Value *ICmp =
1365 Builder.CreateICmp(P: MinMaxIntrinsic::getPredicate(ID: IntrinID), LHS, RHS);
1366 Sel = Builder.CreateSelect(C: ICmp, True: LHS, False: RHS, Name);
1367 }
1368 LHS = Sel;
1369 }
1370 return LHS;
1371}
1372
1373Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1374 return expandMinMaxExpr(S, Intrinsic::IntrinID: smax, Name: "smax");
1375}
1376
1377Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1378 return expandMinMaxExpr(S, Intrinsic::IntrinID: umax, Name: "umax");
1379}
1380
1381Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1382 return expandMinMaxExpr(S, Intrinsic::IntrinID: smin, Name: "smin");
1383}
1384
1385Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1386 return expandMinMaxExpr(S, Intrinsic::IntrinID: umin, Name: "umin");
1387}
1388
1389Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1390 return expandMinMaxExpr(S, Intrinsic::IntrinID: umin, Name: "umin", /*IsSequential*/true);
1391}
1392
1393Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1394 return Builder.CreateVScale(Scaling: ConstantInt::get(Ty: S->getType(), V: 1));
1395}
1396
1397Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1398 BasicBlock::iterator IP) {
1399 setInsertPoint(IP);
1400 Value *V = expandCodeFor(SH, Ty);
1401 return V;
1402}
1403
1404Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1405 // Expand the code for this SCEV.
1406 Value *V = expand(S: SH);
1407
1408 if (Ty) {
1409 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1410 "non-trivial casts should be done with the SCEVs directly!");
1411 V = InsertNoopCastOfTo(V, Ty);
1412 }
1413 return V;
1414}
1415
1416Value *SCEVExpander::FindValueInExprValueMap(
1417 const SCEV *S, const Instruction *InsertPt,
1418 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
  // If the expansion is not in CanonicalMode and the SCEV contains any
  // scAddRecExpr sub-expression, the SCEV must be expanded literally.
1421 if (!CanonicalMode && SE.containsAddRecurrence(S))
1422 return nullptr;
1423
1424 // If S is a constant, it may be worse to reuse an existing Value.
1425 if (isa<SCEVConstant>(Val: S))
1426 return nullptr;
1427
1428 for (Value *V : SE.getSCEVValues(S)) {
1429 Instruction *EntInst = dyn_cast<Instruction>(Val: V);
1430 if (!EntInst)
1431 continue;
1432
1433 // Choose a Value from the set which dominates the InsertPt.
1434 // InsertPt should be inside the Value's parent loop so as not to break
1435 // the LCSSA form.
1436 assert(EntInst->getFunction() == InsertPt->getFunction());
1437 if (S->getType() != V->getType() || !SE.DT.dominates(Def: EntInst, User: InsertPt) ||
1438 !(SE.LI.getLoopFor(BB: EntInst->getParent()) == nullptr ||
1439 SE.LI.getLoopFor(BB: EntInst->getParent())->contains(Inst: InsertPt)))
1440 continue;
1441
1442 // Make sure reusing the instruction is poison-safe.
1443 if (SE.canReuseInstruction(S, I: EntInst, DropPoisonGeneratingInsts))
1444 return V;
1445 DropPoisonGeneratingInsts.clear();
1446 }
1447 return nullptr;
1448}
1449
// The expansion of a SCEV will either reuse a previous Value from
// ExprValueMap or expand the SCEV literally. Specifically, if the expansion
// is in LSRMode and the SCEV contains any scAddRecExpr sub-expression, it is
// expanded literally, to prevent LSR's transformed SCEV from being reverted.
// Otherwise, the expansion tries to reuse a Value from ExprValueMap and only
// expands the SCEV literally when that fails.
1456Value *SCEVExpander::expand(const SCEV *S) {
1457 // Compute an insertion point for this SCEV object. Hoist the instructions
1458 // as far out in the loop nest as possible.
1459 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1460
  // We can move the insertion point only if there are no div or rem
  // operations; otherwise we risk hoisting them above the check for a zero
  // denominator.
1463 auto SafeToHoist = [](const SCEV *S) {
1464 return !SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
1465 if (const auto *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
1466 if (const auto *SC = dyn_cast<SCEVConstant>(Val: D->getRHS()))
1467 // Division by non-zero constants can be hoisted.
1468 return SC->getValue()->isZero();
1469 // All other divisions should not be moved as they may be
1470 // divisions by zero and should be kept within the
1471 // conditions of the surrounding loops that guard their
1472 // execution (see PR35406).
1473 return true;
1474 }
1475 return false;
1476 });
1477 };
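  // For example, if a SCEV contains (%n /u %d) and the enclosing loop is
  // only entered when "%d != 0" holds, hoisting the udiv into an outer
  // preheader could execute a division by zero on a path where the loop
  // never runs.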
1478 if (SafeToHoist(S)) {
1479 for (Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock());;
1480 L = L->getParentLoop()) {
1481 if (SE.isLoopInvariant(S, L)) {
1482 if (!L) break;
1483 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1484 InsertPt = Preheader->getTerminator()->getIterator();
1485 } else {
1486 // LSR sets the insertion point for AddRec start/step values to the
1487 // block start to simplify value reuse, even though it's an invalid
1488 // position. SCEVExpander must correct for this in all cases.
1489 InsertPt = L->getHeader()->getFirstInsertionPt();
1490 }
1491 } else {
1492 // If the SCEV is computable at this level, insert it into the header
1493 // after the PHIs (and after any other instructions that we've inserted
1494 // there) so that it is guaranteed to dominate any user inside the loop.
1495 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(Ptr: L))
1496 InsertPt = L->getHeader()->getFirstInsertionPt();
1497
1498 while (InsertPt != Builder.GetInsertPoint() &&
1499 (isInsertedInstruction(I: &*InsertPt) ||
1500 isa<DbgInfoIntrinsic>(Val: &*InsertPt))) {
1501 InsertPt = std::next(x: InsertPt);
1502 }
1503 break;
1504 }
1505 }
1506 }
1507
1508 // Check to see if we already expanded this here.
1509 auto I = InsertedExpressions.find(Val: std::make_pair(x&: S, y: &*InsertPt));
1510 if (I != InsertedExpressions.end())
1511 return I->second;
1512
1513 SCEVInsertPointGuard Guard(Builder, this);
1514 Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt);
1515
1516 // Expand the expression into instructions.
1517 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1518 Value *V = FindValueInExprValueMap(S, InsertPt: &*InsertPt, DropPoisonGeneratingInsts);
1519 if (!V) {
1520 V = visit(S);
1521 V = fixupLCSSAFormFor(V);
1522 } else {
1523 for (Instruction *I : DropPoisonGeneratingInsts) {
1524 rememberFlags(I);
1525 I->dropPoisonGeneratingAnnotations();
1526 // See if we can re-infer from first principles any of the flags we just
1527 // dropped.
1528 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
1529 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1530 auto *BO = cast<BinaryOperator>(Val: I);
1531 BO->setHasNoUnsignedWrap(
1532 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
1533 BO->setHasNoSignedWrap(
1534 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
1535 }
1536 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(Val: I)) {
1537 auto *Src = NNI->getOperand(i_nocapture: 0);
1538 if (isImpliedByDomCondition(Pred: ICmpInst::ICMP_SGE, LHS: Src,
1539 RHS: Constant::getNullValue(Ty: Src->getType()), ContextI: I,
1540 DL).value_or(u: false))
1541 NNI->setNonNeg(true);
1542 }
1543 }
1544 }
1545 // Remember the expanded value for this SCEV at this location.
1546 //
1547 // This is independent of PostIncLoops. The mapped value simply materializes
1548 // the expression at this insertion point. If the mapped value happened to be
1549 // a postinc expansion, it could be reused by a non-postinc user, but only if
1550 // its insertion point was already at the head of the loop.
1551 InsertedExpressions[std::make_pair(x&: S, y: &*InsertPt)] = V;
1552 return V;
1553}
1554
1555void SCEVExpander::rememberInstruction(Value *I) {
1556 auto DoInsert = [this](Value *V) {
1557 if (!PostIncLoops.empty())
1558 InsertedPostIncValues.insert(V);
1559 else
1560 InsertedValues.insert(V);
1561 };
1562 DoInsert(I);
1563}
1564
1565void SCEVExpander::rememberFlags(Instruction *I) {
1566 // If we already have flags for the instruction, keep the existing ones.
1567 OrigFlags.try_emplace(Key: I, Args: PoisonFlags(I));
1568}
1569
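// Having proven that Phi and OrigPhi compute the same value, also try to
// replace Phi's single latch increment with OrigPhi's when they are
// equivalent up to truncation. A sketch of the common case (types and names
// illustrative):
//   %orig.iv = phi i64 ... ; %orig.inc = add nuw i64 %orig.iv, 1
//   %phi     = phi i32 ... ; %inc      = add i32 %phi, 1
// Here %inc is replaced by "trunc i64 %orig.inc to i32" and queued for
// deletion, after hoisting %orig.inc so that it dominates the old use.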
1570void SCEVExpander::replaceCongruentIVInc(
1571 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1572 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1573 BasicBlock *LatchBlock = L->getLoopLatch();
1574 if (!LatchBlock)
1575 return;
1576
1577 Instruction *OrigInc =
1578 dyn_cast<Instruction>(Val: OrigPhi->getIncomingValueForBlock(BB: LatchBlock));
1579 Instruction *IsomorphicInc =
1580 dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: LatchBlock));
1581 if (!OrigInc || !IsomorphicInc)
1582 return;
1583
1584 // If this phi has the same width but is more canonical, replace the
1585 // original with it. As part of the "more canonical" determination,
1586 // respect a prior decision to use an IV chain.
1587 if (OrigPhi->getType() == Phi->getType() &&
1588 !(ChainedPhis.count(V: Phi) ||
1589 isExpandedAddRecExprPHI(PN: OrigPhi, IncV: OrigInc, L)) &&
1590 (ChainedPhis.count(V: Phi) ||
1591 isExpandedAddRecExprPHI(PN: Phi, IncV: IsomorphicInc, L))) {
1592 std::swap(a&: OrigPhi, b&: Phi);
1593 std::swap(a&: OrigInc, b&: IsomorphicInc);
1594 }
1595
1596 // Replacing the congruent phi is sufficient because acyclic
1597 // redundancy elimination, CSE/GVN, should handle the
1598 // rest. However, once SCEV proves that a phi is congruent,
1599 // it's often the head of an IV user cycle that is isomorphic
1600 // with the original phi. It's worth eagerly cleaning up the
1601 // common case of a single IV increment so that DeleteDeadPHIs
1602 // can remove cycles that had postinc uses.
  // Because we may introduce a new use of OrigIV that didn't exist before at
  // this point, its poison flags need readjustment.
1605 const SCEV *TruncExpr =
1606 SE.getTruncateOrNoop(V: SE.getSCEV(V: OrigInc), Ty: IsomorphicInc->getType());
1607 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(V: IsomorphicInc) ||
1608 !SE.LI.replacementPreservesLCSSAForm(From: IsomorphicInc, To: OrigInc))
1609 return;
1610
1611 bool BothHaveNUW = false;
1612 bool BothHaveNSW = false;
1613 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(Val: OrigInc);
1614 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(Val: IsomorphicInc);
1615 if (OBOIncV && OBOIsomorphic) {
1616 BothHaveNUW =
1617 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1618 BothHaveNSW =
1619 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1620 }
1621
1622 if (!hoistIVInc(IncV: OrigInc, InsertPos: IsomorphicInc,
1623 /*RecomputePoisonFlags*/ true))
1624 return;
1625
1626 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1627 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1628 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1629 // make IsomorphicInc's uses more poisonous.
1630 assert(OrigInc->getType()->getScalarSizeInBits() >=
1631 IsomorphicInc->getType()->getScalarSizeInBits() &&
1632 "Should only replace an increment with a wider one.");
1633 if (BothHaveNUW || BothHaveNSW) {
1634 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1635 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1636 }
1637
1638 SCEV_DEBUG_WITH_TYPE(DebugType,
1639 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1640 << *IsomorphicInc << '\n');
1641 Value *NewInc = OrigInc;
1642 if (OrigInc->getType() != IsomorphicInc->getType()) {
1643 BasicBlock::iterator IP;
1644 if (PHINode *PN = dyn_cast<PHINode>(Val: OrigInc))
1645 IP = PN->getParent()->getFirstInsertionPt();
1646 else
1647 IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1648
1649 IRBuilder<> Builder(IP->getParent(), IP);
1650 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1651 NewInc =
1652 Builder.CreateTruncOrBitCast(V: OrigInc, DestTy: IsomorphicInc->getType(), Name: IVName);
1653 }
1654 IsomorphicInc->replaceAllUsesWith(V: NewInc);
1655 DeadInsts.emplace_back(Args&: IsomorphicInc);
1656}
1657
1658/// replaceCongruentIVs - Check for congruent phis in this loop header and
1659/// replace them with their most canonical representative. Return the number of
1660/// phis eliminated.
1661///
1662/// This does not depend on any SCEVExpander state but should be used in
1663/// the same context that SCEVExpander is used.
1664unsigned
1665SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1666 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1667 const TargetTransformInfo *TTI) {
  // Find integer phis in order of decreasing width.
1669 SmallVector<PHINode*, 8> Phis;
1670 for (PHINode &PN : L->getHeader()->phis())
1671 Phis.push_back(Elt: &PN);
1672
1673 if (TTI)
1674 // Use stable_sort to preserve order of equivalent PHIs, so the order
1675 // of the sorted Phis is the same from run to run on the same loop.
1676 llvm::stable_sort(Range&: Phis, C: [](Value *LHS, Value *RHS) {
1677 // Put pointers at the back and make sure pointer < pointer = false.
1678 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1679 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1680 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1681 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1682 });
1683
1684 unsigned NumElim = 0;
1685 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1686 // Process phis from wide to narrow. Map wide phis to their truncation
1687 // so narrow phis can reuse them.
1688 for (PHINode *Phi : Phis) {
1689 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1690 if (Value *V = simplifyInstruction(I: PN, Q: {DL, &SE.TLI, &SE.DT, &SE.AC}))
1691 return V;
1692 if (!SE.isSCEVable(Ty: PN->getType()))
1693 return nullptr;
1694 auto *Const = dyn_cast<SCEVConstant>(Val: SE.getSCEV(V: PN));
1695 if (!Const)
1696 return nullptr;
1697 return Const->getValue();
1698 };
1699
1700 // Fold constant phis. They may be congruent to other constant phis and
1701 // would confuse the logic below that expects proper IVs.
1702 if (Value *V = SimplifyPHINode(Phi)) {
1703 if (V->getType() != Phi->getType())
1704 continue;
1705 SE.forgetValue(V: Phi);
1706 Phi->replaceAllUsesWith(V);
1707 DeadInsts.emplace_back(Args&: Phi);
1708 ++NumElim;
1709 SCEV_DEBUG_WITH_TYPE(DebugType,
1710 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1711 << '\n');
1712 continue;
1713 }
1714
1715 if (!SE.isSCEVable(Ty: Phi->getType()))
1716 continue;
1717
1718 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(V: Phi)];
1719 if (!OrigPhiRef) {
1720 OrigPhiRef = Phi;
1721 if (Phi->getType()->isIntegerTy() && TTI &&
1722 TTI->isTruncateFree(Ty1: Phi->getType(), Ty2: Phis.back()->getType())) {
1723 // Make sure we only rewrite using simple induction variables;
1724 // otherwise, we can make the trip count of a loop unanalyzable
1725 // to SCEV.
1726 const SCEV *PhiExpr = SE.getSCEV(V: Phi);
1727 if (isa<SCEVAddRecExpr>(Val: PhiExpr)) {
1728 // This phi can be freely truncated to the narrowest phi type. Map the
1729 // truncated expression to it so it will be reused for narrow types.
1730 const SCEV *TruncExpr =
1731 SE.getTruncateExpr(Op: PhiExpr, Ty: Phis.back()->getType());
1732 ExprToIVMap[TruncExpr] = Phi;
1733 }
1734 }
1735 continue;
1736 }
1737
1738 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1739 // sense.
1740 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1741 continue;
1742
1743 replaceCongruentIVInc(Phi, OrigPhi&: OrigPhiRef, L, DT, DeadInsts);
1744 SCEV_DEBUG_WITH_TYPE(DebugType,
1745 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1746 << '\n');
1747 SCEV_DEBUG_WITH_TYPE(
1748 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1749 ++NumElim;
1750 Value *NewIV = OrigPhiRef;
1751 if (OrigPhiRef->getType() != Phi->getType()) {
1752 IRBuilder<> Builder(L->getHeader(),
1753 L->getHeader()->getFirstInsertionPt());
1754 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1755 NewIV = Builder.CreateTruncOrBitCast(V: OrigPhiRef, DestTy: Phi->getType(), Name: IVName);
1756 }
1757 Phi->replaceAllUsesWith(V: NewIV);
1758 DeadInsts.emplace_back(Args&: Phi);
1759 }
1760 return NumElim;
1761}
1762
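// Return true if a Value computing S and available at At already exists:
// either an operand of a simple exit condition of L, e.g. the %n in
// "br (icmp ult i64 %i, %n), label %body, label %exit" (names illustrative),
// or an entry in ScalarEvolution's ExprValueMap.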
1763bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1764 const Instruction *At,
1765 Loop *L) {
1766 using namespace llvm::PatternMatch;
1767
1768 SmallVector<BasicBlock *, 4> ExitingBlocks;
1769 L->getExitingBlocks(ExitingBlocks);
1770
  // Look for a suitable value in simple conditions at the loop exits.
1772 for (BasicBlock *BB : ExitingBlocks) {
1773 ICmpInst::Predicate Pred;
1774 Instruction *LHS, *RHS;
1775
1776 if (!match(V: BB->getTerminator(),
1777 P: m_Br(C: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Instruction(I&: RHS)),
1778 T: m_BasicBlock(), F: m_BasicBlock())))
1779 continue;
1780
1781 if (SE.getSCEV(V: LHS) == S && SE.DT.dominates(Def: LHS, User: At))
1782 return true;
1783
1784 if (SE.getSCEV(V: RHS) == S && SE.DT.dominates(Def: RHS, User: At))
1785 return true;
1786 }
1787
1788 // Use expand's logic which is used for reusing a previous Value in
1789 // ExprValueMap. Note that we don't currently model the cost of
1790 // needing to drop poison generating flags on the instruction if we
1791 // want to reuse it. We effectively assume that has zero cost.
1792 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1793 return FindValueInExprValueMap(S, InsertPt: At, DropPoisonGeneratingInsts) != nullptr;
1794}
1795
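// Estimate the cost of expanding WorkItem and queue its SCEV operands on
// Worklist, tagged with the IR opcode and operand index they will feed, so
// that constant operands can later be costed as immediates of a concrete
// instruction. For example (illustrative), costing the addrec {a,+,b,+,c}
// charges the adds that combine its non-zero terms, plus the multiplies that
// scale them and form the powers of the induction variable.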
1796template<typename T> static InstructionCost costAndCollectOperands(
1797 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1798 TargetTransformInfo::TargetCostKind CostKind,
1799 SmallVectorImpl<SCEVOperand> &Worklist) {
1800
1801 const T *S = cast<T>(WorkItem.S);
1802 InstructionCost Cost = 0;
1803 // Object to help map SCEV operands to expanded IR instructions.
1804 struct OperationIndices {
1805 OperationIndices(unsigned Opc, size_t min, size_t max) :
1806 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1807 unsigned Opcode;
1808 size_t MinIdx;
1809 size_t MaxIdx;
1810 };
1811
1812 // Collect the operations of all the instructions that will be needed to
1813 // expand the SCEVExpr. This is so that when we come to cost the operands,
1814 // we know what the generated user(s) will be.
1815 SmallVector<OperationIndices, 2> Operations;
1816
1817 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1818 Operations.emplace_back(Opcode, 0, 0);
1819 return TTI.getCastInstrCost(Opcode, Dst: S->getType(),
1820 Src: S->getOperand(0)->getType(),
1821 CCH: TTI::CastContextHint::None, CostKind);
1822 };
1823
1824 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1825 unsigned MinIdx = 0,
1826 unsigned MaxIdx = 1) -> InstructionCost {
1827 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1828 return NumRequired *
1829 TTI.getArithmeticInstrCost(Opcode, Ty: S->getType(), CostKind);
1830 };
1831
1832 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1833 unsigned MaxIdx) -> InstructionCost {
1834 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1835 Type *OpType = S->getType();
1836 return NumRequired * TTI.getCmpSelInstrCost(
1837 Opcode, ValTy: OpType, CondTy: CmpInst::makeCmpResultType(opnd_type: OpType),
1838 VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
1839 };
1840
1841 switch (S->getSCEVType()) {
1842 case scCouldNotCompute:
1843 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1844 case scUnknown:
1845 case scConstant:
1846 case scVScale:
1847 return 0;
1848 case scPtrToInt:
1849 Cost = CastCost(Instruction::PtrToInt);
1850 break;
1851 case scTruncate:
1852 Cost = CastCost(Instruction::Trunc);
1853 break;
1854 case scZeroExtend:
1855 Cost = CastCost(Instruction::ZExt);
1856 break;
1857 case scSignExtend:
1858 Cost = CastCost(Instruction::SExt);
1859 break;
1860 case scUDivExpr: {
1861 unsigned Opcode = Instruction::UDiv;
1862 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1863 if (SC->getAPInt().isPowerOf2())
1864 Opcode = Instruction::LShr;
1865 Cost = ArithCost(Opcode, 1);
1866 break;
1867 }
1868 case scAddExpr:
1869 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1870 break;
1871 case scMulExpr:
    // TODO: this is a very pessimistic cost model for Mul, because of the
    // binary powering (BinPow) algorithm actually used by the expander;
    // see SCEVExpander::visitMulExpr() and ExpandOpBinPowN().
1875 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1876 break;
1877 case scSMaxExpr:
1878 case scUMaxExpr:
1879 case scSMinExpr:
1880 case scUMinExpr:
1881 case scSequentialUMinExpr: {
    // FIXME: should this ask for the cost of intrinsics?
1883 // The reduction tree.
1884 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1885 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1886 switch (S->getSCEVType()) {
1887 case scSequentialUMinExpr: {
1888 // The safety net against poison.
1889 // FIXME: this is broken.
1890 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1891 Cost += ArithCost(Instruction::Or,
1892 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1893 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1894 break;
1895 }
1896 default:
1897 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1898 "Unhandled SCEV expression type?");
1899 break;
1900 }
1901 break;
1902 }
1903 case scAddRecExpr: {
    // In this polynomial, we may have some zero operands, and we shouldn't
    // really charge for those. So how many non-zero coefficients are there?
1906 int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1907 return !Op->isZero();
1908 });
1909
    assert(NumTerms >= 1 && "Polynomial should have at least one term.");
1911 assert(!(*std::prev(S->operands().end()))->isZero() &&
1912 "Last operand should not be zero");
1913
1914 // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1915 int NumNonZeroDegreeNonOneTerms =
1916 llvm::count_if(S->operands(), [](const SCEV *Op) {
1917 auto *SConst = dyn_cast<SCEVConstant>(Val: Op);
1918 return !SConst || SConst->getAPInt().ugt(RHS: 1);
1919 });
1920
    // Much like with a normal add expr, the polynomial will require
    // one less addition than the number of its terms.
1923 InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1924 /*MinIdx*/ 1, /*MaxIdx*/ 1);
1925 // Here, *each* one of those will require a multiplication.
1926 InstructionCost MulCost =
1927 ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1928 Cost = AddCost + MulCost;
1929
    // What is the degree of this polynomial?
1931 int PolyDegree = S->getNumOperands() - 1;
1932 assert(PolyDegree >= 1 && "Should be at least affine.");
1933
1934 // The final term will be:
1935 // Op_{PolyDegree} * x ^ {PolyDegree}
1936 // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1937 // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1938 // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1939 // FIXME: this is conservatively correct, but might be overly pessimistic.
1940 Cost += MulCost * (PolyDegree - 1);
1941 break;
1942 }
1943 }
1944
1945 for (auto &CostOp : Operations) {
1946 for (auto SCEVOp : enumerate(S->operands())) {
1947 // Clamp the index to account for multiple IR operations being chained.
1948 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1949 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1950 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1951 }
1952 }
1953 return Cost;
1954}
1955
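// Account for the cost of expanding WorkItem.S, accumulating the running
// total in Cost and deferring its operands onto Worklist for later visits.
// Returns true as soon as the accumulated Cost exceeds Budget; expressions
// for which a usable expansion already exists are treated as free.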
1956bool SCEVExpander::isHighCostExpansionHelper(
1957 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1958 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1959 SmallPtrSetImpl<const SCEV *> &Processed,
1960 SmallVectorImpl<SCEVOperand> &Worklist) {
1961 if (Cost > Budget)
1962 return true; // Already run out of budget, give up.
1963
1964 const SCEV *S = WorkItem.S;
1965 // Was the cost of expansion of this expression already accounted for?
1966 if (!isa<SCEVConstant>(Val: S) && !Processed.insert(Ptr: S).second)
1967 return false; // We have already accounted for this expression.
1968
1969 // If we can find an existing value for this scev available at the point "At"
1970 // then consider the expression cheap.
1971 if (hasRelatedExistingExpansion(S, At: &At, L))
1972 return false; // Consider the expression to be free.
1973
1974 TargetTransformInfo::TargetCostKind CostKind =
1975 L->getHeader()->getParent()->hasMinSize()
1976 ? TargetTransformInfo::TCK_CodeSize
1977 : TargetTransformInfo::TCK_RecipThroughput;
1978
1979 switch (S->getSCEVType()) {
1980 case scCouldNotCompute:
1981 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1982 case scUnknown:
1983 case scVScale:
1984 // Assume to be zero-cost.
1985 return false;
1986 case scConstant: {
    // Only evaluate the costs of constants when optimizing for size.
1988 if (CostKind != TargetTransformInfo::TCK_CodeSize)
1989 return false;
1990 const APInt &Imm = cast<SCEVConstant>(Val: S)->getAPInt();
1991 Type *Ty = S->getType();
1992 Cost += TTI.getIntImmCostInst(
1993 Opc: WorkItem.ParentOpcode, Idx: WorkItem.OperandIdx, Imm, Ty, CostKind);
1994 return Cost > Budget;
1995 }
1996 case scTruncate:
1997 case scPtrToInt:
1998 case scZeroExtend:
1999 case scSignExtend: {
2000 Cost +=
2001 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2002 return false; // Will answer upon next entry into this function.
2003 }
2004 case scUDivExpr: {
    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
    // HowManyLessThans produced to compute a precise expression, rather than
    // a UDiv from the user's code. If we can't find a UDiv in the code with
    // some simple searching, we need to account for its cost.
2009
    // At the beginning of this function we already tried to find an existing
    // value for plain 'S'. Now try to look up 'S + 1' since it is a common
    // pattern involving division. This is just a simple search heuristic.
2013 if (hasRelatedExistingExpansion(
2014 S: SE.getAddExpr(LHS: S, RHS: SE.getConstant(Ty: S->getType(), V: 1)), At: &At, L))
2015 return false; // Consider it to be free.
2016
2017 Cost +=
2018 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2019 return false; // Will answer upon next entry into this function.
2020 }
2021 case scAddExpr:
2022 case scMulExpr:
2023 case scUMaxExpr:
2024 case scSMaxExpr:
2025 case scUMinExpr:
2026 case scSMinExpr:
2027 case scSequentialUMinExpr: {
2028 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2029 "Nary expr should have more than 1 operand.");
    // The simple nary expr will require one less op (or pair of ops)
    // than the number of its terms.
2032 Cost +=
2033 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2034 return Cost > Budget;
2035 }
2036 case scAddRecExpr: {
2037 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2038 "Polynomial should be at least linear");
2039 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2040 WorkItem, TTI, CostKind, Worklist);
2041 return Cost > Budget;
2042 }
2043 }
2044 llvm_unreachable("Unknown SCEV kind!");
2045}
2046
2047Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2048 Instruction *IP) {
2049 assert(IP);
2050 switch (Pred->getKind()) {
2051 case SCEVPredicate::P_Union:
2052 return expandUnionPredicate(Pred: cast<SCEVUnionPredicate>(Val: Pred), Loc: IP);
2053 case SCEVPredicate::P_Compare:
2054 return expandComparePredicate(Pred: cast<SCEVComparePredicate>(Val: Pred), Loc: IP);
2055 case SCEVPredicate::P_Wrap: {
2056 auto *AddRecPred = cast<SCEVWrapPredicate>(Val: Pred);
2057 return expandWrapPredicate(P: AddRecPred, Loc: IP);
2058 }
2059 }
2060 llvm_unreachable("Unknown SCEV predicate type");
2061}
2062
2063Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2064 Instruction *IP) {
2065 Value *Expr0 = expand(S: Pred->getLHS(), I: IP);
2066 Value *Expr1 = expand(S: Pred->getRHS(), I: IP);
2067
2068 Builder.SetInsertPoint(IP);
2069 auto InvPred = ICmpInst::getInversePredicate(pred: Pred->getPredicate());
2070 auto *I = Builder.CreateICmp(P: InvPred, LHS: Expr0, RHS: Expr1, Name: "ident.check");
2071 return I;
2072}
2073
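// Generate IR that checks whether {Start,+,Step} wraps over the loop's
// backedge-taken count. A sketch of the code emitted in the general case,
// when the sign of Step is unknown (names illustrative, types abbreviated):
//   %abs.step = select (%step s< 0), (0 - %step), %step
//   %mul      = call @llvm.umul.with.overflow(%abs.step, %btc)
//   %add      = add %start, %mul.result
//   %sub      = sub %start, %mul.result
//   %cmp      = select (%step s< 0), (%sub s> %start), (%add s< %start)
//   %wraps    = or %cmp, %mul.overflow
// Unsigned comparisons are used instead when Signed is false, and
// pointer-typed ARs use ptradd rather than integer add/sub.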
2074Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2075 Instruction *Loc, bool Signed) {
2076 assert(AR->isAffine() && "Cannot generate RT check for "
2077 "non-affine expression");
2078
2079 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2080 SmallVector<const SCEVPredicate *, 4> Pred;
2081 const SCEV *ExitCount =
2082 SE.getPredicatedBackedgeTakenCount(L: AR->getLoop(), Predicates&: Pred);
2083
2084 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2085
2086 const SCEV *Step = AR->getStepRecurrence(SE);
2087 const SCEV *Start = AR->getStart();
2088
2089 Type *ARTy = AR->getType();
2090 unsigned SrcBits = SE.getTypeSizeInBits(Ty: ExitCount->getType());
2091 unsigned DstBits = SE.getTypeSizeInBits(Ty: ARTy);
2092
  // The expression {Start,+,Step} has nusw/nssw if
  //   Step < 0, Start - |Step| * Backedge <= Start
  //   Step >= 0, Start + |Step| * Backedge >= Start
  // and |Step| * Backedge doesn't unsigned overflow.
2097
2098 Builder.SetInsertPoint(Loc);
2099 Value *TripCountVal = expand(S: ExitCount, I: Loc);
2100
2101 IntegerType *Ty =
2102 IntegerType::get(C&: Loc->getContext(), NumBits: SE.getTypeSizeInBits(Ty: ARTy));
2103
2104 Value *StepValue = expand(S: Step, I: Loc);
2105 Value *NegStepValue = expand(S: SE.getNegativeSCEV(V: Step), I: Loc);
2106 Value *StartValue = expand(S: Start, I: Loc);
2107
2108 ConstantInt *Zero =
2109 ConstantInt::get(Context&: Loc->getContext(), V: APInt::getZero(numBits: DstBits));
2110
2111 Builder.SetInsertPoint(Loc);
2112 // Compute |Step|
2113 Value *StepCompare = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: StepValue, RHS: Zero);
2114 Value *AbsStep = Builder.CreateSelect(C: StepCompare, True: NegStepValue, False: StepValue);
2115
2116 // Compute |Step| * Backedge
2117 // Compute:
2118 // 1. Start + |Step| * Backedge < Start
2119 // 2. Start - |Step| * Backedge > Start
2120 //
2121 // And select either 1. or 2. depending on whether step is positive or
2122 // negative. If Step is known to be positive or negative, only create
2123 // either 1. or 2.
2124 auto ComputeEndCheck = [&]() -> Value * {
2125 // Checking <u 0 is always false.
2126 if (!Signed && Start->isZero() && SE.isKnownPositive(S: Step))
2127 return ConstantInt::getFalse(Context&: Loc->getContext());
2128
    // Get the backedge-taken count and truncate or extend it to the AR type.
2130 Value *TruncTripCount = Builder.CreateZExtOrTrunc(V: TripCountVal, DestTy: Ty);
2131
2132 Value *MulV, *OfMul;
2133 if (Step->isOne()) {
2134 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2135 // needed, there is never an overflow, so to avoid artificially inflating
2136 // the cost of the check, directly emit the optimized IR.
2137 MulV = TruncTripCount;
2138 OfMul = ConstantInt::getFalse(Context&: MulV->getContext());
2139 } else {
2140 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2141 Intrinsic::umul_with_overflow, Ty);
2142 CallInst *Mul =
2143 Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2144 MulV = Builder.CreateExtractValue(Agg: Mul, Idxs: 0, Name: "mul.result");
2145 OfMul = Builder.CreateExtractValue(Agg: Mul, Idxs: 1, Name: "mul.overflow");
2146 }
2147
2148 Value *Add = nullptr, *Sub = nullptr;
2149 bool NeedPosCheck = !SE.isKnownNegative(S: Step);
2150 bool NeedNegCheck = !SE.isKnownPositive(S: Step);
2151
2152 if (isa<PointerType>(Val: ARTy)) {
2153 Value *NegMulV = Builder.CreateNeg(V: MulV);
2154 if (NeedPosCheck)
2155 Add = Builder.CreatePtrAdd(Ptr: StartValue, Offset: MulV);
2156 if (NeedNegCheck)
2157 Sub = Builder.CreatePtrAdd(Ptr: StartValue, Offset: NegMulV);
2158 } else {
2159 if (NeedPosCheck)
2160 Add = Builder.CreateAdd(LHS: StartValue, RHS: MulV);
2161 if (NeedNegCheck)
2162 Sub = Builder.CreateSub(LHS: StartValue, RHS: MulV);
2163 }
2164
2165 Value *EndCompareLT = nullptr;
2166 Value *EndCompareGT = nullptr;
2167 Value *EndCheck = nullptr;
2168 if (NeedPosCheck)
2169 EndCheck = EndCompareLT = Builder.CreateICmp(
2170 P: Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LHS: Add, RHS: StartValue);
2171 if (NeedNegCheck)
2172 EndCheck = EndCompareGT = Builder.CreateICmp(
2173 P: Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, LHS: Sub, RHS: StartValue);
2174 if (NeedPosCheck && NeedNegCheck) {
2175 // Select the answer based on the sign of Step.
2176 EndCheck = Builder.CreateSelect(C: StepCompare, True: EndCompareGT, False: EndCompareLT);
2177 }
2178 return Builder.CreateOr(LHS: EndCheck, RHS: OfMul);
2179 };
2180 Value *EndCheck = ComputeEndCheck();
2181
2182 // If the backedge taken count type is larger than the AR type,
2183 // check that we don't drop any bits by truncating it. If we are
2184 // dropping bits, then we have overflow (unless the step is zero).
2185 if (SrcBits > DstBits) {
2186 auto MaxVal = APInt::getMaxValue(numBits: DstBits).zext(width: SrcBits);
2187 auto *BackedgeCheck =
2188 Builder.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: TripCountVal,
2189 RHS: ConstantInt::get(Context&: Loc->getContext(), V: MaxVal));
2190 BackedgeCheck = Builder.CreateAnd(
2191 LHS: BackedgeCheck, RHS: Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: StepValue, RHS: Zero));
2192
2193 EndCheck = Builder.CreateOr(LHS: EndCheck, RHS: BackedgeCheck);
2194 }
2195
2196 return EndCheck;
2197}
2198
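// Expand a runtime check for a wrap predicate. NUSW (no unsigned self-wrap)
// and NSSW (no signed self-wrap) are checked independently, and the result
// is true when any of the requested properties is violated at runtime.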
2199Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2200 Instruction *IP) {
2201 const auto *A = cast<SCEVAddRecExpr>(Val: Pred->getExpr());
2202 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2203
2204 // Add a check for NUSW
2205 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2206 NUSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: false);
2207
2208 // Add a check for NSSW
2209 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2210 NSSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: true);
2211
2212 if (NUSWCheck && NSSWCheck)
2213 return Builder.CreateOr(LHS: NUSWCheck, RHS: NSSWCheck);
2214
2215 if (NUSWCheck)
2216 return NUSWCheck;
2217
2218 if (NSSWCheck)
2219 return NSSWCheck;
2220
2221 return ConstantInt::getFalse(Context&: IP->getContext());
2222}
2223
2224Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2225 Instruction *IP) {
2226 // Loop over all checks in this set.
2227 SmallVector<Value *> Checks;
2228 for (const auto *Pred : Union->getPredicates()) {
2229 Checks.push_back(Elt: expandCodeForPredicate(Pred, IP));
2230 Builder.SetInsertPoint(IP);
2231 }
2232
2233 if (Checks.empty())
2234 return ConstantInt::getFalse(Context&: IP->getContext());
2235 return Builder.CreateOr(Ops: Checks);
2236}
2237
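// If V is defined inside a loop but the current insertion point lies outside
// of it, LCSSA requires the use to go through a phi in an exit block, e.g.
// (names illustrative):
//   loop: %v = add i64 ...
//   exit: %v.lcssa = phi i64 [ %v, %loop ]
// This returns the LCSSA phi (%v.lcssa here) to be used in place of V, or V
// itself when no fixup is needed.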
2238Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2239 auto *DefI = dyn_cast<Instruction>(Val: V);
2240 if (!PreserveLCSSA || !DefI)
2241 return V;
2242
2243 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2244 Loop *DefLoop = SE.LI.getLoopFor(BB: DefI->getParent());
2245 Loop *UseLoop = SE.LI.getLoopFor(BB: InsertPt->getParent());
2246 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(L: UseLoop))
2247 return V;
2248
  // Create a temporary instruction at the current insertion point, so we can
  // hand it off to the helper that creates LCSSA PHIs if required for the
  // new use.
  // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
  // would accept an insertion point and return an LCSSA phi for that
  // insertion point, so there is no need to insert & remove the temporary
  // instruction.
2256 Type *ToTy;
2257 if (DefI->getType()->isIntegerTy())
2258 ToTy = PointerType::get(C&: DefI->getContext(), AddressSpace: 0);
2259 else
2260 ToTy = Type::getInt32Ty(C&: DefI->getContext());
2261 Instruction *User =
2262 CastInst::CreateBitOrPointerCast(S: DefI, Ty: ToTy, Name: "tmp.lcssa.user", InsertBefore: InsertPt);
2263 auto RemoveUserOnExit =
2264 make_scope_exit(F: [User]() { User->eraseFromParent(); });
2265
2266 SmallVector<Instruction *, 1> ToUpdate;
2267 ToUpdate.push_back(Elt: DefI);
2268 SmallVector<PHINode *, 16> PHIsToRemove;
2269 SmallVector<PHINode *, 16> InsertedPHIs;
2270 formLCSSAForInstructions(Worklist&: ToUpdate, DT: SE.DT, LI: SE.LI, SE: &SE, PHIsToRemove: &PHIsToRemove,
2271 InsertedPHIs: &InsertedPHIs);
2272 for (PHINode *PN : InsertedPHIs)
2273 rememberInstruction(I: PN);
2274 for (PHINode *PN : PHIsToRemove) {
2275 if (!PN->use_empty())
2276 continue;
2277 InsertedValues.erase(V: PN);
2278 InsertedPostIncValues.erase(V: PN);
2279 PN->eraseFromParent();
2280 }
2281
2282 return User->getOperand(i: 0);
2283}
2284
2285namespace {
2286// Search for a SCEV subexpression that is not safe to expand. Any expression
2287// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2288// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2289// instruction, but the important thing is that we prove the denominator is
2290// nonzero before expansion.
2291//
2292// IVUsers already checks that IV-derived expressions are safe. So this check is
2293// only needed when the expression includes some subexpression that is not IV
2294// derived.
2295//
2296// Currently, we only allow division by a value provably non-zero here.
2297//
2298// We cannot generally expand recurrences unless the step dominates the loop
2299// header. The expander handles the special case of affine recurrences by
2300// scaling the recurrence outside the loop, but this technique isn't generally
2301// applicable. Expanding a nested recurrence outside a loop requires computing
2302// binomial coefficients. This could be done, but the recurrence has to be in a
2303// perfectly reduced form, which can't be guaranteed.
2304struct SCEVFindUnsafe {
2305 ScalarEvolution &SE;
2306 bool CanonicalMode;
2307 bool IsUnsafe = false;
2308
2309 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2310 : SE(SE), CanonicalMode(CanonicalMode) {}
2311
2312 bool follow(const SCEV *S) {
2313 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
2314 if (!SE.isKnownNonZero(S: D->getRHS())) {
2315 IsUnsafe = true;
2316 return false;
2317 }
2318 }
2319 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2320 // For non-affine addrecs or in non-canonical mode we need a preheader
2321 // to insert into.
2322 if (!AR->getLoop()->getLoopPreheader() &&
2323 (!CanonicalMode || !AR->isAffine())) {
2324 IsUnsafe = true;
2325 return false;
2326 }
2327 }
2328 return true;
2329 }
2330 bool isDone() const { return IsUnsafe; }
2331};
2332} // namespace
2333
2334bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2335 SCEVFindUnsafe Search(SE, CanonicalMode);
2336 visitAll(Root: S, Visitor&: Search);
2337 return !Search.IsUnsafe;
2338}
2339
2340bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2341 const Instruction *InsertionPoint) const {
2342 if (!isSafeToExpand(S))
2343 return false;
2344 // We have to prove that the expanded site of S dominates InsertionPoint.
2345 // This is easy when not in the same block, but hard when S is an instruction
2346 // to be expanded somewhere inside the same block as our insertion point.
2347 // What we really need here is something analogous to an OrderedBasicBlock,
2348 // but for the moment, we paper over the problem by handling two common and
2349 // cheap to check cases.
2350 if (SE.properlyDominates(S, BB: InsertionPoint->getParent()))
2351 return true;
2352 if (SE.dominates(S, BB: InsertionPoint->getParent())) {
2353 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2354 return true;
2355 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S))
2356 if (llvm::is_contained(Range: InsertionPoint->operand_values(), Element: U->getValue()))
2357 return true;
2358 }
2359 return false;
2360}
2361
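// Undo a speculative expansion whose result ended up unused: restore the
// poison-generating flags the expander dropped on reused instructions, then
// erase every instruction the expander inserted, in reverse insertion order
// so that users are removed before their operands.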
2362void SCEVExpanderCleaner::cleanup() {
2363 // Result is used, nothing to remove.
2364 if (ResultUsed)
2365 return;
2366
2367 // Restore original poison flags.
2368 for (auto [I, Flags] : Expander.OrigFlags)
2369 Flags.apply(I);
2370
2371 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2372#ifndef NDEBUG
2373 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2374 InsertedInstructions.end());
2375 (void)InsertedSet;
2376#endif
2377 // Remove sets with value handles.
2378 Expander.clear();
2379
2380 // Remove all inserted instructions.
2381 for (Instruction *I : reverse(C&: InsertedInstructions)) {
2382#ifndef NDEBUG
2383 assert(all_of(I->users(),
2384 [&InsertedSet](Value *U) {
2385 return InsertedSet.contains(cast<Instruction>(U));
2386 }) &&
2387 "removed instruction should only be used by instructions inserted "
2388 "during expansion");
2389#endif
2390 assert(!I->getType()->isVoidTy() &&
2391 "inserted instruction should have non-void types");
2392 I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
2393 I->eraseFromParent();
2394 }
2395}
2396
