//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
#else
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
#endif

using namespace llvm;

cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));

using namespace PatternMatch;

PoisonFlags::PoisonFlags(const Instruction *I) {
  NUW = false;
  NSW = false;
  Exact = false;
  Disjoint = false;
  NNeg = false;
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
    NUW = OBO->hasNoUnsignedWrap();
    NSW = OBO->hasNoSignedWrap();
  }
  if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
    Exact = PEO->isExact();
  if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
    Disjoint = PDI->isDisjoint();
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
    NNeg = PNI->hasNonNeg();
  if (auto *TI = dyn_cast<TruncInst>(I)) {
    NUW = TI->hasNoUnsignedWrap();
    NSW = TI->hasNoSignedWrap();
  }
}
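
// Illustrative note: for an instruction such as `%a = add nuw i64 %x, %y`,
// the constructor above records NUW = true and leaves the other flags false;
// apply() below then re-installs exactly the recorded flags on a replacement
// instruction of a compatible kind.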

void PoisonFlags::apply(Instruction *I) {
  if (isa<OverflowingBinaryOperator>(I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
  if (isa<PossiblyExactOperator>(I))
    I->setIsExact(Exact);
  if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
    PDI->setIsDisjoint(Disjoint);
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
    PNI->setNonNeg(NNeg);
  if (isa<TruncInst>(I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
}

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
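///
/// For example (illustrative), when asked for a ptrtoint of %p to i64, this
/// can return an already-emitted `%c = ptrtoint ptr %p to i64` that sits at
/// or before IP, instead of emitting a second, identical cast.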
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Value *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users()) {
    if (U->getType() != Ty)
      continue;
    CastInst *CI = dyn_cast<CastInst>(U);
    if (!CI || CI->getOpcode() != Op)
      continue;

    // Found a suitable cast that is at IP or comes before IP. Use it. Note
    // that the cast must also properly dominate the Builder's insertion point.
    if (IP->getParent() == CI->getParent() && &*BIP != CI &&
        (&*IP == CI || CI->comesBefore(&*IP))) {
      Ret = CI;
      break;
    }
  }

  // Create a new cast.
  if (!Ret) {
    SCEVInsertPointGuard Guard(Builder, this);
    Builder.SetInsertPoint(&*IP);
    Ret = Builder.CreateCast(Op, V, Ty, V->getName());
  }

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(!isa<Instruction>(Ret) ||
         SE.DT.dominates(cast<Instruction>(Ret), &*BIP));

  return Ret;
}

BasicBlock::iterator
SCEVExpander::findInsertPointAfter(Instruction *I,
                                   Instruction *MustDominate) const {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getParent()->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  // Adjust insert point to be after instructions inserted by the expander, so
  // we can re-use already inserted instructions. Avoid skipping past the
  // original \p MustDominate, in case it is an inserted instruction.
  while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
    ++IP;

  return IP;
}

BasicBlock::iterator
SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return IP;
  }

  // Cast the instruction immediately after the instruction.
  if (Instruction *I = dyn_cast<Instruction>(V))
    return findInsertPointAfter(I, &*Builder.GetInsertPoint());

  // Otherwise, this must be some kind of a constant,
  // so let's plop this cast into the function's entry block.
  assert(isa<Constant>(V) &&
         "Expected the cast argument to be a global/constant");
  return Builder.GetInsertBlock()
      ->getParent()
      ->getEntryBlock()
      .getFirstInsertionPt();
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
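///
/// "Noop" here means the cast only reinterprets bits, per the asserts below:
/// a bitcast, or a ptrtoint/inttoptr between a pointer and an integer of the
/// same size (illustrative: i64 <-> ptr on a 64-bit target).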
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // inttoptr only works for integral pointers. For non-integral pointers, we
  // can create a GEP on null with the integral value as index. Note that
  // it is safe to use GEP of null instead of inttoptr here, because only
  // expressions already based on a GEP of null should be converted to pointers
  // during expansion.
  if (Op == Instruction::IntToPtr) {
    auto *PtrTy = cast<PointerType>(Ty);
    if (DL.isNonIntegralPointerType(PtrTy))
      return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
  }
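
  // E.g. (illustrative), the non-integral case above turns
  // `inttoptr i64 %v to ptr addrspace(N)` into
  // `getelementptr i8, ptr addrspace(N) null, i64 %v`.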
  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
              SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
              SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Try to reuse existing cast, or insert one.
  return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation, and hoisting
/// to an outer loop when the opportunity is there and it is safe.
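///
/// For example (illustrative), if an `add i64 %a, %b` with matching wrap
/// flags already appears a few instructions above the insertion point, that
/// instruction is returned instead of emitting a duplicate add.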
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS,
                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
        return Res;

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
        // Ensure that no-wrap flags match.
        if (isa<OverflowingBinaryOperator>(I)) {
          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
            return true;
          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
            return true;
        }
        // Conservatively, do not reuse any instruction that has the exact
        // flag set.
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  if (IsSafeToHoist) {
    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }
  }

  // If we haven't found this binop, insert it.
  // TODO: Use the Builder, which will make CreateBinOp below fold with
  // InstSimplifyFolder.
  Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  if (Flags & SCEV::FlagNUW)
    BO->setHasNoUnsignedWrap();
  if (Flags & SCEV::FlagNSW)
    BO->setHasNoSignedWrap();

  return BO;
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
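/// For example (illustrative), the pointer expression (%base + 4*%i) is
/// emitted as `getelementptr i8, ptr %base, i64 %off` with %off = 4*%i,
/// rather than as a ptrtoint/add/inttoptr sequence.
///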
Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
  assert(!isa<Instruction>(V) ||
         SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

  Value *Idx = expand(Offset);

  // Fold a GEP with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(V))
    if (Constant *CRHS = dyn_cast<Constant>(Idx))
      return Builder.CreatePtrAdd(CLHS, CRHS);

  // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;
      if (IP->getOpcode() == Instruction::GetElementPtr &&
          IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
          cast<GEPOperator>(&*IP)->getSourceElementType() ==
              Builder.getInt8Ty())
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader->getTerminator());
  }

  // Emit a GEP.
  return Builder.CreatePtrAdd(V, Idx, "scevgep");
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant
/// for SCEV expansion. If they are nested, this is the most nested. If they
/// are neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  switch (S->getSCEVType()) {
  case scConstant:
  case scVScale:
    return nullptr; // A constant has no relevant loops.
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
  case scPtrToInt:
  case scAddExpr:
  case scMulExpr:
  case scUDivExpr:
  case scAddRecExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : S->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[S] = L;
  }
  case scUnknown: {
    const SCEVUnknown *U = cast<SCEVUnknown>(S);
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies
  // on to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
      continue;
    }

    assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
    if (isa<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown that is not an instruction, peek
        // through it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expand(SE.getNegativeSCEV(Op));
      Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
                        /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple add.
      Value *W = expand(Op);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum))
        std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
                        /*IsSafeToHoist*/ true);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = S->getType();

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
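  // For example, with N = 11 = 1 + 2 + 8, X pow 11 is emitted as
  // (X pow 1) * (X pow 2) * (X pow 8), where each power of X is obtained from
  // the previous one by squaring.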
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8, etc., and include those
    // that are needed in the result.
    Value *P = expand(I->second);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
                      /*IsSafeToHoist*/ true);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P,
                                      SCEV::FlagAnyWrap,
                                      /*IsSafeToHoist*/ true)
                        : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        auto NWFlags = S->getNoWrapFlags();
        // Clear the nsw flag if the shl will produce a poison value.
        if (RHS->logBase2() == RHS->getBitWidth() - 1)
          NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
                           /*IsSafeToHoist*/ true);
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
                           /*IsSafeToHoist*/ true);
      }
    }
  }

  return Prod;
}

Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Value *LHS = expand(S->getLHS());
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
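    // A udiv by a power of two becomes a shift: e.g. (udiv %x, 8) is emitted
    // as (lshr %x, 3).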
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(SC->getType(), RHS.logBase2()),
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  }

  Value *RHS = expand(S->getRHS());
  return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
                     /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (Use &Op : llvm::drop_begin(IncV->operands()))
      if (Instruction *OInst = dyn_cast<Instruction>(Op))
        if (!SE.DT.dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  if (IncV == PN)
    return true;

  return isNormalAddRecExprPHI(PN, IncV, L);
}

/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of
/// the simple patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP. If the pattern isn't recognized, return NULL.
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
  case Instruction::Add:
  case Instruction::Sub: {
    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
    if (!OInst || SE.DT.dominates(OInst, InsertPos))
      return dyn_cast<Instruction>(IncV->getOperand(0));
    return nullptr;
  }
  case Instruction::BitCast:
    return dyn_cast<Instruction>(IncV->getOperand(0));
  case Instruction::GetElementPtr:
    for (Use &U : llvm::drop_begin(IncV->operands())) {
      if (isa<Constant>(U))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(U)) {
        if (!SE.DT.dominates(OInst, InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // GEPs produced by SCEVExpander use i8 element type.
      if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(IncV->getOperand(0));
  }
}

/// If the insert point of the current builder or any of the builders on the
/// stack of saved builders has 'I' as its insert point, update it to point to
/// the instruction after 'I'. This is intended to be used when the instruction
/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
/// different block, the inconsistent insert point (with a mismatched
/// Instruction and Block) can lead to an instruction being inserted in a block
/// other than its parent.
void SCEVExpander::fixupInsertPoints(Instruction *I) {
  BasicBlock::iterator It(*I);
  BasicBlock::iterator NewInsertPt = std::next(It);
  if (Builder.GetInsertPoint() == It)
    Builder.SetInsertPoint(&*NewInsertPt);
  for (auto *InsertPtGuard : InsertPointGuards)
    if (InsertPtGuard->GetInsertPoint() == It)
      InsertPtGuard->SetInsertPoint(NewInsertPt);
}

/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
                              bool RecomputePoisonFlags) {
  auto FixupPoisonFlags = [this](Instruction *I) {
    // Drop flags that are potentially inferred from the old context and infer
    // flags in the new context.
    rememberFlags(I);
    I->dropPoisonGeneratingFlags();
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
      if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
        auto *BO = cast<BinaryOperator>(I);
        BO->setHasNoUnsignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
        BO->setHasNoSignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
      }
  };

  if (SE.DT.dominates(IncV, InsertPos)) {
    if (RecomputePoisonFlags)
      FixupPoisonFlags(IncV);
    return true;
  }

  // InsertPos must itself dominate IncV so that IncV's new position satisfies
  // its existing users.
  if (isa<PHINode>(InsertPos) ||
      !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
    return false;

  if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
    return false;

  // Check that the chain of IV operands leading back to Phi can be hoisted.
  SmallVector<Instruction*, 4> IVIncs;
  for(;;) {
    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
    if (!Oper)
      return false;
    // IncV is safe to hoist.
    IVIncs.push_back(IncV);
    IncV = Oper;
    if (SE.DT.dominates(IncV, InsertPos))
      break;
  }
  for (Instruction *I : llvm::reverse(IVIncs)) {
    fixupInsertPoints(I);
    I->moveBefore(InsertPos);
    if (RecomputePoisonFlags)
      FixupPoisonFlags(I);
  }
  return true;
}

bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
                                                  PHINode *WidePhi,
                                                  Instruction *OrigInc,
                                                  Instruction *WideInc) {
  return match(OrigInc, m_c_BinOp(m_Specific(OrigPhi), m_Value())) &&
         match(WideInc, m_c_BinOp(m_Specific(WidePhi), m_Value())) &&
         OrigInc->getOpcode() == WideInc->getOpcode();
}

/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP.
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                           const Loop *L) {
  for(Instruction *IVOper = IncV;
      (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
                                /*allowScale=*/false));) {
    if (IVOper == PN)
      return true;
  }
  return false;
}

/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                                 bool useSubtract) {
  Value *IncV;
  // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  if (PN->getType()->isPointerTy()) {
    // TODO: Change name to IVName.iv.next.
    IncV = Builder.CreatePtrAdd(PN, StepV, "scevgep");
  } else {
    IncV = useSubtract ?
      Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
      Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
  }
  return IncV;
}

/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
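///
/// For example (illustrative), a request for {R,+,-1} can be served by an
/// available phi for {0,+,1} by computing R - phi (the InvertStep case), and
/// an i32 request can reuse a wider i64 phi after a trunc.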
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
                                    const SCEVAddRecExpr *Phi,
                                    const SCEVAddRecExpr *Requested,
                                    bool &InvertStep) {
  // We can't transform to match a pointer PHI.
  Type *PhiTy = Phi->getType();
  Type *RequestedTy = Requested->getType();
  if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
    return false;

  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
    return false;

  // Try truncate it if necessary.
  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  if (!Phi)
    return false;

  // Check whether truncation will help.
  if (Phi == Requested) {
    InvertStep = false;
    return true;
  }

  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
  if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
    InvertStep = true;
    return true;
  }

  return false;
}

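/// Return true if the increment of \p AR is known not to wrap in the signed
/// sense. This widens to twice the bit width and asks SCEV whether
/// sext(AR + Step) equals sext(AR) + sext(Step); if the extended forms agree,
/// the narrow addition cannot have signed-wrapped.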
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
                                            SE.getSignExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
                                            SE.getZeroExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L, Type *&TruncTy,
                                        bool &InvertStep) {
  assert((!IVIncInsertLoop || IVIncInsertPos) &&
         "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    PHINode *AddRecPhiMatch = nullptr;
    Instruction *IncV = nullptr;
    TruncTy = nullptr;
    InvertStep = false;

    // Only try partially matching scevs that need truncation and/or
    // step-inversion if we know this loop is outside the current loop.
    bool TryNonMatchingSCEV =
        IVIncInsertLoop &&
        SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());

    for (PHINode &PN : L->getHeader()->phis()) {
      if (!SE.isSCEVable(PN.getType()))
        continue;

      // We should not look for an incomplete PHI. Getting SCEV for an
      // incomplete PHI has no meaning at all.
      if (!PN.isComplete()) {
        SCEV_DEBUG_WITH_TYPE(
            DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
        continue;
      }

      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
      if (!PhiSCEV)
        continue;

      bool IsMatchingSCEV = PhiSCEV == Normalized;
      // We only handle truncation and inversion of phi recurrences for the
      // expanded expression if the expanded expression's loop dominates the
      // loop we insert to. Check now, so we can bail out early.
      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
        continue;

      // TODO: this possibly can be reworked to avoid this cast at all.
      Instruction *TempIncV =
          dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
        // Record the phi node. But don't stop; we might find an exact match
        // later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = Normalized->getType();
      }
    }

    if (AddRecPhiMatch) {
      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(IncV);
      // Those values were not actually inserted but re-used.
      ReusedValues.insert(AddRecPhiMatch);
      ReusedValues.insert(IncV);
      return AddRecPhiMatch;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV =
      expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());

  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that
  // PHI reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  Type *ExpandTy = Normalized->getType();
  // If the stride is negative, insert a sub instead of an add for the
  // increment (unless it's a constant, because subtracts of constants are
  // canonicalized to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());

  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  // we actually do emit an addition. It does not apply if we emit a
  // subtraction.
  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  PHINode *PN =
      Builder.CreatePHI(ExpandTy, pred_size(Header), Twine(IVName) + ".iv");

  // Create the step instructions and populate the PHI.
  for (BasicBlock *Pred : predecessors(Header)) {
    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert
    // the instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, useSubtract);

    if (isa<OverflowingBinaryOperator>(IncV)) {
      if (IncrementIsNUW)
        cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
      if (IncrementIsNSW)
        cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
    }
    PN->addIncoming(IncV, Pred);
  }

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
  // effective when we are able to use an IV inserted here, so record it.
  InsertedValues.insert(PN);
  InsertedIVs.push_back(PN);
  return PN;
}

Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized = cast<SCEVAddRecExpr>(
        normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
  }

  [[maybe_unused]] const SCEV *Start = Normalized->getStart();
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  assert(SE.properlyDominates(Start, L->getHeader()) &&
         "Start does not properly dominate loop header");
  assert(SE.dominates(Step, L->getHeader()) &&
         "Step does not dominate loop header");

  // In some cases, we decide to reuse an existing phi node but need to
  // truncate it and/or invert the step.
  Type *TruncTy = nullptr;
  bool InvertStep = false;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // We might be introducing a new use of the post-inc IV that is not poison
    // safe, in which case we should drop poison generating flags. Only keep
    // those flags for which SCEV has proven that they always hold.
    if (isa<OverflowingBinaryOperator>(Result)) {
      auto *I = cast<Instruction>(Result);
      if (!S->hasNoUnsignedWrap())
        I->setHasNoUnsignedWrap(false);
      if (!S->hasNoSignedWrap())
        I->setHasNoSignedWrap(false);
    }

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Result) &&
        !SE.DT.dominates(cast<Instruction>(Result),
                         &*Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside whose operand is replaced during
      // expansion with the value of the postinc user. Without fundamentally
      // changing the way postinc users are tracked, the only remedy is
      // inserting an extra IV increment. StepV might fold into PostLoopOffset,
      // but hopefully expandCodeFor handles that.
      bool useSubtract =
          !S->getType()->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(Step);
      Value *StepV;
      {
        // Expand the step somewhere that dominates the loop header.
        SCEVInsertPointGuard Guard(Builder, this);
        StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
      }
      Result = expandIVInc(PN, StepV, L, useSubtract);
    }
  }

  // We have decided to reuse an induction variable of a dominating loop. Apply
  // truncation and/or inversion of the step.
  if (TruncTy) {
    // Truncate the result.
    if (TruncTy != Result->getType())
      Result = Builder.CreateTrunc(Result, TruncTy);

    // Invert the result.
    if (InvertStep)
      Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
  }

  return Result;
}

Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  // In canonical mode we compute the addrec as an expression of a canonical IV
  // using evaluateAtIteration and expand the resulting SCEV expression. This
  // way we avoid introducing new IVs to carry on the computation of the addrec
  // throughout the loop.
  //
  // For nested addrecs evaluateAtIteration might need a canonical IV of a
  // type wider than the addrec itself. Emitting a canonical IV of the
  // proper type might produce non-legal types, for example expanding an i64
  // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
  // back to non-canonical mode for nested addrecs.
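  //
  // For example, in canonical mode an affine addrec {A,+,B} is emitted as
  // A + i*B in terms of the canonical IV i, rather than as its own phi.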
  if (!CanonicalMode || (S->getNumOperands() > 2))
    return expandAddRecExprLiterally(S);

  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  const Loop *L = S->getLoop();

  // First check for an existing canonical IV in a suitable type.
  PHINode *CanonicalIV = nullptr;
  if (PHINode *PN = L->getCanonicalInductionVariable())
    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
      CanonicalIV = PN;

  // Rewrite an AddRec in terms of the canonical induction variable, if
  // its type is more narrow.
  if (CanonicalIV &&
      SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
      !S->getType()->isPointerTy()) {
    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
      NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                       S->getNoWrapFlags(SCEV::FlagNW)));
    BasicBlock::iterator NewInsertPt =
        findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
    V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
    return V;
  }

  // {X,+,F} --> X + {0,+,F}
  if (!S->getStart()->isZero()) {
    if (isa<PointerType>(S->getType())) {
      Value *StartV = expand(SE.getPointerBase(S));
      return expandAddToGEP(SE.removePointerBase(S), StartV);
    }

    SmallVector<const SCEV *, 4> NewOps(S->operands());
    NewOps[0] = SE.getConstant(Ty, 0);
    const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
                                        S->getNoWrapFlags(SCEV::FlagNW));

    // Just do a normal add. Pre-expand the operands to suppress folding.
    //
    // The LHS and RHS values are factored out of the expand call to make the
    // output independent of the argument evaluation order.
    const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
    const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
    return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
    CanonicalIV->insertBefore(Header->begin());
    rememberInstruction(CanonicalIV);

    SmallSet<BasicBlock *, 4> PredSeen;
    Constant *One = ConstantInt::get(Ty, 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (!PredSeen.insert(HP).second) {
        // There must be an incoming value for each predecessor, even the
        // duplicates!
        CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
        continue;
      }

      if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add = BinaryOperator::CreateAdd(
            CanonicalIV, One, "indvar.next",
            HP->getTerminator()->getIterator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(Add);
        CanonicalIV->addIncoming(Add, HP);
      } else {
        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
  if (S->isAffine() && S->getOperand(1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F

  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine()) // {0,+,F} --> i*F
    return
      expand(SE.getTruncateOrNoop(
          SE.getMulExpr(SE.getUnknown(CanonicalIV),
                        SE.getNoopOrAnyExtend(S->getOperand(1),
                                              CanonicalIV->getType())),
          Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);

  // Truncate the result down to the original type, if needed.
  const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  return expand(T);
}

Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
  Value *V = expand(S->getOperand());
  return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
                           GetOptimalInsertionPointForCastOf(V));
}

Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateTrunc(V, S->getType());
}

Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateZExt(V, S->getType(), "",
                            SE.isKnownNonNegative(S->getOperand()));
}

Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateSExt(V, S->getType());
}

Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
                                      Intrinsic::ID IntrinID, Twine Name,
                                      bool IsSequential) {
  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
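  // For a sequential min/max (umin_seq), poison from a later operand must not
  // leak through the select chain when an earlier operand already determines
  // the result, so every operand except operand 0 is frozen below.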
1353 if (IsSequential)
1354 LHS = Builder.CreateFreeze(V: LHS);
1355 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1356 Value *RHS = expand(S: S->getOperand(i));
1357 if (IsSequential && i != 0)
1358 RHS = Builder.CreateFreeze(V: RHS);
1359 Value *Sel;
1360 if (Ty->isIntegerTy())
1361 Sel = Builder.CreateIntrinsic(ID: IntrinID, Types: {Ty}, Args: {LHS, RHS},
1362 /*FMFSource=*/nullptr, Name);
1363 else {
1364 Value *ICmp =
1365 Builder.CreateICmp(P: MinMaxIntrinsic::getPredicate(ID: IntrinID), LHS, RHS);
1366 Sel = Builder.CreateSelect(C: ICmp, True: LHS, False: RHS, Name);
1367 }
1368 LHS = Sel;
1369 }
1370 return LHS;
1371}
1372
1373Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1374 return expandMinMaxExpr(S, Intrinsic::IntrinID: smax, Name: "smax");
1375}
1376
1377Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1378 return expandMinMaxExpr(S, Intrinsic::IntrinID: umax, Name: "umax");
1379}
1380
1381Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1382 return expandMinMaxExpr(S, Intrinsic::IntrinID: smin, Name: "smin");
1383}
1384
1385Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1386 return expandMinMaxExpr(S, Intrinsic::IntrinID: umin, Name: "umin");
1387}
1388
1389Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1390 return expandMinMaxExpr(S, Intrinsic::IntrinID: umin, Name: "umin", /*IsSequential*/true);
1391}
1392
1393Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1394 return Builder.CreateVScale(Scaling: ConstantInt::get(Ty: S->getType(), V: 1));
1395}
1396
1397Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1398 BasicBlock::iterator IP) {
1399 setInsertPoint(IP);
1400 Value *V = expandCodeFor(SH, Ty);
1401 return V;
1402}
1403
1404Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1405 // Expand the code for this SCEV.
1406 Value *V = expand(S: SH);
1407
1408 if (Ty) {
1409 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1410 "non-trivial casts should be done with the SCEVs directly!");
1411 V = InsertNoopCastOfTo(V, Ty);
1412 }
1413 return V;
1414}
1415
1416Value *SCEVExpander::FindValueInExprValueMap(
1417 const SCEV *S, const Instruction *InsertPt,
1418 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
  // If the expansion is not in CanonicalMode and the SCEV contains any
  // scAddRecExpr sub-expression, the SCEV must be expanded literally.
1421 if (!CanonicalMode && SE.containsAddRecurrence(S))
1422 return nullptr;
1423
1424 // If S is a constant, it may be worse to reuse an existing Value.
1425 if (isa<SCEVConstant>(Val: S))
1426 return nullptr;
1427
1428 for (Value *V : SE.getSCEVValues(S)) {
1429 Instruction *EntInst = dyn_cast<Instruction>(Val: V);
1430 if (!EntInst)
1431 continue;
1432
1433 // Choose a Value from the set which dominates the InsertPt.
1434 // InsertPt should be inside the Value's parent loop so as not to break
1435 // the LCSSA form.
1436 assert(EntInst->getFunction() == InsertPt->getFunction());
1437 if (S->getType() != V->getType() || !SE.DT.dominates(Def: EntInst, User: InsertPt) ||
1438 !(SE.LI.getLoopFor(BB: EntInst->getParent()) == nullptr ||
1439 SE.LI.getLoopFor(BB: EntInst->getParent())->contains(Inst: InsertPt)))
1440 continue;
1441
1442 // Make sure reusing the instruction is poison-safe.
1443 if (SE.canReuseInstruction(S, I: EntInst, DropPoisonGeneratingInsts))
1444 return V;
1445 DropPoisonGeneratingInsts.clear();
1446 }
1447 return nullptr;
1448}
1449
// The expansion of a SCEV will either reuse a previous Value from
// ExprValueMap or expand the SCEV literally. Specifically, if the expansion
// is in LSRMode and the SCEV contains any scAddRecExpr sub-expression, it is
// expanded literally, to prevent LSR's transformed SCEV from being reverted.
// Otherwise, the expansion tries to reuse a Value from ExprValueMap and only
// expands the SCEV literally when that fails.
1456Value *SCEVExpander::expand(const SCEV *S) {
1457 // Compute an insertion point for this SCEV object. Hoist the instructions
1458 // as far out in the loop nest as possible.
1459 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1460
  // We can move the insertion point only if there are no div or rem
  // operations; otherwise we risk hoisting them above the check for a zero
  // denominator.
1463 auto SafeToHoist = [](const SCEV *S) {
1464 return !SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
1465 if (const auto *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
1466 if (const auto *SC = dyn_cast<SCEVConstant>(Val: D->getRHS()))
1467 // Division by non-zero constants can be hoisted.
1468 return SC->getValue()->isZero();
1469 // All other divisions should not be moved as they may be
1470 // divisions by zero and should be kept within the
1471 // conditions of the surrounding loops that guard their
1472 // execution (see PR35406).
1473 return true;
1474 }
1475 return false;
1476 });
1477 };
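  // For example, if a SCEV contains (%n /u %d) and the enclosing loop is
  // only entered when "%d != 0" holds, hoisting the udiv into an outer
  // preheader could execute a division by zero on a path where the loop
  // never runs.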
1478 if (SafeToHoist(S)) {
1479 for (Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock());;
1480 L = L->getParentLoop()) {
1481 if (SE.isLoopInvariant(S, L)) {
1482 if (!L) break;
1483 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1484 InsertPt = Preheader->getTerminator()->getIterator();
1485 } else {
1486 // LSR sets the insertion point for AddRec start/step values to the
1487 // block start to simplify value reuse, even though it's an invalid
1488 // position. SCEVExpander must correct for this in all cases.
1489 InsertPt = L->getHeader()->getFirstInsertionPt();
1490 }
1491 } else {
1492 // If the SCEV is computable at this level, insert it into the header
1493 // after the PHIs (and after any other instructions that we've inserted
1494 // there) so that it is guaranteed to dominate any user inside the loop.
1495 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(Ptr: L))
1496 InsertPt = L->getHeader()->getFirstInsertionPt();
1497
1498 while (InsertPt != Builder.GetInsertPoint() &&
1499 (isInsertedInstruction(I: &*InsertPt) ||
1500 isa<DbgInfoIntrinsic>(Val: &*InsertPt))) {
1501 InsertPt = std::next(x: InsertPt);
1502 }
1503 break;
1504 }
1505 }
1506 }
1507
1508 // Check to see if we already expanded this here.
1509 auto I = InsertedExpressions.find(Val: std::make_pair(x&: S, y: &*InsertPt));
1510 if (I != InsertedExpressions.end())
1511 return I->second;
1512
1513 SCEVInsertPointGuard Guard(Builder, this);
1514 Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt);
1515
1516 // Expand the expression into instructions.
1517 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1518 Value *V = FindValueInExprValueMap(S, InsertPt: &*InsertPt, DropPoisonGeneratingInsts);
1519 if (!V) {
1520 V = visit(S);
1521 V = fixupLCSSAFormFor(V);
1522 } else {
1523 for (Instruction *I : DropPoisonGeneratingInsts) {
1524 rememberFlags(I);
1525 I->dropPoisonGeneratingAnnotations();
1526 // See if we can re-infer from first principles any of the flags we just
1527 // dropped.
1528 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
1529 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1530 auto *BO = cast<BinaryOperator>(Val: I);
1531 BO->setHasNoUnsignedWrap(
1532 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
1533 BO->setHasNoSignedWrap(
1534 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
1535 }
1536 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(Val: I)) {
1537 auto *Src = NNI->getOperand(i_nocapture: 0);
1538 if (isImpliedByDomCondition(Pred: ICmpInst::ICMP_SGE, LHS: Src,
1539 RHS: Constant::getNullValue(Ty: Src->getType()), ContextI: I,
1540 DL).value_or(u: false))
1541 NNI->setNonNeg(true);
1542 }
1543 }
1544 }
1545 // Remember the expanded value for this SCEV at this location.
1546 //
1547 // This is independent of PostIncLoops. The mapped value simply materializes
1548 // the expression at this insertion point. If the mapped value happened to be
1549 // a postinc expansion, it could be reused by a non-postinc user, but only if
1550 // its insertion point was already at the head of the loop.
1551 InsertedExpressions[std::make_pair(x&: S, y: &*InsertPt)] = V;
1552 return V;
1553}
1554
1555void SCEVExpander::rememberInstruction(Value *I) {
1556 auto DoInsert = [this](Value *V) {
1557 if (!PostIncLoops.empty())
1558 InsertedPostIncValues.insert(V);
1559 else
1560 InsertedValues.insert(V);
1561 };
1562 DoInsert(I);
1563}
1564
1565void SCEVExpander::rememberFlags(Instruction *I) {
1566 // If we already have flags for the instruction, keep the existing ones.
1567 OrigFlags.try_emplace(Key: I, Args: PoisonFlags(I));
1568}
1569
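// Having proven that Phi and OrigPhi compute the same value, also try to
// replace Phi's single latch increment with OrigPhi's when they are
// equivalent up to truncation. A sketch of the common case (types and names
// illustrative):
//   %orig.iv = phi i64 ... ; %orig.inc = add nuw i64 %orig.iv, 1
//   %phi     = phi i32 ... ; %inc      = add i32 %phi, 1
// Here %inc is replaced by "trunc i64 %orig.inc to i32" and queued for
// deletion, after hoisting %orig.inc so that it dominates the old use.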
1570void SCEVExpander::replaceCongruentIVInc(
1571 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1572 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1573 BasicBlock *LatchBlock = L->getLoopLatch();
1574 if (!LatchBlock)
1575 return;
1576
1577 Instruction *OrigInc =
1578 dyn_cast<Instruction>(Val: OrigPhi->getIncomingValueForBlock(BB: LatchBlock));
1579 Instruction *IsomorphicInc =
1580 dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: LatchBlock));
1581 if (!OrigInc || !IsomorphicInc)
1582 return;
1583
1584 // If this phi has the same width but is more canonical, replace the
1585 // original with it. As part of the "more canonical" determination,
1586 // respect a prior decision to use an IV chain.
1587 if (OrigPhi->getType() == Phi->getType() &&
1588 !(ChainedPhis.count(V: Phi) ||
1589 isExpandedAddRecExprPHI(PN: OrigPhi, IncV: OrigInc, L)) &&
1590 (ChainedPhis.count(V: Phi) ||
1591 isExpandedAddRecExprPHI(PN: Phi, IncV: IsomorphicInc, L))) {
1592 std::swap(a&: OrigPhi, b&: Phi);
1593 std::swap(a&: OrigInc, b&: IsomorphicInc);
1594 }
1595
1596 // Replacing the congruent phi is sufficient because acyclic
1597 // redundancy elimination, CSE/GVN, should handle the
1598 // rest. However, once SCEV proves that a phi is congruent,
1599 // it's often the head of an IV user cycle that is isomorphic
1600 // with the original phi. It's worth eagerly cleaning up the
1601 // common case of a single IV increment so that DeleteDeadPHIs
1602 // can remove cycles that had postinc uses.
  // Because we may introduce a new use of OrigIV that didn't exist before at
  // this point, its poison flags need readjustment.
1605 const SCEV *TruncExpr =
1606 SE.getTruncateOrNoop(V: SE.getSCEV(V: OrigInc), Ty: IsomorphicInc->getType());
1607 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(V: IsomorphicInc) ||
1608 !SE.LI.replacementPreservesLCSSAForm(From: IsomorphicInc, To: OrigInc))
1609 return;
1610
1611 bool BothHaveNUW = false;
1612 bool BothHaveNSW = false;
1613 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(Val: OrigInc);
1614 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(Val: IsomorphicInc);
1615 if (OBOIncV && OBOIsomorphic) {
1616 BothHaveNUW =
1617 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1618 BothHaveNSW =
1619 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1620 }
1621
1622 if (!hoistIVInc(IncV: OrigInc, InsertPos: IsomorphicInc,
1623 /*RecomputePoisonFlags*/ true))
1624 return;
1625
1626 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1627 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1628 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1629 // make IsomorphicInc's uses more poisonous.
1630 assert(OrigInc->getType()->getScalarSizeInBits() >=
1631 IsomorphicInc->getType()->getScalarSizeInBits() &&
1632 "Should only replace an increment with a wider one.");
1633 if (BothHaveNUW || BothHaveNSW) {
1634 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1635 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1636 }
1637
1638 SCEV_DEBUG_WITH_TYPE(DebugType,
1639 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1640 << *IsomorphicInc << '\n');
1641 Value *NewInc = OrigInc;
1642 if (OrigInc->getType() != IsomorphicInc->getType()) {
1643 BasicBlock::iterator IP;
1644 if (PHINode *PN = dyn_cast<PHINode>(Val: OrigInc))
1645 IP = PN->getParent()->getFirstInsertionPt();
1646 else
1647 IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1648
1649 IRBuilder<> Builder(IP->getParent(), IP);
1650 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1651 NewInc =
1652 Builder.CreateTruncOrBitCast(V: OrigInc, DestTy: IsomorphicInc->getType(), Name: IVName);
1653 }
1654 IsomorphicInc->replaceAllUsesWith(V: NewInc);
1655 DeadInsts.emplace_back(Args&: IsomorphicInc);
1656}
1657
1658/// replaceCongruentIVs - Check for congruent phis in this loop header and
1659/// replace them with their most canonical representative. Return the number of
1660/// phis eliminated.
1661///
1662/// This does not depend on any SCEVExpander state but should be used in
1663/// the same context that SCEVExpander is used.
1664unsigned
1665SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1666 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1667 const TargetTransformInfo *TTI) {
  // Find integer phis in order of decreasing width.
1669 SmallVector<PHINode*, 8> Phis;
1670 for (PHINode &PN : L->getHeader()->phis())
1671 Phis.push_back(Elt: &PN);
1672
1673 if (TTI)
1674 // Use stable_sort to preserve order of equivalent PHIs, so the order
1675 // of the sorted Phis is the same from run to run on the same loop.
1676 llvm::stable_sort(Range&: Phis, C: [](Value *LHS, Value *RHS) {
1677 // Put pointers at the back and make sure pointer < pointer = false.
1678 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1679 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1680 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1681 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1682 });
1683
1684 unsigned NumElim = 0;
1685 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1686 // Process phis from wide to narrow. Map wide phis to their truncation
1687 // so narrow phis can reuse them.
1688 for (PHINode *Phi : Phis) {
1689 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1690 if (Value *V = simplifyInstruction(I: PN, Q: {DL, &SE.TLI, &SE.DT, &SE.AC}))
1691 return V;
1692 if (!SE.isSCEVable(Ty: PN->getType()))
1693 return nullptr;
1694 auto *Const = dyn_cast<SCEVConstant>(Val: SE.getSCEV(V: PN));
1695 if (!Const)
1696 return nullptr;
1697 return Const->getValue();
1698 };
1699
1700 // Fold constant phis. They may be congruent to other constant phis and
1701 // would confuse the logic below that expects proper IVs.
1702 if (Value *V = SimplifyPHINode(Phi)) {
1703 if (V->getType() != Phi->getType())
1704 continue;
1705 SE.forgetValue(V: Phi);
1706 Phi->replaceAllUsesWith(V);
1707 DeadInsts.emplace_back(Args&: Phi);
1708 ++NumElim;
1709 SCEV_DEBUG_WITH_TYPE(DebugType,
1710 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1711 << '\n');
1712 continue;
1713 }
1714
1715 if (!SE.isSCEVable(Ty: Phi->getType()))
1716 continue;
1717
1718 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(V: Phi)];
1719 if (!OrigPhiRef) {
1720 OrigPhiRef = Phi;
1721 if (Phi->getType()->isIntegerTy() && TTI &&
1722 TTI->isTruncateFree(Ty1: Phi->getType(), Ty2: Phis.back()->getType())) {
1723 // Make sure we only rewrite using simple induction variables;
1724 // otherwise, we can make the trip count of a loop unanalyzable
1725 // to SCEV.
1726 const SCEV *PhiExpr = SE.getSCEV(V: Phi);
1727 if (isa<SCEVAddRecExpr>(Val: PhiExpr)) {
1728 // This phi can be freely truncated to the narrowest phi type. Map the
1729 // truncated expression to it so it will be reused for narrow types.
1730 const SCEV *TruncExpr =
1731 SE.getTruncateExpr(Op: PhiExpr, Ty: Phis.back()->getType());
1732 ExprToIVMap[TruncExpr] = Phi;
1733 }
1734 }
1735 continue;
1736 }
1737
1738 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1739 // sense.
1740 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1741 continue;
1742
1743 replaceCongruentIVInc(Phi, OrigPhi&: OrigPhiRef, L, DT, DeadInsts);
1744 SCEV_DEBUG_WITH_TYPE(DebugType,
1745 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1746 << '\n');
1747 SCEV_DEBUG_WITH_TYPE(
1748 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1749 ++NumElim;
1750 Value *NewIV = OrigPhiRef;
1751 if (OrigPhiRef->getType() != Phi->getType()) {
1752 IRBuilder<> Builder(L->getHeader(),
1753 L->getHeader()->getFirstInsertionPt());
1754 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1755 NewIV = Builder.CreateTruncOrBitCast(V: OrigPhiRef, DestTy: Phi->getType(), Name: IVName);
1756 }
1757 Phi->replaceAllUsesWith(V: NewIV);
1758 DeadInsts.emplace_back(Args&: Phi);
1759 }
1760 return NumElim;
1761}
1762
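// Return true if a Value computing S and available at At already exists:
// either an operand of a simple exit condition of L, e.g. the %n in
// "br (icmp ult i64 %i, %n), label %body, label %exit" (names illustrative),
// or an entry in ScalarEvolution's ExprValueMap.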
1763bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1764 const Instruction *At,
1765 Loop *L) {
1766 using namespace llvm::PatternMatch;
1767
1768 SmallVector<BasicBlock *, 4> ExitingBlocks;
1769 L->getExitingBlocks(ExitingBlocks);
1770
  // Look for a suitable value in simple conditions at the loop exits.
1772 for (BasicBlock *BB : ExitingBlocks) {
1773 ICmpInst::Predicate Pred;
1774 Instruction *LHS, *RHS;
1775
1776 if (!match(V: BB->getTerminator(),
1777 P: m_Br(C: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Instruction(I&: RHS)),
1778 T: m_BasicBlock(), F: m_BasicBlock())))
1779 continue;
1780
1781 if (SE.getSCEV(V: LHS) == S && SE.DT.dominates(Def: LHS, User: At))
1782 return true;
1783
1784 if (SE.getSCEV(V: RHS) == S && SE.DT.dominates(Def: RHS, User: At))
1785 return true;
1786 }
1787
1788 // Use expand's logic which is used for reusing a previous Value in
1789 // ExprValueMap. Note that we don't currently model the cost of
1790 // needing to drop poison generating flags on the instruction if we
1791 // want to reuse it. We effectively assume that has zero cost.
1792 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1793 return FindValueInExprValueMap(S, InsertPt: At, DropPoisonGeneratingInsts) != nullptr;
1794}
1795
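// Estimate the cost of expanding WorkItem and queue its SCEV operands on
// Worklist, tagged with the IR opcode and operand index they will feed, so
// that constant operands can later be costed as immediates of a concrete
// instruction. For example (illustrative), costing the addrec {a,+,b,+,c}
// charges the adds that combine its non-zero terms, plus the multiplies that
// scale them and form the powers of the induction variable.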
1796template<typename T> static InstructionCost costAndCollectOperands(
1797 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1798 TargetTransformInfo::TargetCostKind CostKind,
1799 SmallVectorImpl<SCEVOperand> &Worklist) {
1800
1801 const T *S = cast<T>(WorkItem.S);
1802 InstructionCost Cost = 0;
1803 // Object to help map SCEV operands to expanded IR instructions.
1804 struct OperationIndices {
1805 OperationIndices(unsigned Opc, size_t min, size_t max) :
1806 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1807 unsigned Opcode;
1808 size_t MinIdx;
1809 size_t MaxIdx;
1810 };
1811
1812 // Collect the operations of all the instructions that will be needed to
1813 // expand the SCEVExpr. This is so that when we come to cost the operands,
1814 // we know what the generated user(s) will be.
1815 SmallVector<OperationIndices, 2> Operations;
1816
1817 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1818 Operations.emplace_back(Opcode, 0, 0);
1819 return TTI.getCastInstrCost(Opcode, Dst: S->getType(),
1820 Src: S->getOperand(0)->getType(),
1821 CCH: TTI::CastContextHint::None, CostKind);
1822 };
1823
1824 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1825 unsigned MinIdx = 0,
1826 unsigned MaxIdx = 1) -> InstructionCost {
1827 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1828 return NumRequired *
1829 TTI.getArithmeticInstrCost(Opcode, Ty: S->getType(), CostKind);
1830 };
1831
1832 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1833 unsigned MaxIdx) -> InstructionCost {
1834 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1835 Type *OpType = S->getType();
1836 return NumRequired * TTI.getCmpSelInstrCost(
1837 Opcode, ValTy: OpType, CondTy: CmpInst::makeCmpResultType(opnd_type: OpType),
1838 VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
1839 };
1840
1841 switch (S->getSCEVType()) {
1842 case scCouldNotCompute:
1843 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1844 case scUnknown:
1845 case scConstant:
1846 case scVScale:
1847 return 0;
1848 case scPtrToInt:
1849 Cost = CastCost(Instruction::PtrToInt);
1850 break;
1851 case scTruncate:
1852 Cost = CastCost(Instruction::Trunc);
1853 break;
1854 case scZeroExtend:
1855 Cost = CastCost(Instruction::ZExt);
1856 break;
1857 case scSignExtend:
1858 Cost = CastCost(Instruction::SExt);
1859 break;
1860 case scUDivExpr: {
1861 unsigned Opcode = Instruction::UDiv;
1862 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1863 if (SC->getAPInt().isPowerOf2())
1864 Opcode = Instruction::LShr;
1865 Cost = ArithCost(Opcode, 1);
1866 break;
1867 }
1868 case scAddExpr:
1869 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1870 break;
1871 case scMulExpr:
    // TODO: this is a very pessimistic cost model for Mul, because of the
    // binary powering (BinPow) algorithm actually used by the expander;
    // see SCEVExpander::visitMulExpr() and ExpandOpBinPowN().
1875 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1876 break;
1877 case scSMaxExpr:
1878 case scUMaxExpr:
1879 case scSMinExpr:
1880 case scUMinExpr:
1881 case scSequentialUMinExpr: {
    // FIXME: should this ask for the cost of intrinsics?
1883 // The reduction tree.
1884 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1885 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1886 switch (S->getSCEVType()) {
1887 case scSequentialUMinExpr: {
1888 // The safety net against poison.
1889 // FIXME: this is broken.
1890 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1891 Cost += ArithCost(Instruction::Or,
1892 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1893 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1894 break;
1895 }
1896 default:
1897 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1898 "Unhandled SCEV expression type?");
1899 break;
1900 }
1901 break;
1902 }
1903 case scAddRecExpr: {
    // In this polynomial, we may have some zero operands, and we shouldn't
    // really charge for those. So how many non-zero coefficients are there?
1906 int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1907 return !Op->isZero();
1908 });
1909
    assert(NumTerms >= 1 && "Polynomial should have at least one term.");
1911 assert(!(*std::prev(S->operands().end()))->isZero() &&
1912 "Last operand should not be zero");
1913
1914 // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1915 int NumNonZeroDegreeNonOneTerms =
1916 llvm::count_if(S->operands(), [](const SCEV *Op) {
1917 auto *SConst = dyn_cast<SCEVConstant>(Val: Op);
1918 return !SConst || SConst->getAPInt().ugt(RHS: 1);
1919 });
1920
    // Much like with a normal add expr, the polynomial will require
    // one less addition than the number of its terms.
1923 InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1924 /*MinIdx*/ 1, /*MaxIdx*/ 1);
1925 // Here, *each* one of those will require a multiplication.
1926 InstructionCost MulCost =
1927 ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1928 Cost = AddCost + MulCost;
1929
    // What is the degree of this polynomial?
1931 int PolyDegree = S->getNumOperands() - 1;
1932 assert(PolyDegree >= 1 && "Should be at least affine.");
1933
1934 // The final term will be:
1935 // Op_{PolyDegree} * x ^ {PolyDegree}
1936 // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1937 // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1938 // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1939 // FIXME: this is conservatively correct, but might be overly pessimistic.
1940 Cost += MulCost * (PolyDegree - 1);
1941 break;
1942 }
1943 }
1944
1945 for (auto &CostOp : Operations) {
1946 for (auto SCEVOp : enumerate(S->operands())) {
1947 // Clamp the index to account for multiple IR operations being chained.
1948 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1949 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1950 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1951 }
1952 }
1953 return Cost;
1954}
1955
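// Account for the cost of expanding WorkItem.S, accumulating the running
// total in Cost and deferring its operands onto Worklist for later visits.
// Returns true as soon as the accumulated Cost exceeds Budget; expressions
// for which a usable expansion already exists are treated as free.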
1956bool SCEVExpander::isHighCostExpansionHelper(
1957 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1958 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1959 SmallPtrSetImpl<const SCEV *> &Processed,
1960 SmallVectorImpl<SCEVOperand> &Worklist) {
1961 if (Cost > Budget)
1962 return true; // Already run out of budget, give up.
1963
1964 const SCEV *S = WorkItem.S;
1965 // Was the cost of expansion of this expression already accounted for?
1966 if (!isa<SCEVConstant>(Val: S) && !Processed.insert(Ptr: S).second)
1967 return false; // We have already accounted for this expression.
1968
1969 // If we can find an existing value for this scev available at the point "At"
1970 // then consider the expression cheap.
1971 if (hasRelatedExistingExpansion(S, At: &At, L))
1972 return false; // Consider the expression to be free.
1973
1974 TargetTransformInfo::TargetCostKind CostKind =
1975 L->getHeader()->getParent()->hasMinSize()
1976 ? TargetTransformInfo::TCK_CodeSize
1977 : TargetTransformInfo::TCK_RecipThroughput;
1978
1979 switch (S->getSCEVType()) {
1980 case scCouldNotCompute:
1981 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1982 case scUnknown:
1983 case scVScale:
1984 // Assume to be zero-cost.
1985 return false;
1986 case scConstant: {
    // Only evaluate the costs of constants when optimizing for size.
1988 if (CostKind != TargetTransformInfo::TCK_CodeSize)
1989 return false;
1990 const APInt &Imm = cast<SCEVConstant>(Val: S)->getAPInt();
1991 Type *Ty = S->getType();
1992 Cost += TTI.getIntImmCostInst(
1993 Opc: WorkItem.ParentOpcode, Idx: WorkItem.OperandIdx, Imm, Ty, CostKind);
1994 return Cost > Budget;
1995 }
1996 case scTruncate:
1997 case scPtrToInt:
1998 case scZeroExtend:
1999 case scSignExtend: {
2000 Cost +=
2001 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2002 return false; // Will answer upon next entry into this function.
2003 }
2004 case scUDivExpr: {
    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
    // HowManyLessThans produced to compute a precise expression, rather than
    // a UDiv from the user's code. If we can't find a UDiv in the code with
    // some simple searching, we need to account for its cost.
2009
    // At the beginning of this function we already tried to find an existing
    // value for plain 'S'. Now try to look up 'S + 1' since it is a common
    // pattern involving division. This is just a simple search heuristic.
2013 if (hasRelatedExistingExpansion(
2014 S: SE.getAddExpr(LHS: S, RHS: SE.getConstant(Ty: S->getType(), V: 1)), At: &At, L))
2015 return false; // Consider it to be free.
2016
2017 Cost +=
2018 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2019 return false; // Will answer upon next entry into this function.
2020 }
2021 case scAddExpr:
2022 case scMulExpr:
2023 case scUMaxExpr:
2024 case scSMaxExpr:
2025 case scUMinExpr:
2026 case scSMinExpr:
2027 case scSequentialUMinExpr: {
2028 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2029 "Nary expr should have more than 1 operand.");
    // The simple nary expr will require one less op (or pair of ops)
    // than the number of its terms.
2032 Cost +=
2033 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2034 return Cost > Budget;
2035 }
2036 case scAddRecExpr: {
2037 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2038 "Polynomial should be at least linear");
2039 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2040 WorkItem, TTI, CostKind, Worklist);
2041 return Cost > Budget;
2042 }
2043 }
2044 llvm_unreachable("Unknown SCEV kind!");
2045}
2046
2047Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2048 Instruction *IP) {
2049 assert(IP);
2050 switch (Pred->getKind()) {
2051 case SCEVPredicate::P_Union:
2052 return expandUnionPredicate(Pred: cast<SCEVUnionPredicate>(Val: Pred), Loc: IP);
2053 case SCEVPredicate::P_Compare:
2054 return expandComparePredicate(Pred: cast<SCEVComparePredicate>(Val: Pred), Loc: IP);
2055 case SCEVPredicate::P_Wrap: {
2056 auto *AddRecPred = cast<SCEVWrapPredicate>(Val: Pred);
2057 return expandWrapPredicate(P: AddRecPred, Loc: IP);
2058 }
2059 }
2060 llvm_unreachable("Unknown SCEV predicate type");
2061}
2062
2063Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2064 Instruction *IP) {
2065 Value *Expr0 = expand(S: Pred->getLHS(), I: IP);
2066 Value *Expr1 = expand(S: Pred->getRHS(), I: IP);
2067
2068 Builder.SetInsertPoint(IP);
2069 auto InvPred = ICmpInst::getInversePredicate(pred: Pred->getPredicate());
2070 auto *I = Builder.CreateICmp(P: InvPred, LHS: Expr0, RHS: Expr1, Name: "ident.check");
2071 return I;
2072}
2073
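// Generate IR that checks whether {Start,+,Step} wraps over the loop's
// backedge-taken count. A sketch of the code emitted in the general case,
// when the sign of Step is unknown (names illustrative, types abbreviated):
//   %abs.step = select (%step s< 0), (0 - %step), %step
//   %mul      = call @llvm.umul.with.overflow(%abs.step, %btc)
//   %add      = add %start, %mul.result
//   %sub      = sub %start, %mul.result
//   %cmp      = select (%step s< 0), (%sub s> %start), (%add s< %start)
//   %wraps    = or %cmp, %mul.overflow
// Unsigned comparisons are used instead when Signed is false, and
// pointer-typed ARs use ptradd rather than integer add/sub.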
2074Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2075 Instruction *Loc, bool Signed) {
2076 assert(AR->isAffine() && "Cannot generate RT check for "
2077 "non-affine expression");
2078
2079 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2080 SmallVector<const SCEVPredicate *, 4> Pred;
2081 const SCEV *ExitCount =
2082 SE.getPredicatedBackedgeTakenCount(L: AR->getLoop(), Predicates&: Pred);
2083
2084 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2085
2086 const SCEV *Step = AR->getStepRecurrence(SE);
2087 const SCEV *Start = AR->getStart();
2088
2089 Type *ARTy = AR->getType();
2090 unsigned SrcBits = SE.getTypeSizeInBits(Ty: ExitCount->getType());
2091 unsigned DstBits = SE.getTypeSizeInBits(Ty: ARTy);
2092
  // The expression {Start,+,Step} has nusw/nssw if
  //   Step < 0, Start - |Step| * Backedge <= Start
  //   Step >= 0, Start + |Step| * Backedge >= Start
  // and |Step| * Backedge doesn't unsigned overflow.
2097
2098 Builder.SetInsertPoint(Loc);
2099 Value *TripCountVal = expand(S: ExitCount, I: Loc);
2100
2101 IntegerType *Ty =
2102 IntegerType::get(C&: Loc->getContext(), NumBits: SE.getTypeSizeInBits(Ty: ARTy));
2103
2104 Value *StepValue = expand(S: Step, I: Loc);
2105 Value *NegStepValue = expand(S: SE.getNegativeSCEV(V: Step), I: Loc);
2106 Value *StartValue = expand(S: Start, I: Loc);
2107
2108 ConstantInt *Zero =
2109 ConstantInt::get(Context&: Loc->getContext(), V: APInt::getZero(numBits: DstBits));
2110
2111 Builder.SetInsertPoint(Loc);
2112 // Compute |Step|
2113 Value *StepCompare = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: StepValue, RHS: Zero);
2114 Value *AbsStep = Builder.CreateSelect(C: StepCompare, True: NegStepValue, False: StepValue);
2115
2116 // Compute |Step| * Backedge
2117 // Compute:
2118 // 1. Start + |Step| * Backedge < Start
2119 // 2. Start - |Step| * Backedge > Start
2120 //
2121 // And select either 1. or 2. depending on whether step is positive or
2122 // negative. If Step is known to be positive or negative, only create
2123 // either 1. or 2.
2124 auto ComputeEndCheck = [&]() -> Value * {
2125 // Checking <u 0 is always false.
2126 if (!Signed && Start->isZero() && SE.isKnownPositive(S: Step))
2127 return ConstantInt::getFalse(Context&: Loc->getContext());
2128
    // Get the backedge-taken count and truncate or extend it to the AR type.
2130 Value *TruncTripCount = Builder.CreateZExtOrTrunc(V: TripCountVal, DestTy: Ty);
2131
2132 Value *MulV, *OfMul;
2133 if (Step->isOne()) {
2134 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2135 // needed, there is never an overflow, so to avoid artificially inflating
2136 // the cost of the check, directly emit the optimized IR.
2137 MulV = TruncTripCount;
2138 OfMul = ConstantInt::getFalse(Context&: MulV->getContext());
2139 } else {
2140 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2141 Intrinsic::umul_with_overflow, Ty);
2142 CallInst *Mul =
2143 Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2144 MulV = Builder.CreateExtractValue(Agg: Mul, Idxs: 0, Name: "mul.result");
2145 OfMul = Builder.CreateExtractValue(Agg: Mul, Idxs: 1, Name: "mul.overflow");
2146 }
2147
2148 Value *Add = nullptr, *Sub = nullptr;
2149 bool NeedPosCheck = !SE.isKnownNegative(S: Step);
2150 bool NeedNegCheck = !SE.isKnownPositive(S: Step);
2151
2152 if (isa<PointerType>(Val: ARTy)) {
2153 Value *NegMulV = Builder.CreateNeg(V: MulV);
2154 if (NeedPosCheck)
2155 Add = Builder.CreatePtrAdd(Ptr: StartValue, Offset: MulV);
2156 if (NeedNegCheck)
2157 Sub = Builder.CreatePtrAdd(Ptr: StartValue, Offset: NegMulV);
2158 } else {
2159 if (NeedPosCheck)
2160 Add = Builder.CreateAdd(LHS: StartValue, RHS: MulV);
2161 if (NeedNegCheck)
2162 Sub = Builder.CreateSub(LHS: StartValue, RHS: MulV);
2163 }
2164
2165 Value *EndCompareLT = nullptr;
2166 Value *EndCompareGT = nullptr;
2167 Value *EndCheck = nullptr;
2168 if (NeedPosCheck)
2169 EndCheck = EndCompareLT = Builder.CreateICmp(
2170 P: Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LHS: Add, RHS: StartValue);
2171 if (NeedNegCheck)
2172 EndCheck = EndCompareGT = Builder.CreateICmp(
2173 P: Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, LHS: Sub, RHS: StartValue);
2174 if (NeedPosCheck && NeedNegCheck) {
2175 // Select the answer based on the sign of Step.
2176 EndCheck = Builder.CreateSelect(C: StepCompare, True: EndCompareGT, False: EndCompareLT);
2177 }
2178 return Builder.CreateOr(LHS: EndCheck, RHS: OfMul);
2179 };
2180 Value *EndCheck = ComputeEndCheck();
2181
2182 // If the backedge taken count type is larger than the AR type,
2183 // check that we don't drop any bits by truncating it. If we are
2184 // dropping bits, then we have overflow (unless the step is zero).
2185 if (SrcBits > DstBits) {
2186 auto MaxVal = APInt::getMaxValue(numBits: DstBits).zext(width: SrcBits);
2187 auto *BackedgeCheck =
2188 Builder.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: TripCountVal,
2189 RHS: ConstantInt::get(Context&: Loc->getContext(), V: MaxVal));
2190 BackedgeCheck = Builder.CreateAnd(
2191 LHS: BackedgeCheck, RHS: Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: StepValue, RHS: Zero));
2192
2193 EndCheck = Builder.CreateOr(LHS: EndCheck, RHS: BackedgeCheck);
2194 }
2195
2196 return EndCheck;
2197}
2198
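// Expand a runtime check for a wrap predicate. NUSW (no unsigned self-wrap)
// and NSSW (no signed self-wrap) are checked independently, and the result
// is true when any of the requested properties is violated at runtime.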
2199Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2200 Instruction *IP) {
2201 const auto *A = cast<SCEVAddRecExpr>(Val: Pred->getExpr());
2202 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2203
2204 // Add a check for NUSW
2205 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2206 NUSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: false);
2207
2208 // Add a check for NSSW
2209 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2210 NSSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: true);
2211
2212 if (NUSWCheck && NSSWCheck)
2213 return Builder.CreateOr(LHS: NUSWCheck, RHS: NSSWCheck);
2214
2215 if (NUSWCheck)
2216 return NUSWCheck;
2217
2218 if (NSSWCheck)
2219 return NSSWCheck;
2220
2221 return ConstantInt::getFalse(Context&: IP->getContext());
2222}
2223
2224Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2225 Instruction *IP) {
2226 // Loop over all checks in this set.
2227 SmallVector<Value *> Checks;
2228 for (const auto *Pred : Union->getPredicates()) {
2229 Checks.push_back(Elt: expandCodeForPredicate(Pred, IP));
2230 Builder.SetInsertPoint(IP);
2231 }
2232
2233 if (Checks.empty())
2234 return ConstantInt::getFalse(Context&: IP->getContext());
2235 return Builder.CreateOr(Ops: Checks);
2236}
2237
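// If V is defined inside a loop but the current insertion point lies outside
// of it, LCSSA requires the use to go through a phi in an exit block, e.g.
// (names illustrative):
//   loop: %v = add i64 ...
//   exit: %v.lcssa = phi i64 [ %v, %loop ]
// This returns the LCSSA phi (%v.lcssa here) to be used in place of V, or V
// itself when no fixup is needed.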
2238Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2239 auto *DefI = dyn_cast<Instruction>(Val: V);
2240 if (!PreserveLCSSA || !DefI)
2241 return V;
2242
2243 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2244 Loop *DefLoop = SE.LI.getLoopFor(BB: DefI->getParent());
2245 Loop *UseLoop = SE.LI.getLoopFor(BB: InsertPt->getParent());
2246 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(L: UseLoop))
2247 return V;
2248
  // Create a temporary instruction at the current insertion point, so we can
  // hand it off to the helper that creates LCSSA PHIs if required for the
  // new use.
  // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
  // would accept an insertion point and return an LCSSA phi for that
  // insertion point, so there is no need to insert & remove the temporary
  // instruction.
2256 Type *ToTy;
2257 if (DefI->getType()->isIntegerTy())
2258 ToTy = PointerType::get(C&: DefI->getContext(), AddressSpace: 0);
2259 else
2260 ToTy = Type::getInt32Ty(C&: DefI->getContext());
2261 Instruction *User =
2262 CastInst::CreateBitOrPointerCast(S: DefI, Ty: ToTy, Name: "tmp.lcssa.user", InsertBefore: InsertPt);
2263 auto RemoveUserOnExit =
2264 make_scope_exit(F: [User]() { User->eraseFromParent(); });
2265
2266 SmallVector<Instruction *, 1> ToUpdate;
2267 ToUpdate.push_back(Elt: DefI);
2268 SmallVector<PHINode *, 16> PHIsToRemove;
2269 SmallVector<PHINode *, 16> InsertedPHIs;
2270 formLCSSAForInstructions(Worklist&: ToUpdate, DT: SE.DT, LI: SE.LI, SE: &SE, PHIsToRemove: &PHIsToRemove,
2271 InsertedPHIs: &InsertedPHIs);
2272 for (PHINode *PN : InsertedPHIs)
2273 rememberInstruction(I: PN);
2274 for (PHINode *PN : PHIsToRemove) {
2275 if (!PN->use_empty())
2276 continue;
2277 InsertedValues.erase(V: PN);
2278 InsertedPostIncValues.erase(V: PN);
2279 PN->eraseFromParent();
2280 }
2281
2282 return User->getOperand(i: 0);
2283}
2284
2285namespace {
2286// Search for a SCEV subexpression that is not safe to expand. Any expression
2287// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2288// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2289// instruction, but the important thing is that we prove the denominator is
2290// nonzero before expansion.
2291//
2292// IVUsers already checks that IV-derived expressions are safe. So this check is
2293// only needed when the expression includes some subexpression that is not IV
2294// derived.
2295//
2296// Currently, we only allow division by a value provably non-zero here.
2297//
2298// We cannot generally expand recurrences unless the step dominates the loop
2299// header. The expander handles the special case of affine recurrences by
2300// scaling the recurrence outside the loop, but this technique isn't generally
2301// applicable. Expanding a nested recurrence outside a loop requires computing
2302// binomial coefficients. This could be done, but the recurrence has to be in a
2303// perfectly reduced form, which can't be guaranteed.
2304struct SCEVFindUnsafe {
2305 ScalarEvolution &SE;
2306 bool CanonicalMode;
2307 bool IsUnsafe = false;
2308
2309 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2310 : SE(SE), CanonicalMode(CanonicalMode) {}
2311
2312 bool follow(const SCEV *S) {
2313 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
2314 if (!SE.isKnownNonZero(S: D->getRHS())) {
2315 IsUnsafe = true;
2316 return false;
2317 }
2318 }
2319 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2320 // For non-affine addrecs or in non-canonical mode we need a preheader
2321 // to insert into.
2322 if (!AR->getLoop()->getLoopPreheader() &&
2323 (!CanonicalMode || !AR->isAffine())) {
2324 IsUnsafe = true;
2325 return false;
2326 }
2327 }
2328 return true;
2329 }
2330 bool isDone() const { return IsUnsafe; }
2331};
2332} // namespace
2333
2334bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2335 SCEVFindUnsafe Search(SE, CanonicalMode);
2336 visitAll(Root: S, Visitor&: Search);
2337 return !Search.IsUnsafe;
2338}
2339
2340bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2341 const Instruction *InsertionPoint) const {
2342 if (!isSafeToExpand(S))
2343 return false;
2344 // We have to prove that the expanded site of S dominates InsertionPoint.
2345 // This is easy when not in the same block, but hard when S is an instruction
2346 // to be expanded somewhere inside the same block as our insertion point.
2347 // What we really need here is something analogous to an OrderedBasicBlock,
2348 // but for the moment, we paper over the problem by handling two common and
2349 // cheap to check cases.
2350 if (SE.properlyDominates(S, BB: InsertionPoint->getParent()))
2351 return true;
2352 if (SE.dominates(S, BB: InsertionPoint->getParent())) {
2353 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2354 return true;
2355 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S))
2356 if (llvm::is_contained(Range: InsertionPoint->operand_values(), Element: U->getValue()))
2357 return true;
2358 }
2359 return false;
2360}
2361
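// Undo a speculative expansion whose result ended up unused: restore the
// poison-generating flags the expander dropped on reused instructions, then
// erase every instruction the expander inserted, in reverse insertion order
// so that users are removed before their operands.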
2362void SCEVExpanderCleaner::cleanup() {
2363 // Result is used, nothing to remove.
2364 if (ResultUsed)
2365 return;
2366
2367 // Restore original poison flags.
2368 for (auto [I, Flags] : Expander.OrigFlags)
2369 Flags.apply(I);
2370
2371 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2372#ifndef NDEBUG
2373 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2374 InsertedInstructions.end());
2375 (void)InsertedSet;
2376#endif
2377 // Remove sets with value handles.
2378 Expander.clear();
2379
2380 // Remove all inserted instructions.
2381 for (Instruction *I : reverse(C&: InsertedInstructions)) {
2382#ifndef NDEBUG
2383 assert(all_of(I->users(),
2384 [&InsertedSet](Value *U) {
2385 return InsertedSet.contains(cast<Instruction>(U));
2386 }) &&
2387 "removed instruction should only be used by instructions inserted "
2388 "during expansion");
2389#endif
2390 assert(!I->getType()->isVoidTy() &&
2391 "inserted instruction should have non-void types");
2392 I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
2393 I->eraseFromParent();
2394 }
2395}
2396
