1//===- ValueTracking.cpp - Walk computations to compute properties --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains routines that help analyze properties that chains of
10// computations have.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/ValueTracking.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/ScopeExit.h"
20#include "llvm/ADT/SmallPtrSet.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/ADT/iterator_range.h"
25#include "llvm/Analysis/AliasAnalysis.h"
26#include "llvm/Analysis/AssumeBundleQueries.h"
27#include "llvm/Analysis/AssumptionCache.h"
28#include "llvm/Analysis/ConstantFolding.h"
29#include "llvm/Analysis/DomConditionCache.h"
30#include "llvm/Analysis/GuardUtils.h"
31#include "llvm/Analysis/InstructionSimplify.h"
32#include "llvm/Analysis/Loads.h"
33#include "llvm/Analysis/LoopInfo.h"
34#include "llvm/Analysis/OptimizationRemarkEmitter.h"
35#include "llvm/Analysis/TargetLibraryInfo.h"
36#include "llvm/Analysis/VectorUtils.h"
37#include "llvm/Analysis/WithCache.h"
38#include "llvm/IR/Argument.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/Constant.h"
42#include "llvm/IR/ConstantRange.h"
43#include "llvm/IR/Constants.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/DiagnosticInfo.h"
46#include "llvm/IR/Dominators.h"
47#include "llvm/IR/EHPersonalities.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GetElementPtrTypeIterator.h"
50#include "llvm/IR/GlobalAlias.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/GlobalVariable.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Instructions.h"
56#include "llvm/IR/IntrinsicInst.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/IntrinsicsAArch64.h"
59#include "llvm/IR/IntrinsicsAMDGPU.h"
60#include "llvm/IR/IntrinsicsRISCV.h"
61#include "llvm/IR/IntrinsicsX86.h"
62#include "llvm/IR/LLVMContext.h"
63#include "llvm/IR/Metadata.h"
64#include "llvm/IR/Module.h"
65#include "llvm/IR/Operator.h"
66#include "llvm/IR/PatternMatch.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/IR/Value.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/CommandLine.h"
72#include "llvm/Support/Compiler.h"
73#include "llvm/Support/ErrorHandling.h"
74#include "llvm/Support/KnownBits.h"
75#include "llvm/Support/MathExtras.h"
76#include "llvm/TargetParser/RISCVTargetParser.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::PatternMatch;
85
// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

92/// Returns the bitwidth of the given scalar or pointer type. For vector types,
93/// returns the element type's bitwidth.
94static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
95 if (unsigned BitWidth = Ty->getScalarSizeInBits())
96 return BitWidth;
97
98 return DL.getPointerTypeSizeInBits(Ty);
99}
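// Illustrative examples for getBitWidth above (exact pointer width depends on
// the target's DataLayout):
//   getBitWidth(i8, DL)        -> 8
//   getBitWidth(<4 x i32>, DL) -> 32   (element width of the vector)
//   getBitWidth(ptr, DL)       -> e.g. 64 on a typical 64-bit target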
100
101// Given the provided Value and, potentially, a context instruction, return
102// the preferred context instruction (if any).
103static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
104 // If we've been provided with a context instruction, then use that (provided
105 // it has been inserted).
106 if (CxtI && CxtI->getParent())
107 return CxtI;
108
109 // If the value is really an already-inserted instruction, then use that.
110 CxtI = dyn_cast<Instruction>(Val: V);
111 if (CxtI && CxtI->getParent())
112 return CxtI;
113
114 return nullptr;
115}
116
117static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) {
118 // If we've been provided with a context instruction, then use that (provided
119 // it has been inserted).
120 if (CxtI && CxtI->getParent())
121 return CxtI;
122
123 // If the value is really an already-inserted instruction, then use that.
124 CxtI = dyn_cast<Instruction>(Val: V1);
125 if (CxtI && CxtI->getParent())
126 return CxtI;
127
128 CxtI = dyn_cast<Instruction>(Val: V2);
129 if (CxtI && CxtI->getParent())
130 return CxtI;
131
132 return nullptr;
133}
134
135static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
136 const APInt &DemandedElts,
137 APInt &DemandedLHS, APInt &DemandedRHS) {
138 if (isa<ScalableVectorType>(Val: Shuf->getType())) {
139 assert(DemandedElts == APInt(1,1));
140 DemandedLHS = DemandedRHS = DemandedElts;
141 return true;
142 }
143
144 int NumElts =
145 cast<FixedVectorType>(Val: Shuf->getOperand(i_nocapture: 0)->getType())->getNumElements();
146 return llvm::getShuffleDemandedElts(SrcWidth: NumElts, Mask: Shuf->getShuffleMask(),
147 DemandedElts, DemandedLHS, DemandedRHS);
148}
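// Illustrative example for getShuffleDemandedElts above: for a fixed-width
// shuffle of two <4 x i32> operands with mask <0, 5, 2, 7>, result elements 0
// and 2 come from the LHS and elements 1 and 3 from the RHS. With
// DemandedElts = 0b1111 this yields
//   DemandedLHS = 0b0101   (elements 0 and 2 of operand 0)
//   DemandedRHS = 0b1010   (elements 1 and 3 of operand 1)
// assuming the usual convention that bit i corresponds to element i.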
149
150static void computeKnownBits(const Value *V, const APInt &DemandedElts,
151 KnownBits &Known, unsigned Depth,
152 const SimplifyQuery &Q);
153
154void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
155 const SimplifyQuery &Q) {
156 // Since the number of lanes in a scalable vector is unknown at compile time,
157 // we track one bit which is implicitly broadcast to all lanes. This means
158 // that all lanes in a scalable vector are considered demanded.
159 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
160 APInt DemandedElts =
161 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
162 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
163}
164
165void llvm::computeKnownBits(const Value *V, KnownBits &Known,
166 const DataLayout &DL, unsigned Depth,
167 AssumptionCache *AC, const Instruction *CxtI,
168 const DominatorTree *DT, bool UseInstrInfo) {
169 computeKnownBits(
170 V, Known, Depth,
171 Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
172}
173
174KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
175 unsigned Depth, AssumptionCache *AC,
176 const Instruction *CxtI,
177 const DominatorTree *DT, bool UseInstrInfo) {
178 return computeKnownBits(
179 V, Depth, Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
180}
181
182KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
183 const DataLayout &DL, unsigned Depth,
184 AssumptionCache *AC, const Instruction *CxtI,
185 const DominatorTree *DT, bool UseInstrInfo) {
186 return computeKnownBits(
187 V, DemandedElts, Depth,
188 Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
189}
190
191static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
192 const SimplifyQuery &SQ) {
193 // Look for an inverted mask: (X & ~M) op (Y & M).
194 {
195 Value *M;
196 if (match(V: LHS, P: m_c_And(L: m_Not(V: m_Value(V&: M)), R: m_Value())) &&
197 match(V: RHS, P: m_c_And(L: m_Specific(V: M), R: m_Value())) &&
198 isGuaranteedNotToBeUndef(V: M, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
199 return true;
200 }
201
202 // X op (Y & ~X)
203 if (match(V: RHS, P: m_c_And(L: m_Not(V: m_Specific(V: LHS)), R: m_Value())) &&
204 isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
205 return true;
206
207 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
208 // for constant Y.
209 Value *Y;
210 if (match(V: RHS,
211 P: m_c_Xor(L: m_c_And(L: m_Specific(V: LHS), R: m_Value(V&: Y)), R: m_Deferred(V: Y))) &&
212 isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT) &&
213 isGuaranteedNotToBeUndef(V: Y, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
214 return true;
215
216 // Peek through extends to find a 'not' of the other side:
217 // (ext Y) op ext(~Y)
218 if (match(V: LHS, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) &&
219 match(V: RHS, P: m_ZExtOrSExt(Op: m_Not(V: m_Specific(V: Y)))) &&
220 isGuaranteedNotToBeUndef(V: Y, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
221 return true;
222
223 // Look for: (A & B) op ~(A | B)
224 {
225 Value *A, *B;
226 if (match(V: LHS, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) &&
227 match(V: RHS, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))) &&
228 isGuaranteedNotToBeUndef(V: A, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT) &&
229 isGuaranteedNotToBeUndef(V: B, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
230 return true;
231 }
232
233 return false;
234}
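// Illustrative IR for the inverted-mask case above (a sketch):
//   %notm = xor i8 %m, -1
//   %lhs  = and i8 %x, %notm   ; only bits where %m is 0 can be set
//   %rhs  = and i8 %y, %m      ; only bits where %m is 1 can be set
// Provided %m is guaranteed not to be undef/poison, %lhs and %rhs can never
// have a common set bit, so e.g. %lhs + %rhs == %lhs | %rhs.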
235
236bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
237 const WithCache<const Value *> &RHSCache,
238 const SimplifyQuery &SQ) {
239 const Value *LHS = LHSCache.getValue();
240 const Value *RHS = RHSCache.getValue();
241
242 assert(LHS->getType() == RHS->getType() &&
243 "LHS and RHS should have the same type");
244 assert(LHS->getType()->isIntOrIntVectorTy() &&
245 "LHS and RHS should be integers");
246
247 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
248 haveNoCommonBitsSetSpecialCases(LHS: RHS, RHS: LHS, SQ))
249 return true;
250
251 return KnownBits::haveNoCommonBitsSet(LHS: LHSCache.getKnownBits(Q: SQ),
252 RHS: RHSCache.getKnownBits(Q: SQ));
253}
254
255bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
256 return !I->user_empty() && all_of(Range: I->users(), P: [](const User *U) {
257 ICmpInst::Predicate P;
258 return match(V: U, P: m_ICmp(Pred&: P, L: m_Value(), R: m_Zero())) && ICmpInst::isEquality(P);
259 });
260}
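// For example, the memcmp result below is only used in zero-equality
// comparisons, so isOnlyUsedInZeroEqualityComparison returns true for it:
//   %res = call i32 @memcmp(ptr %p, ptr %q, i64 %n)
//   %eq  = icmp eq i32 %res, 0
// An additional user such as 'icmp slt i32 %res, 0' would make it return
// false.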
261
262static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
263 const SimplifyQuery &Q);
264
265bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
266 bool OrZero, unsigned Depth,
267 AssumptionCache *AC, const Instruction *CxtI,
268 const DominatorTree *DT, bool UseInstrInfo) {
269 return ::isKnownToBeAPowerOfTwo(
270 V, OrZero, Depth,
271 Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
272}
273
274static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
275 const SimplifyQuery &Q, unsigned Depth);
276
277bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
278 unsigned Depth) {
279 return computeKnownBits(V, Depth, Q: SQ).isNonNegative();
280}
281
282bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
283 unsigned Depth) {
284 if (auto *CI = dyn_cast<ConstantInt>(Val: V))
285 return CI->getValue().isStrictlyPositive();
286
287 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
288 // this updated.
289 KnownBits Known = computeKnownBits(V, Depth, Q: SQ);
290 return Known.isNonNegative() &&
291 (Known.isNonZero() || isKnownNonZero(V, Q: SQ, Depth));
292}
293
294bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
295 unsigned Depth) {
296 return computeKnownBits(V, Depth, Q: SQ).isNegative();
297}
298
299static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
300 const SimplifyQuery &Q);
301
302bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
303 const DataLayout &DL, AssumptionCache *AC,
304 const Instruction *CxtI, const DominatorTree *DT,
305 bool UseInstrInfo) {
306 return ::isKnownNonEqual(
307 V1, V2, Depth: 0,
308 Q: SimplifyQuery(DL, DT, AC, safeCxtI(V1: V2, V2: V1, CxtI), UseInstrInfo));
309}
310
311bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
312 const SimplifyQuery &SQ, unsigned Depth) {
313 KnownBits Known(Mask.getBitWidth());
314 computeKnownBits(V, Known, Depth, Q: SQ);
315 return Mask.isSubsetOf(RHS: Known.Zero);
316}
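// Illustrative example for MaskedValueIsZero above: for
//   %v = and i8 %x, 15
// the top four bits of %v are known zero, so MaskedValueIsZero(%v, 0xF0, SQ)
// returns true, while MaskedValueIsZero(%v, 0x0F, SQ) is false unless more is
// known about %x.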
317
318static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
319 unsigned Depth, const SimplifyQuery &Q);
320
321static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
322 const SimplifyQuery &Q) {
323 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
324 APInt DemandedElts =
325 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
326 return ComputeNumSignBits(V, DemandedElts, Depth, Q);
327}
328
329unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
330 unsigned Depth, AssumptionCache *AC,
331 const Instruction *CxtI,
332 const DominatorTree *DT, bool UseInstrInfo) {
333 return ::ComputeNumSignBits(
334 V, Depth, Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
335}
336
337unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
338 unsigned Depth, AssumptionCache *AC,
339 const Instruction *CxtI,
340 const DominatorTree *DT) {
341 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
342 return V->getType()->getScalarSizeInBits() - SignBits + 1;
343}
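// Worked example for ComputeMaxSignificantBits above: for an i8 value known to
// lie in [-4, 3], every value in the range has at least 6 sign bits
// (e.g. -4 = 0b11111100, 3 = 0b00000011), so the result is 8 - 6 + 1 = 3,
// i.e. the value can be losslessly truncated to (and sign-extended back from)
// an i3.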
344
345static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
346 bool NSW, bool NUW,
347 const APInt &DemandedElts,
348 KnownBits &KnownOut, KnownBits &Known2,
349 unsigned Depth, const SimplifyQuery &Q) {
350 computeKnownBits(V: Op1, DemandedElts, Known&: KnownOut, Depth: Depth + 1, Q);
351
352 // If one operand is unknown and we have no nowrap information,
353 // the result will be unknown independently of the second operand.
354 if (KnownOut.isUnknown() && !NSW && !NUW)
355 return;
356
357 computeKnownBits(V: Op0, DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
358 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, LHS: Known2, RHS: KnownOut);
359}
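// Illustrative sketch of what computeKnownBitsAddSub (via
// KnownBits::computeForAddSub) can conclude: if Op0 is known to be a multiple
// of 16 (low four bits zero) and Op1 is the constant 3, then for Add the
// result has low bits ...0011 known exactly, because no carry can propagate
// out of the low four bit positions.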
360
361static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
362 const APInt &DemandedElts, KnownBits &Known,
363 KnownBits &Known2, unsigned Depth,
364 const SimplifyQuery &Q) {
365 computeKnownBits(V: Op1, DemandedElts, Known, Depth: Depth + 1, Q);
366 computeKnownBits(V: Op0, DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
367
368 bool isKnownNegative = false;
369 bool isKnownNonNegative = false;
370 // If the multiplication is known not to overflow, compute the sign bit.
371 if (NSW) {
372 if (Op0 == Op1) {
373 // The product of a number with itself is non-negative.
374 isKnownNonNegative = true;
375 } else {
376 bool isKnownNonNegativeOp1 = Known.isNonNegative();
377 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
378 bool isKnownNegativeOp1 = Known.isNegative();
379 bool isKnownNegativeOp0 = Known2.isNegative();
380 // The product of two numbers with the same sign is non-negative.
381 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
382 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
383 // The product of a negative number and a non-negative number is either
384 // negative or zero.
385 if (!isKnownNonNegative)
386 isKnownNegative =
387 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
388 Known2.isNonZero()) ||
389 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
390 }
391 }
392
393 bool SelfMultiply = Op0 == Op1;
394 if (SelfMultiply)
395 SelfMultiply &=
396 isGuaranteedNotToBeUndef(V: Op0, AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT, Depth: Depth + 1);
397 Known = KnownBits::mul(LHS: Known, RHS: Known2, NoUndefSelfMultiply: SelfMultiply);
398
399 // Only make use of no-wrap flags if we failed to compute the sign bit
400 // directly. This matters if the multiplication always overflows, in
401 // which case we prefer to follow the result of the direct computation,
402 // though as the program is invoking undefined behaviour we can choose
403 // whatever we like here.
404 if (isKnownNonNegative && !Known.isNegative())
405 Known.makeNonNegative();
406 else if (isKnownNegative && !Known.isNonNegative())
407 Known.makeNegative();
408}
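// Illustrative example of the NSW sign-bit reasoning in computeKnownBitsMul
// above: for 'mul nsw i32 %a, %b' where %a is known negative and %b is known
// non-negative and non-zero, the product is marked known-negative; without
// nsw we could not trust the sign, since a wrapping multiply may flip it.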
409
410void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
411 KnownBits &Known) {
412 unsigned BitWidth = Known.getBitWidth();
413 unsigned NumRanges = Ranges.getNumOperands() / 2;
414 assert(NumRanges >= 1);
415
416 Known.Zero.setAllBits();
417 Known.One.setAllBits();
418
419 for (unsigned i = 0; i < NumRanges; ++i) {
420 ConstantInt *Lower =
421 mdconst::extract<ConstantInt>(MD: Ranges.getOperand(I: 2 * i + 0));
422 ConstantInt *Upper =
423 mdconst::extract<ConstantInt>(MD: Ranges.getOperand(I: 2 * i + 1));
424 ConstantRange Range(Lower->getValue(), Upper->getValue());
425
426 // The first CommonPrefixBits of all values in Range are equal.
427 unsigned CommonPrefixBits =
428 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
429 APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: CommonPrefixBits);
430 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(width: BitWidth);
431 Known.One &= UnsignedMax & Mask;
432 Known.Zero &= ~UnsignedMax & Mask;
433 }
434}
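// Worked example for computeKnownBitsFromRangeMetadata above: for a load
// annotated with '!range !{i32 64, i32 128}' the values lie in [64, 128), so
// getUnsignedMax() ^ getUnsignedMin() == 63 and the top 26 bits form a common
// prefix. The result is that bits 31..7 are known zero and bit 6 is known one,
// i.e. the low byte looks like 0b01xxxxxx.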
435
436static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
437 SmallVector<const Value *, 16> WorkSet(1, I);
438 SmallPtrSet<const Value *, 32> Visited;
439 SmallPtrSet<const Value *, 16> EphValues;
440
441 // The instruction defining an assumption's condition itself is always
442 // considered ephemeral to that assumption (even if it has other
443 // non-ephemeral users). See r246696's test case for an example.
444 if (is_contained(Range: I->operands(), Element: E))
445 return true;
446
447 while (!WorkSet.empty()) {
448 const Value *V = WorkSet.pop_back_val();
449 if (!Visited.insert(Ptr: V).second)
450 continue;
451
452 // If all uses of this value are ephemeral, then so is this value.
453 if (llvm::all_of(Range: V->users(), P: [&](const User *U) {
454 return EphValues.count(Ptr: U);
455 })) {
456 if (V == E)
457 return true;
458
459 if (V == I || (isa<Instruction>(Val: V) &&
460 !cast<Instruction>(Val: V)->mayHaveSideEffects() &&
461 !cast<Instruction>(Val: V)->isTerminator())) {
462 EphValues.insert(Ptr: V);
463 if (const User *U = dyn_cast<User>(Val: V))
464 append_range(C&: WorkSet, R: U->operands());
465 }
466 }
467 }
468
469 return false;
470}
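// Illustrative example for isEphemeralValueOf above: in
//   %cmp = icmp ugt i32 %x, 10
//   call void @llvm.assume(i1 %cmp)
// %cmp only feeds the assume, so isEphemeralValueOf(assume, %cmp) is true;
// such values must not be used to "prove" the assumed condition itself.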
471
472// Is this an intrinsic that cannot be speculated but also cannot trap?
473bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
474 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Val: I))
475 return CI->isAssumeLikeIntrinsic();
476
477 return false;
478}
479
480bool llvm::isValidAssumeForContext(const Instruction *Inv,
481 const Instruction *CxtI,
482 const DominatorTree *DT,
483 bool AllowEphemerals) {
484 // There are two restrictions on the use of an assume:
485 // 1. The assume must dominate the context (or the control flow must
486 // reach the assume whenever it reaches the context).
487 // 2. The context must not be in the assume's set of ephemeral values
488 // (otherwise we will use the assume to prove that the condition
489 // feeding the assume is trivially true, thus causing the removal of
490 // the assume).
491
492 if (Inv->getParent() == CxtI->getParent()) {
493 // If Inv and CtxI are in the same block, check if the assume (Inv) is first
494 // in the BB.
495 if (Inv->comesBefore(Other: CxtI))
496 return true;
497
498 // Don't let an assume affect itself - this would cause the problems
499 // `isEphemeralValueOf` is trying to prevent, and it would also make
500 // the loop below go out of bounds.
501 if (!AllowEphemerals && Inv == CxtI)
502 return false;
503
504 // The context comes first, but they're both in the same block.
505 // Make sure there is nothing in between that might interrupt
506 // the control flow, not even CxtI itself.
507 // We limit the scan distance between the assume and its context instruction
508 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
509 // it can be adjusted if needed (could be turned into a cl::opt).
510 auto Range = make_range(x: CxtI->getIterator(), y: Inv->getIterator());
511 if (!isGuaranteedToTransferExecutionToSuccessor(Range, ScanLimit: 15))
512 return false;
513
514 return AllowEphemerals || !isEphemeralValueOf(I: Inv, E: CxtI);
515 }
516
517 // Inv and CxtI are in different blocks.
518 if (DT) {
519 if (DT->dominates(Def: Inv, User: CxtI))
520 return true;
521 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
522 // We don't have a DT, but this trivially dominates.
523 return true;
524 }
525
526 return false;
527}
528
529// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
530// we still have enough information about `RHS` to conclude non-zero. For
531// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
532// so the extra compile time may not be worth it, but possibly a second API
533// should be created for use outside of loops.
534static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
535 // v u> y implies v != 0.
536 if (Pred == ICmpInst::ICMP_UGT)
537 return true;
538
539 // Special-case v != 0 to also handle v != null.
540 if (Pred == ICmpInst::ICMP_NE)
541 return match(V: RHS, P: m_Zero());
542
543 // All other predicates - rely on generic ConstantRange handling.
544 const APInt *C;
545 auto Zero = APInt::getZero(numBits: RHS->getType()->getScalarSizeInBits());
546 if (match(V: RHS, P: m_APInt(Res&: C))) {
547 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, Other: *C);
548 return !TrueValues.contains(Val: Zero);
549 }
550
551 auto *VC = dyn_cast<ConstantDataVector>(Val: RHS);
552 if (VC == nullptr)
553 return false;
554
555 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
556 ++ElemIdx) {
557 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
558 Pred, Other: VC->getElementAsAPInt(i: ElemIdx));
559 if (TrueValues.contains(Val: Zero))
560 return false;
561 }
562 return true;
563}
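// Examples of what cmpExcludesZero concludes (a sketch):
//   Pred = u>, any RHS       -> true  (v u> y implies v != 0)
//   Pred = eq, RHS = 42      -> true  ([42, 43) does not contain 0)
//   Pred = u<, RHS = 5       -> false ([0, 5) contains 0)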
564
565static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
566 // Use of assumptions is context-sensitive. If we don't have a context, we
567 // cannot use them!
568 if (!Q.AC || !Q.CxtI)
569 return false;
570
571 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
572 if (!Elem.Assume)
573 continue;
574
575 AssumeInst *I = cast<AssumeInst>(Val&: Elem.Assume);
576 assert(I->getFunction() == Q.CxtI->getFunction() &&
577 "Got assumption for the wrong function!");
578
579 if (Elem.Index != AssumptionCache::ExprResultIdx) {
580 if (!V->getType()->isPointerTy())
581 continue;
582 if (RetainedKnowledge RK = getKnowledgeFromBundle(
583 Assume&: *I, BOI: I->bundle_op_info_begin()[Elem.Index])) {
584 if (RK.WasOn == V &&
585 (RK.AttrKind == Attribute::NonNull ||
586 (RK.AttrKind == Attribute::Dereferenceable &&
587 !NullPointerIsDefined(F: Q.CxtI->getFunction(),
588 AS: V->getType()->getPointerAddressSpace()))) &&
589 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
590 return true;
591 }
592 continue;
593 }
594
    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).
598
599 Value *RHS;
600 CmpInst::Predicate Pred;
601 auto m_V = m_CombineOr(L: m_Specific(V), R: m_PtrToInt(Op: m_Specific(V)));
602 if (!match(V: I->getArgOperand(i: 0), P: m_c_ICmp(Pred, L: m_V, R: m_Value(V&: RHS))))
603 return false;
604
605 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
606 return true;
607 }
608
609 return false;
610}
611
612static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
613 Value *LHS, Value *RHS, KnownBits &Known,
614 const SimplifyQuery &Q) {
615 if (RHS->getType()->isPointerTy()) {
616 // Handle comparison of pointer to null explicitly, as it will not be
617 // covered by the m_APInt() logic below.
618 if (LHS == V && match(V: RHS, P: m_Zero())) {
619 switch (Pred) {
620 case ICmpInst::ICMP_EQ:
621 Known.setAllZero();
622 break;
623 case ICmpInst::ICMP_SGE:
624 case ICmpInst::ICMP_SGT:
625 Known.makeNonNegative();
626 break;
627 case ICmpInst::ICMP_SLT:
628 Known.makeNegative();
629 break;
630 default:
631 break;
632 }
633 }
634 return;
635 }
636
637 unsigned BitWidth = Known.getBitWidth();
638 auto m_V =
639 m_CombineOr(L: m_Specific(V), R: m_PtrToIntSameSize(DL: Q.DL, Op: m_Specific(V)));
640
641 Value *Y;
642 const APInt *Mask, *C;
643 uint64_t ShAmt;
644 switch (Pred) {
645 case ICmpInst::ICMP_EQ:
646 // assume(V = C)
647 if (match(V: LHS, P: m_V) && match(V: RHS, P: m_APInt(Res&: C))) {
648 Known = Known.unionWith(RHS: KnownBits::makeConstant(C: *C));
649 // assume(V & Mask = C)
650 } else if (match(V: LHS, P: m_c_And(L: m_V, R: m_Value(V&: Y))) &&
651 match(V: RHS, P: m_APInt(Res&: C))) {
652 // For one bits in Mask, we can propagate bits from C to V.
653 Known.One |= *C;
654 if (match(V: Y, P: m_APInt(Res&: Mask)))
655 Known.Zero |= ~*C & *Mask;
656 // assume(V | Mask = C)
657 } else if (match(V: LHS, P: m_c_Or(L: m_V, R: m_Value(V&: Y))) && match(V: RHS, P: m_APInt(Res&: C))) {
658 // For zero bits in Mask, we can propagate bits from C to V.
659 Known.Zero |= ~*C;
660 if (match(V: Y, P: m_APInt(Res&: Mask)))
661 Known.One |= *C & ~*Mask;
662 // assume(V ^ Mask = C)
663 } else if (match(V: LHS, P: m_Xor(L: m_V, R: m_APInt(Res&: Mask))) &&
664 match(V: RHS, P: m_APInt(Res&: C))) {
665 // Equivalent to assume(V == Mask ^ C)
666 Known = Known.unionWith(RHS: KnownBits::makeConstant(C: *C ^ *Mask));
667 // assume(V << ShAmt = C)
668 } else if (match(V: LHS, P: m_Shl(L: m_V, R: m_ConstantInt(V&: ShAmt))) &&
669 match(V: RHS, P: m_APInt(Res&: C)) && ShAmt < BitWidth) {
670 // For those bits in C that are known, we can propagate them to known
671 // bits in V shifted to the right by ShAmt.
672 KnownBits RHSKnown = KnownBits::makeConstant(C: *C);
673 RHSKnown.Zero.lshrInPlace(ShiftAmt: ShAmt);
674 RHSKnown.One.lshrInPlace(ShiftAmt: ShAmt);
675 Known = Known.unionWith(RHS: RHSKnown);
676 // assume(V >> ShAmt = C)
677 } else if (match(V: LHS, P: m_Shr(L: m_V, R: m_ConstantInt(V&: ShAmt))) &&
678 match(V: RHS, P: m_APInt(Res&: C)) && ShAmt < BitWidth) {
679 KnownBits RHSKnown = KnownBits::makeConstant(C: *C);
      // For those bits in C that are known, we can propagate them to the
      // corresponding bits of V, shifted to the left by ShAmt.
682 Known.Zero |= RHSKnown.Zero << ShAmt;
683 Known.One |= RHSKnown.One << ShAmt;
684 }
685 break;
686 case ICmpInst::ICMP_NE: {
687 // assume (V & B != 0) where B is a power of 2
688 const APInt *BPow2;
689 if (match(V: LHS, P: m_And(L: m_V, R: m_Power2(V&: BPow2))) && match(V: RHS, P: m_Zero()))
690 Known.One |= *BPow2;
691 break;
692 }
693 default:
694 if (match(V: RHS, P: m_APInt(Res&: C))) {
695 const APInt *Offset = nullptr;
696 if (match(V: LHS, P: m_CombineOr(L: m_V, R: m_AddLike(L: m_V, R: m_APInt(Res&: Offset))))) {
697 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, Other: *C);
698 if (Offset)
699 LHSRange = LHSRange.sub(Other: *Offset);
700 Known = Known.unionWith(RHS: LHSRange.toKnownBits());
701 }
702 // X & Y u> C -> X u> C && Y u> C
703 if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) &&
704 match(V: LHS, P: m_c_And(L: m_V, R: m_Value()))) {
705 Known.One.setHighBits(
706 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
707 }
708 // X | Y u< C -> X u< C && Y u< C
709 if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) &&
710 match(V: LHS, P: m_c_Or(L: m_V, R: m_Value()))) {
711 Known.Zero.setHighBits(
712 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
713 }
714 }
715 break;
716 }
717}
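// Worked example for the 'assume(V & Mask = C)' case handled by
// computeKnownBitsFromCmp above: given
//   assume((%v & 12) == 8)
// we learn Known.One |= 8 (bit 3 must be set) and, because the mask is the
// constant 12, Known.Zero |= ~8 & 12 == 4 (bit 2 must be clear); bits outside
// the mask remain unknown.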
718
719static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
720 KnownBits &Known,
721 const SimplifyQuery &SQ, bool Invert) {
722 ICmpInst::Predicate Pred =
723 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
724 Value *LHS = Cmp->getOperand(i_nocapture: 0);
725 Value *RHS = Cmp->getOperand(i_nocapture: 1);
726
727 // Handle icmp pred (trunc V), C
728 if (match(V: LHS, P: m_Trunc(Op: m_Specific(V)))) {
729 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
730 computeKnownBitsFromCmp(V: LHS, Pred, LHS, RHS, Known&: DstKnown, Q: SQ);
731 Known = Known.unionWith(RHS: DstKnown.anyext(BitWidth: Known.getBitWidth()));
732 return;
733 }
734
735 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, Q: SQ);
736}
737
738static void computeKnownBitsFromCond(const Value *V, Value *Cond,
739 KnownBits &Known, unsigned Depth,
740 const SimplifyQuery &SQ, bool Invert) {
741 Value *A, *B;
742 if (Depth < MaxAnalysisRecursionDepth &&
743 match(V: Cond, P: m_LogicalOp(L: m_Value(V&: A), R: m_Value(V&: B)))) {
744 KnownBits Known2(Known.getBitWidth());
745 KnownBits Known3(Known.getBitWidth());
746 computeKnownBitsFromCond(V, Cond: A, Known&: Known2, Depth: Depth + 1, SQ, Invert);
747 computeKnownBitsFromCond(V, Cond: B, Known&: Known3, Depth: Depth + 1, SQ, Invert);
748 if (Invert ? match(V: Cond, P: m_LogicalOr(L: m_Value(), R: m_Value()))
749 : match(V: Cond, P: m_LogicalAnd(L: m_Value(), R: m_Value())))
750 Known2 = Known2.unionWith(RHS: Known3);
751 else
752 Known2 = Known2.intersectWith(RHS: Known3);
753 Known = Known.unionWith(RHS: Known2);
754 }
755
756 if (auto *Cmp = dyn_cast<ICmpInst>(Val: Cond))
757 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
758}
759
760void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
761 unsigned Depth, const SimplifyQuery &Q) {
762 if (!Q.CxtI)
763 return;
764
765 if (Q.DC && Q.DT) {
766 // Handle dominating conditions.
767 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
768 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(i: 0));
769 if (Q.DT->dominates(BBE: Edge0, BB: Q.CxtI->getParent()))
770 computeKnownBitsFromCond(V, Cond: BI->getCondition(), Known, Depth, SQ: Q,
771 /*Invert*/ false);
772
773 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(i: 1));
774 if (Q.DT->dominates(BBE: Edge1, BB: Q.CxtI->getParent()))
775 computeKnownBitsFromCond(V, Cond: BI->getCondition(), Known, Depth, SQ: Q,
776 /*Invert*/ true);
777 }
778
779 if (Known.hasConflict())
780 Known.resetAll();
781 }
782
783 if (!Q.AC)
784 return;
785
786 unsigned BitWidth = Known.getBitWidth();
787
788 // Note that the patterns below need to be kept in sync with the code
789 // in AssumptionCache::updateAffectedValues.
790
791 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
792 if (!Elem.Assume)
793 continue;
794
795 AssumeInst *I = cast<AssumeInst>(Val&: Elem.Assume);
796 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
797 "Got assumption for the wrong function!");
798
799 if (Elem.Index != AssumptionCache::ExprResultIdx) {
800 if (!V->getType()->isPointerTy())
801 continue;
802 if (RetainedKnowledge RK = getKnowledgeFromBundle(
803 Assume&: *I, BOI: I->bundle_op_info_begin()[Elem.Index])) {
804 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
805 isPowerOf2_64(Value: RK.ArgValue) &&
806 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
807 Known.Zero.setLowBits(Log2_64(Value: RK.ArgValue));
808 }
809 continue;
810 }
811
    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).
815
816 Value *Arg = I->getArgOperand(i: 0);
817
818 if (Arg == V && isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT)) {
819 assert(BitWidth == 1 && "assume operand is not i1?");
820 (void)BitWidth;
821 Known.setAllOnes();
822 return;
823 }
824 if (match(V: Arg, P: m_Not(V: m_Specific(V))) &&
825 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT)) {
826 assert(BitWidth == 1 && "assume operand is not i1?");
827 (void)BitWidth;
828 Known.setAllZero();
829 return;
830 }
831
832 // The remaining tests are all recursive, so bail out if we hit the limit.
833 if (Depth == MaxAnalysisRecursionDepth)
834 continue;
835
836 ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: Arg);
837 if (!Cmp)
838 continue;
839
840 if (!isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
841 continue;
842
843 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ: Q, /*Invert=*/false);
844 }
845
846 // Conflicting assumption: Undefined behavior will occur on this execution
847 // path.
848 if (Known.hasConflict())
849 Known.resetAll();
850}
851
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is
/// a pre-allocated temporary with the same bit width as Known and on return
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known bits and a shift amount,
/// computes the implied known bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
860static void computeKnownBitsFromShiftOperator(
861 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
862 KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
863 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
864 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
865 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Depth: Depth + 1, Q);
866 // To limit compile-time impact, only query isKnownNonZero() if we know at
867 // least something about the shift amount.
868 bool ShAmtNonZero =
869 Known.isNonZero() ||
870 (Known.getMaxValue().ult(RHS: Known.getBitWidth()) &&
871 isKnownNonZero(V: I->getOperand(i: 1), DemandedElts, Q, Depth: Depth + 1));
872 Known = KF(Known2, Known, ShAmtNonZero);
873}
874
875static KnownBits
876getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
877 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
878 unsigned Depth, const SimplifyQuery &Q) {
879 unsigned BitWidth = KnownLHS.getBitWidth();
880 KnownBits KnownOut(BitWidth);
881 bool IsAnd = false;
882 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
883 Value *X = nullptr, *Y = nullptr;
884
885 switch (I->getOpcode()) {
886 case Instruction::And:
887 KnownOut = KnownLHS & KnownRHS;
888 IsAnd = true;
    // and(x, -x) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known one bit in x, we can clear all bits
    // above it.
892 // TODO: instcombine often reassociates independent `and` which can hide
893 // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
894 if (HasKnownOne && match(V: I, P: m_c_And(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
      // -(-x) == x, so use whichever of LHS/RHS gets us a better result.
896 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
897 KnownOut = KnownLHS.blsi();
898 else
899 KnownOut = KnownRHS.blsi();
900 }
901 break;
902 case Instruction::Or:
903 KnownOut = KnownLHS | KnownRHS;
904 break;
905 case Instruction::Xor:
906 KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that will clear all bits above the
    // lowest set bit. If we have a single known one bit in x, we can
    // clear all bits above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
    // -1, but for the purpose of demanded bits (xor(x, x-C) &
    // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
    // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
914 if (HasKnownOne &&
915 match(V: I, P: m_c_Xor(L: m_Value(V&: X), R: m_c_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
916 const KnownBits &XBits = I->getOperand(i: 0) == X ? KnownLHS : KnownRHS;
917 KnownOut = XBits.blsmsk();
918 }
919 break;
920 default:
921 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
922 }
923
924 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
925 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
926 // here we handle the more general case of adding any odd number by
927 // matching the form and/xor/or(x, add(x, y)) where y is odd.
928 // TODO: This could be generalized to clearing any bit set in y where the
929 // following bit is known to be unset in y.
930 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
931 (match(V: I, P: m_c_BinOp(L: m_Value(V&: X), R: m_c_Add(L: m_Deferred(V: X), R: m_Value(V&: Y)))) ||
932 match(V: I, P: m_c_BinOp(L: m_Value(V&: X), R: m_Sub(L: m_Deferred(V: X), R: m_Value(V&: Y)))) ||
933 match(V: I, P: m_c_BinOp(L: m_Value(V&: X), R: m_Sub(L: m_Value(V&: Y), R: m_Deferred(V: X)))))) {
934 KnownBits KnownY(BitWidth);
935 computeKnownBits(V: Y, DemandedElts, Known&: KnownY, Depth: Depth + 1, Q);
936 if (KnownY.countMinTrailingOnes() > 0) {
937 if (IsAnd)
938 KnownOut.Zero.setBit(0);
939 else
940 KnownOut.One.setBit(0);
941 }
942 }
943 return KnownOut;
944}
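// Illustrative example of the and(x, -x) special case handled above: if bit 5
// of %x is known to be one (lower bits unknown), the lowest set bit of %x is
// at position 5 or below, so in '%x & (0 - %x)' all bits above position 5 are
// known zero (this is what KnownBits::blsi computes).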
945
946// Public so this can be used in `SimplifyDemandedUseBits`.
947KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
948 const KnownBits &KnownLHS,
949 const KnownBits &KnownRHS,
950 unsigned Depth,
951 const SimplifyQuery &SQ) {
952 auto *FVTy = dyn_cast<FixedVectorType>(Val: I->getType());
953 APInt DemandedElts =
954 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
955
956 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
957 Q: SQ);
958}
959
960ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
961 Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
962 // Without vscale_range, we only know that vscale is non-zero.
963 if (!Attr.isValid())
964 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(numBits: BitWidth));
965
966 unsigned AttrMin = Attr.getVScaleRangeMin();
967 // Minimum is larger than vscale width, result is always poison.
968 if ((unsigned)llvm::bit_width(Value: AttrMin) > BitWidth)
969 return ConstantRange::getEmpty(BitWidth);
970
971 APInt Min(BitWidth, AttrMin);
972 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
973 if (!AttrMax || (unsigned)llvm::bit_width(Value: *AttrMax) > BitWidth)
974 return ConstantRange(Min, APInt::getZero(numBits: BitWidth));
975
976 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
977}
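// For example, a function attribute 'vscale_range(2,4)' makes getVScaleRange
// return the constant range [2, 5), while a missing attribute yields [1, 0),
// i.e. "any non-zero value" in ConstantRange's wrap-around encoding.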
978
979static void computeKnownBitsFromOperator(const Operator *I,
980 const APInt &DemandedElts,
981 KnownBits &Known, unsigned Depth,
982 const SimplifyQuery &Q) {
983 unsigned BitWidth = Known.getBitWidth();
984
985 KnownBits Known2(BitWidth);
986 switch (I->getOpcode()) {
987 default: break;
988 case Instruction::Load:
989 if (MDNode *MD =
990 Q.IIQ.getMetadata(I: cast<LoadInst>(Val: I), KindID: LLVMContext::MD_range))
991 computeKnownBitsFromRangeMetadata(Ranges: *MD, Known);
992 break;
993 case Instruction::And:
994 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Depth: Depth + 1, Q);
995 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
996
997 Known = getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS: Known2, KnownRHS: Known, Depth, Q);
998 break;
999 case Instruction::Or:
1000 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Depth: Depth + 1, Q);
1001 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
1002
1003 Known = getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS: Known2, KnownRHS: Known, Depth, Q);
1004 break;
1005 case Instruction::Xor:
1006 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Depth: Depth + 1, Q);
1007 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
1008
1009 Known = getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS: Known2, KnownRHS: Known, Depth, Q);
1010 break;
1011 case Instruction::Mul: {
1012 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1013 computeKnownBitsMul(Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1), NSW, DemandedElts,
1014 Known, Known2, Depth, Q);
1015 break;
1016 }
1017 case Instruction::UDiv: {
1018 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1019 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1020 Known =
1021 KnownBits::udiv(LHS: Known, RHS: Known2, Exact: Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)));
1022 break;
1023 }
1024 case Instruction::SDiv: {
1025 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1026 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1027 Known =
1028 KnownBits::sdiv(LHS: Known, RHS: Known2, Exact: Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)));
1029 break;
1030 }
1031 case Instruction::Select: {
1032 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1033 KnownBits Res(Known.getBitWidth());
1034 computeKnownBits(V: Arm, Known&: Res, Depth: Depth + 1, Q);
1035 // If we have a constant arm, we are done.
1036 if (Res.isConstant())
1037 return Res;
1038
1039 // See what condition implies about the bits of the two select arms.
1040 KnownBits CondRes(Res.getBitWidth());
1041 computeKnownBitsFromCond(V: Arm, Cond: I->getOperand(i: 0), Known&: CondRes, Depth: Depth + 1, SQ: Q,
1042 Invert);
1043 // If we don't get any information from the condition, no reason to
1044 // proceed.
1045 if (CondRes.isUnknown())
1046 return Res;
1047
      // We can have a conflict if the condition is dead. I.e., if we have
      // (x | 64) < 32 ? (x | 64) : y
      // we will have a conflict at bit 6 from the condition/the `or`.
      // In that case just return. It's not particularly important
      // what we do, as this select is going to be simplified soon.
1053 CondRes = CondRes.unionWith(RHS: Res);
1054 if (CondRes.hasConflict())
1055 return Res;
1056
1057 // Finally make sure the information we found is valid. This is relatively
1058 // expensive so it's left for the very end.
1059 if (!isGuaranteedNotToBeUndef(V: Arm, AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT, Depth: Depth + 1))
1060 return Res;
1061
      // Finally, we know we get information from the condition and it's valid,
      // so return it.
1064 return CondRes;
1065 };
1066 // Only known if known in both the LHS and RHS.
1067 Known =
1068 ComputeForArm(I->getOperand(i: 1), /*Invert=*/false)
1069 .intersectWith(RHS: ComputeForArm(I->getOperand(i: 2), /*Invert=*/true));
1070 break;
1071 }
1072 case Instruction::FPTrunc:
1073 case Instruction::FPExt:
1074 case Instruction::FPToUI:
1075 case Instruction::FPToSI:
1076 case Instruction::SIToFP:
1077 case Instruction::UIToFP:
1078 break; // Can't work with floating point.
1079 case Instruction::PtrToInt:
1080 case Instruction::IntToPtr:
1081 // Fall through and handle them the same as zext/trunc.
1082 [[fallthrough]];
1083 case Instruction::ZExt:
1084 case Instruction::Trunc: {
1085 Type *SrcTy = I->getOperand(i: 0)->getType();
1086
1087 unsigned SrcBitWidth;
1088 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1089 // which fall through here.
1090 Type *ScalarTy = SrcTy->getScalarType();
1091 SrcBitWidth = ScalarTy->isPointerTy() ?
1092 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1093 Q.DL.getTypeSizeInBits(Ty: ScalarTy);
1094
1095 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1096 Known = Known.anyextOrTrunc(BitWidth: SrcBitWidth);
1097 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1098 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(Val: I);
1099 Inst && Inst->hasNonNeg() && !Known.isNegative())
1100 Known.makeNonNegative();
1101 Known = Known.zextOrTrunc(BitWidth);
1102 break;
1103 }
1104 case Instruction::BitCast: {
1105 Type *SrcTy = I->getOperand(i: 0)->getType();
1106 if (SrcTy->isIntOrPtrTy() &&
1107 // TODO: For now, not handling conversions like:
1108 // (bitcast i64 %x to <2 x i32>)
1109 !I->getType()->isVectorTy()) {
1110 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1111 break;
1112 }
1113
1114 // Handle cast from vector integer type to scalar or vector integer.
1115 auto *SrcVecTy = dyn_cast<FixedVectorType>(Val: SrcTy);
1116 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1117 !I->getType()->isIntOrIntVectorTy() ||
1118 isa<ScalableVectorType>(Val: I->getType()))
1119 break;
1120
1121 // Look through a cast from narrow vector elements to wider type.
    // Examples: v4i32 -> v2i64, v3i8 -> i24
1123 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1124 if (BitWidth % SubBitWidth == 0) {
1125 // Known bits are automatically intersected across demanded elements of a
1126 // vector. So for example, if a bit is computed as known zero, it must be
1127 // zero across all demanded elements of the vector.
1128 //
1129 // For this bitcast, each demanded element of the output is sub-divided
1130 // across a set of smaller vector elements in the source vector. To get
1131 // the known bits for an entire element of the output, compute the known
1132 // bits for each sub-element sequentially. This is done by shifting the
1133 // one-set-bit demanded elements parameter across the sub-elements for
1134 // consecutive calls to computeKnownBits. We are using the demanded
1135 // elements parameter as a mask operator.
1136 //
1137 // The known bits of each sub-element are then inserted into place
1138 // (dependent on endian) to form the full result of known bits.
1139 unsigned NumElts = DemandedElts.getBitWidth();
1140 unsigned SubScale = BitWidth / SubBitWidth;
1141 APInt SubDemandedElts = APInt::getZero(numBits: NumElts * SubScale);
1142 for (unsigned i = 0; i != NumElts; ++i) {
1143 if (DemandedElts[i])
1144 SubDemandedElts.setBit(i * SubScale);
1145 }
1146
1147 KnownBits KnownSrc(SubBitWidth);
1148 for (unsigned i = 0; i != SubScale; ++i) {
1149 computeKnownBits(V: I->getOperand(i: 0), DemandedElts: SubDemandedElts.shl(shiftAmt: i), Known&: KnownSrc,
1150 Depth: Depth + 1, Q);
1151 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1152 Known.insertBits(SubBits: KnownSrc, BitPosition: ShiftElt * SubBitWidth);
1153 }
1154 }
1155 break;
1156 }
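  // Worked example for the narrow-to-wide bitcast case above: for
  //   bitcast <4 x i16> %v to <2 x i32>
  // each i32 output element is assembled from two adjacent i16 source
  // elements (SubScale == 2). If only output element 0 is demanded, the loop
  // queries the source with demanded elements {0} and then {1}, inserting
  // their known bits at bit offsets 0 and 16 respectively on a little-endian
  // target (the offsets are mirrored on big-endian targets).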
1157 case Instruction::SExt: {
1158 // Compute the bits in the result that are not present in the input.
1159 unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits();
1160
1161 Known = Known.trunc(BitWidth: SrcBitWidth);
1162 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1163 // If the sign bit of the input is known set or clear, then we know the
1164 // top bits of the result.
1165 Known = Known.sext(BitWidth);
1166 break;
1167 }
1168 case Instruction::Shl: {
1169 bool NUW = Q.IIQ.hasNoUnsignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1170 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1171 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1172 bool ShAmtNonZero) {
1173 return KnownBits::shl(LHS: KnownVal, RHS: KnownAmt, NUW, NSW, ShAmtNonZero);
1174 };
1175 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1176 KF);
    // Trailing zeros of a left-shifted constant never decrease.
1178 const APInt *C;
1179 if (match(V: I->getOperand(i: 0), P: m_APInt(Res&: C)))
1180 Known.Zero.setLowBits(C->countr_zero());
1181 break;
1182 }
1183 case Instruction::LShr: {
1184 bool Exact = Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I));
1185 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1186 bool ShAmtNonZero) {
1187 return KnownBits::lshr(LHS: KnownVal, RHS: KnownAmt, ShAmtNonZero, Exact);
1188 };
1189 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1190 KF);
    // Leading zeros of a right-shifted constant never decrease.
1192 const APInt *C;
1193 if (match(V: I->getOperand(i: 0), P: m_APInt(Res&: C)))
1194 Known.Zero.setHighBits(C->countl_zero());
1195 break;
1196 }
1197 case Instruction::AShr: {
1198 bool Exact = Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I));
1199 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1200 bool ShAmtNonZero) {
1201 return KnownBits::ashr(LHS: KnownVal, RHS: KnownAmt, ShAmtNonZero, Exact);
1202 };
1203 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1204 KF);
1205 break;
1206 }
1207 case Instruction::Sub: {
1208 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1209 bool NUW = Q.IIQ.hasNoUnsignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1210 computeKnownBitsAddSub(Add: false, Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1), NSW, NUW,
1211 DemandedElts, KnownOut&: Known, Known2, Depth, Q);
1212 break;
1213 }
1214 case Instruction::Add: {
1215 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1216 bool NUW = Q.IIQ.hasNoUnsignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1217 computeKnownBitsAddSub(Add: true, Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1), NSW, NUW,
1218 DemandedElts, KnownOut&: Known, Known2, Depth, Q);
1219 break;
1220 }
1221 case Instruction::SRem:
1222 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1223 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1224 Known = KnownBits::srem(LHS: Known, RHS: Known2);
1225 break;
1226
1227 case Instruction::URem:
1228 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1229 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1230 Known = KnownBits::urem(LHS: Known, RHS: Known2);
1231 break;
1232 case Instruction::Alloca:
1233 Known.Zero.setLowBits(Log2(A: cast<AllocaInst>(Val: I)->getAlign()));
1234 break;
1235 case Instruction::GetElementPtr: {
1236 // Analyze all of the subscripts of this getelementptr instruction
1237 // to determine if we can prove known low zero bits.
1238 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1239 // Accumulate the constant indices in a separate variable
1240 // to minimize the number of calls to computeForAddSub.
1241 APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1242
1243 gep_type_iterator GTI = gep_type_begin(GEP: I);
1244 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1245 // TrailZ can only become smaller, short-circuit if we hit zero.
1246 if (Known.isUnknown())
1247 break;
1248
1249 Value *Index = I->getOperand(i);
1250
1251 // Handle case when index is zero.
1252 Constant *CIndex = dyn_cast<Constant>(Val: Index);
1253 if (CIndex && CIndex->isZeroValue())
1254 continue;
1255
1256 if (StructType *STy = GTI.getStructTypeOrNull()) {
1257 // Handle struct member offset arithmetic.
1258
1259 assert(CIndex &&
1260 "Access to structure field must be known at compile time");
1261
1262 if (CIndex->getType()->isVectorTy())
1263 Index = CIndex->getSplatValue();
1264
1265 unsigned Idx = cast<ConstantInt>(Val: Index)->getZExtValue();
1266 const StructLayout *SL = Q.DL.getStructLayout(Ty: STy);
1267 uint64_t Offset = SL->getElementOffset(Idx);
1268 AccConstIndices += Offset;
1269 continue;
1270 }
1271
1272 // Handle array index arithmetic.
1273 Type *IndexedTy = GTI.getIndexedType();
1274 if (!IndexedTy->isSized()) {
1275 Known.resetAll();
1276 break;
1277 }
1278
1279 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1280 KnownBits IndexBits(IndexBitWidth);
1281 computeKnownBits(V: Index, Known&: IndexBits, Depth: Depth + 1, Q);
1282 TypeSize IndexTypeSize = GTI.getSequentialElementStride(DL: Q.DL);
1283 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
1284 KnownBits ScalingFactor(IndexBitWidth);
1285 // Multiply by current sizeof type.
1286 // &A[i] == A + i * sizeof(*A[i]).
1287 if (IndexTypeSize.isScalable()) {
1288 // For scalable types the only thing we know about sizeof is
1289 // that this is a multiple of the minimum size.
1290 ScalingFactor.Zero.setLowBits(llvm::countr_zero(Val: TypeSizeInBytes));
1291 } else if (IndexBits.isConstant()) {
1292 APInt IndexConst = IndexBits.getConstant();
1293 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1294 IndexConst *= ScalingFactor;
1295 AccConstIndices += IndexConst.sextOrTrunc(width: BitWidth);
1296 continue;
1297 } else {
1298 ScalingFactor =
1299 KnownBits::makeConstant(C: APInt(IndexBitWidth, TypeSizeInBytes));
1300 }
1301 IndexBits = KnownBits::mul(LHS: IndexBits, RHS: ScalingFactor);
1302
1303 // If the offsets have a different width from the pointer, according
1304 // to the language reference we need to sign-extend or truncate them
1305 // to the width of the pointer.
1306 IndexBits = IndexBits.sextOrTrunc(BitWidth);
1307
1308 // Note that inbounds does *not* guarantee nsw for the addition, as only
1309 // the offset is signed, while the base address is unsigned.
1310 Known = KnownBits::computeForAddSub(
1311 /*Add=*/true, /*NSW=*/false, /* NUW=*/false, LHS: Known, RHS: IndexBits);
1312 }
1313 if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1314 KnownBits Index = KnownBits::makeConstant(C: AccConstIndices);
1315 Known = KnownBits::computeForAddSub(
1316 /*Add=*/true, /*NSW=*/false, /* NUW=*/false, LHS: Known, RHS: Index);
1317 }
1318 break;
1319 }
1320 case Instruction::PHI: {
1321 const PHINode *P = cast<PHINode>(Val: I);
1322 BinaryOperator *BO = nullptr;
1323 Value *R = nullptr, *L = nullptr;
1324 if (matchSimpleRecurrence(P, BO, Start&: R, Step&: L)) {
1325 // Handle the case of a simple two-predecessor recurrence PHI.
1326 // There's a lot more that could theoretically be done here, but
1327 // this is sufficient to catch some interesting cases.
1328 unsigned Opcode = BO->getOpcode();
1329
1330 // If this is a shift recurrence, we know the bits being shifted in.
1331 // We can combine that with information about the start value of the
1332 // recurrence to conclude facts about the result.
1333 if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
1334 Opcode == Instruction::Shl) &&
1335 BO->getOperand(i_nocapture: 0) == I) {
1336
1337 // We have matched a recurrence of the form:
1338 // %iv = [R, %entry], [%iv.next, %backedge]
1339 // %iv.next = shift_op %iv, L
1340
1341 // Recurse with the phi context to avoid concern about whether facts
1342 // inferred hold at original context instruction. TODO: It may be
        // correct to use the original context. If warranted, explore and
1344 // add sufficient tests to cover.
1345 SimplifyQuery RecQ = Q;
1346 RecQ.CxtI = P;
1347 computeKnownBits(V: R, DemandedElts, Known&: Known2, Depth: Depth + 1, Q: RecQ);
1348 switch (Opcode) {
1349 case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros
1351 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1352 break;
1353 case Instruction::LShr:
1354 // A lshr recurrence will preserve the leading zeros of the
1355 // start value
1356 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1357 break;
1358 case Instruction::AShr:
1359 // An ashr recurrence will extend the initial sign bit
1360 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1361 Known.One.setHighBits(Known2.countMinLeadingOnes());
1362 break;
1363 };
1364 }
1365
1366 // Check for operations that have the property that if
1367 // both their operands have low zero bits, the result
1368 // will have low zero bits.
1369 if (Opcode == Instruction::Add ||
1370 Opcode == Instruction::Sub ||
1371 Opcode == Instruction::And ||
1372 Opcode == Instruction::Or ||
1373 Opcode == Instruction::Mul) {
1374 // Change the context instruction to the "edge" that flows into the
1375 // phi. This is important because that is where the value is actually
1376 // "evaluated" even though it is used later somewhere else. (see also
1377 // D69571).
1378 SimplifyQuery RecQ = Q;
1379
1380 unsigned OpNum = P->getOperand(i_nocapture: 0) == R ? 0 : 1;
1381 Instruction *RInst = P->getIncomingBlock(i: OpNum)->getTerminator();
1382 Instruction *LInst = P->getIncomingBlock(i: 1-OpNum)->getTerminator();
1383
1384 // Ok, we have a PHI of the form L op= R. Check for low
1385 // zero bits.
1386 RecQ.CxtI = RInst;
1387 computeKnownBits(V: R, Known&: Known2, Depth: Depth + 1, Q: RecQ);
1388
1389 // We need to take the minimum number of known bits
1390 KnownBits Known3(BitWidth);
1391 RecQ.CxtI = LInst;
1392 computeKnownBits(V: L, Known&: Known3, Depth: Depth + 1, Q: RecQ);
1393
1394 Known.Zero.setLowBits(std::min(a: Known2.countMinTrailingZeros(),
1395 b: Known3.countMinTrailingZeros()));
1396
1397 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(Val: BO);
1398 if (OverflowOp && Q.IIQ.hasNoSignedWrap(Op: OverflowOp)) {
          // If the initial value of the recurrence is nonnegative and we are
          // adding a nonnegative number with nsw, the result can only be
          // nonnegative or poison, regardless of the number of times we
          // execute the add in the phi recurrence. If the initial value is
          // negative and we are adding a negative number with nsw, the result
          // can only be negative or poison. Similar arguments apply to sub
          // and mul.
1405 //
1406 // (add non-negative, non-negative) --> non-negative
1407 // (add negative, negative) --> negative
1408 if (Opcode == Instruction::Add) {
1409 if (Known2.isNonNegative() && Known3.isNonNegative())
1410 Known.makeNonNegative();
1411 else if (Known2.isNegative() && Known3.isNegative())
1412 Known.makeNegative();
1413 }
1414
1415 // (sub nsw non-negative, negative) --> non-negative
1416 // (sub nsw negative, non-negative) --> negative
1417 else if (Opcode == Instruction::Sub && BO->getOperand(i_nocapture: 0) == I) {
1418 if (Known2.isNonNegative() && Known3.isNegative())
1419 Known.makeNonNegative();
1420 else if (Known2.isNegative() && Known3.isNonNegative())
1421 Known.makeNegative();
1422 }
1423
1424 // (mul nsw non-negative, non-negative) --> non-negative
1425 else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1426 Known3.isNonNegative())
1427 Known.makeNonNegative();
1428 }
1429
1430 break;
1431 }
1432 }
1433
1434 // Unreachable blocks may have zero-operand PHI nodes.
1435 if (P->getNumIncomingValues() == 0)
1436 break;
1437
1438 // Otherwise take the unions of the known bit sets of the operands,
1439 // taking conservative care to avoid excessive recursion.
1440 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value references the phi itself.
1442 if (isa_and_nonnull<UndefValue>(Val: P->hasConstantValue()))
1443 break;
1444
1445 Known.Zero.setAllBits();
1446 Known.One.setAllBits();
1447 for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
1448 Value *IncValue = P->getIncomingValue(i: u);
1449 // Skip direct self references.
1450 if (IncValue == P) continue;
1451
1452 // Change the context instruction to the "edge" that flows into the
1453 // phi. This is important because that is where the value is actually
1454 // "evaluated" even though it is used later somewhere else. (see also
1455 // D69571).
1456 SimplifyQuery RecQ = Q;
1457 RecQ.CxtI = P->getIncomingBlock(i: u)->getTerminator();
1458
1459 Known2 = KnownBits(BitWidth);
1460
1461 // Recurse, but cap the recursion to one level, because we don't
1462 // want to waste time spinning around in loops.
1463 // TODO: See if we can base recursion limiter on number of incoming phi
1464 // edges so we don't overly clamp analysis.
1465 computeKnownBits(V: IncValue, Known&: Known2, Depth: MaxAnalysisRecursionDepth - 1, Q: RecQ);
1466
1467 // See if we can further use a conditional branch into the phi
1468 // to help us determine the range of the value.
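        // For example (illustrative): if this incoming edge is only taken
        // when "%inc u< 16" holds, makeExactICmpRegion gives the range
        // [0, 16), whose known bits say that bits 4 and above are zero, and
        // that is unioned into what we already know about the incoming value.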
1469 if (!Known2.isConstant()) {
1470 ICmpInst::Predicate Pred;
1471 const APInt *RHSC;
1472 BasicBlock *TrueSucc, *FalseSucc;
1473 // TODO: Use RHS Value and compute range from its known bits.
1474 if (match(V: RecQ.CxtI,
1475 P: m_Br(C: m_c_ICmp(Pred, L: m_Specific(V: IncValue), R: m_APInt(Res&: RHSC)),
1476 T: m_BasicBlock(V&: TrueSucc), F: m_BasicBlock(V&: FalseSucc)))) {
1477 // Check for cases of duplicate successors.
1478 if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
1479 // If we're using the false successor, invert the predicate.
1480 if (FalseSucc == P->getParent())
1481 Pred = CmpInst::getInversePredicate(pred: Pred);
1482 // Get the knownbits implied by the incoming phi condition.
1483 auto CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
1484 KnownBits KnownUnion = Known2.unionWith(RHS: CR.toKnownBits());
            // We can have conflicts here if we are analyzing dead code (it's
            // impossible for us to reach this BB based on the icmp).
1487 if (KnownUnion.hasConflict()) {
1488 // No reason to continue analyzing in a known dead region, so
1489 // just resetAll and break. This will cause us to also exit the
1490 // outer loop.
1491 Known.resetAll();
1492 break;
1493 }
1494 Known2 = KnownUnion;
1495 }
1496 }
1497 }
1498
1499 Known = Known.intersectWith(RHS: Known2);
        // If nothing is known about any bit anymore, there's no need to
        // check more operands.
1502 if (Known.isUnknown())
1503 break;
1504 }
1505 }
1506 break;
1507 }
1508 case Instruction::Call:
1509 case Instruction::Invoke: {
1510 // If range metadata is attached to this call, set known bits from that,
1511 // and then intersect with known bits based on other properties of the
1512 // function.
1513 if (MDNode *MD =
1514 Q.IIQ.getMetadata(I: cast<Instruction>(Val: I), KindID: LLVMContext::MD_range))
1515 computeKnownBitsFromRangeMetadata(Ranges: *MD, Known);
1516
1517 const auto *CB = cast<CallBase>(Val: I);
1518
1519 if (std::optional<ConstantRange> Range = CB->getRange())
1520 Known = Known.unionWith(RHS: Range->toKnownBits());
1521
1522 if (const Value *RV = CB->getReturnedArgOperand()) {
1523 if (RV->getType() == I->getType()) {
1524 computeKnownBits(V: RV, Known&: Known2, Depth: Depth + 1, Q);
1525 Known = Known.unionWith(RHS: Known2);
1526 // If the function doesn't return properly for all input values
1527 // (e.g. unreachable exits) then there might be conflicts between the
1528 // argument value and the range metadata. Simply discard the known bits
1529 // in case of conflicts.
1530 if (Known.hasConflict())
1531 Known.resetAll();
1532 }
1533 }
1534 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
1535 switch (II->getIntrinsicID()) {
1536 default: break;
1537 case Intrinsic::abs: {
1538 computeKnownBits(V: I->getOperand(i: 0), Known&: Known2, Depth: Depth + 1, Q);
1539 bool IntMinIsPoison = match(V: II->getArgOperand(i: 1), P: m_One());
1540 Known = Known2.abs(IntMinIsPoison);
1541 break;
1542 }
1543 case Intrinsic::bitreverse:
1544 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
1545 Known.Zero |= Known2.Zero.reverseBits();
1546 Known.One |= Known2.One.reverseBits();
1547 break;
1548 case Intrinsic::bswap:
1549 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Depth: Depth + 1, Q);
1550 Known.Zero |= Known2.Zero.byteSwap();
1551 Known.One |= Known2.One.byteSwap();
1552 break;
1553 case Intrinsic::ctlz: {
1554 computeKnownBits(V: I->getOperand(i: 0), Known&: Known2, Depth: Depth + 1, Q);
1555 // If we have a known 1, its position is our upper bound.
1556 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
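      // For example (illustrative): if bit 27 of an i32 operand is known to
      // be one, at most 4 leading zeros are possible, so the result fits in
      // bit_width(4) = 3 bits and all higher result bits are known zero.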
1557 // If this call is poison for 0 input, the result will be less than 2^n.
1558 if (II->getArgOperand(i: 1) == ConstantInt::getTrue(Context&: II->getContext()))
1559 PossibleLZ = std::min(a: PossibleLZ, b: BitWidth - 1);
1560 unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
1561 Known.Zero.setBitsFrom(LowBits);
1562 break;
1563 }
1564 case Intrinsic::cttz: {
1565 computeKnownBits(V: I->getOperand(i: 0), Known&: Known2, Depth: Depth + 1, Q);
1566 // If we have a known 1, its position is our upper bound.
1567 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1568 // If this call is poison for 0 input, the result will be less than 2^n.
1569 if (II->getArgOperand(i: 1) == ConstantInt::getTrue(Context&: II->getContext()))
1570 PossibleTZ = std::min(a: PossibleTZ, b: BitWidth - 1);
1571 unsigned LowBits = llvm::bit_width(Value: PossibleTZ);
1572 Known.Zero.setBitsFrom(LowBits);
1573 break;
1574 }
1575 case Intrinsic::ctpop: {
1576 computeKnownBits(V: I->getOperand(i: 0), Known&: Known2, Depth: Depth + 1, Q);
1577 // We can bound the space the count needs. Also, bits known to be zero
1578 // can't contribute to the population.
1579 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1580 unsigned LowBits = llvm::bit_width(Value: BitsPossiblySet);
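      // For example (illustrative): if 27 bits of an i32 operand are known
      // zero, the population count is at most 5, which fits in
      // bit_width(5) = 3 bits, so all higher result bits are known zero.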
1581 Known.Zero.setBitsFrom(LowBits);
1582 // TODO: we could bound KnownOne using the lower bound on the number
1583 // of bits which might be set provided by popcnt KnownOne2.
1584 break;
1585 }
1586 case Intrinsic::fshr:
1587 case Intrinsic::fshl: {
1588 const APInt *SA;
1589 if (!match(V: I->getOperand(i: 2), P: m_APInt(Res&: SA)))
1590 break;
1591
1592 // Normalize to funnel shift left.
1593 uint64_t ShiftAmt = SA->urem(RHS: BitWidth);
1594 if (II->getIntrinsicID() == Intrinsic::fshr)
1595 ShiftAmt = BitWidth - ShiftAmt;
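      // For example (illustrative): on i8, a funnel shift right by 3 is the
      // same as a funnel shift left by 8 - 3 = 5, so only the left form needs
      // to be handled below.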
1596
1597 KnownBits Known3(BitWidth);
1598 computeKnownBits(V: I->getOperand(i: 0), Known&: Known2, Depth: Depth + 1, Q);
1599 computeKnownBits(V: I->getOperand(i: 1), Known&: Known3, Depth: Depth + 1, Q);
1600
1601 Known.Zero =
1602 Known2.Zero.shl(shiftAmt: ShiftAmt) | Known3.Zero.lshr(shiftAmt: BitWidth - ShiftAmt);
1603 Known.One =
1604 Known2.One.shl(shiftAmt: ShiftAmt) | Known3.One.lshr(shiftAmt: BitWidth - ShiftAmt);
1605 break;
1606 }
1607 case Intrinsic::uadd_sat:
1608 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1609 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1610 Known = KnownBits::uadd_sat(LHS: Known, RHS: Known2);
1611 break;
1612 case Intrinsic::usub_sat:
1613 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1614 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1615 Known = KnownBits::usub_sat(LHS: Known, RHS: Known2);
1616 break;
1617 case Intrinsic::sadd_sat:
1618 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1619 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1620 Known = KnownBits::sadd_sat(LHS: Known, RHS: Known2);
1621 break;
1622 case Intrinsic::ssub_sat:
1623 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1624 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1625 Known = KnownBits::ssub_sat(LHS: Known, RHS: Known2);
1626 break;
    // For min/max/and/or reductions, any bit common to every element of the
    // input vector is set in the output.
1629 case Intrinsic::vector_reduce_and:
1630 case Intrinsic::vector_reduce_or:
1631 case Intrinsic::vector_reduce_umax:
1632 case Intrinsic::vector_reduce_umin:
1633 case Intrinsic::vector_reduce_smax:
1634 case Intrinsic::vector_reduce_smin:
1635 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1636 break;
1637 case Intrinsic::vector_reduce_xor: {
1638 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
      // The zeros common to all elements are zero in the output.
      // If the number of elements is odd, then the common ones remain. If the
      // number of elements is even, then the common ones become zeros.
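      // For example (illustrative): reducing a <4 x i8> in which every
      // element has bit 0 set XORs an even number of ones, so bit 0 of the
      // result is known zero; with <3 x i8> it would remain known one.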
1642 auto *VecTy = cast<VectorType>(Val: I->getOperand(i: 0)->getType());
1643 // Even, so the ones become zeros.
1644 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1645 if (EvenCnt)
1646 Known.Zero |= Known.One;
      // The element count may be even, so we need to clear the ones.
1648 if (VecTy->isScalableTy() || EvenCnt)
1649 Known.One.clearAllBits();
1650 break;
1651 }
1652 case Intrinsic::umin:
1653 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1654 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1655 Known = KnownBits::umin(LHS: Known, RHS: Known2);
1656 break;
1657 case Intrinsic::umax:
1658 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1659 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1660 Known = KnownBits::umax(LHS: Known, RHS: Known2);
1661 break;
1662 case Intrinsic::smin:
1663 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1664 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1665 Known = KnownBits::smin(LHS: Known, RHS: Known2);
1666 break;
1667 case Intrinsic::smax:
1668 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1669 computeKnownBits(V: I->getOperand(i: 1), Known&: Known2, Depth: Depth + 1, Q);
1670 Known = KnownBits::smax(LHS: Known, RHS: Known2);
1671 break;
1672 case Intrinsic::ptrmask: {
1673 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1674
1675 const Value *Mask = I->getOperand(i: 1);
1676 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1677 computeKnownBits(V: Mask, Known&: Known2, Depth: Depth + 1, Q);
1678 // TODO: 1-extend would be more precise.
1679 Known &= Known2.anyextOrTrunc(BitWidth);
1680 break;
1681 }
1682 case Intrinsic::x86_sse42_crc32_64_64:
1683 Known.Zero.setBitsFrom(32);
1684 break;
1685 case Intrinsic::riscv_vsetvli:
1686 case Intrinsic::riscv_vsetvlimax: {
1687 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
1688 const ConstantRange Range = getVScaleRange(F: II->getFunction(), BitWidth);
1689 uint64_t SEW = RISCVVType::decodeVSEW(
1690 VSEW: cast<ConstantInt>(Val: II->getArgOperand(i: HasAVL))->getZExtValue());
1691 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
1692 cast<ConstantInt>(Val: II->getArgOperand(i: 1 + HasAVL))->getZExtValue());
1693 // The Range is [Lower, Upper), so we need to subtract 1 here to get the
1694 // real upper value.
1695 uint64_t MaxVLEN =
1696 (Range.getUpper().getZExtValue() - 1) * RISCV::RVVBitsPerBlock;
1697 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul: VLMUL);
1698
      // The result of vsetvli must not be larger than AVL.
1700 if (HasAVL)
1701 if (auto *CI = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 0)))
1702 MaxVL = std::min(a: MaxVL, b: CI->getZExtValue());
1703
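      // Worked example (illustrative): with vscale_range(2,2) the range upper
      // bound is 3, so MaxVLEN = (3 - 1) * 64 = 128; with SEW=32 and LMUL=1
      // the SEW/LMUL ratio is 32, giving MaxVL = 128 / 32 = 4 and a first
      // known-zero bit of Log2_32(4) + 1 = 3.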
1704 unsigned KnownZeroFirstBit = Log2_32(Value: MaxVL) + 1;
1705 if (BitWidth > KnownZeroFirstBit)
1706 Known.Zero.setBitsFrom(KnownZeroFirstBit);
1707 break;
1708 }
1709 case Intrinsic::vscale: {
1710 if (!II->getParent() || !II->getFunction())
1711 break;
1712
1713 Known = getVScaleRange(F: II->getFunction(), BitWidth).toKnownBits();
1714 break;
1715 }
1716 }
1717 }
1718 break;
1719 }
1720 case Instruction::ShuffleVector: {
1721 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: I);
1722 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1723 if (!Shuf) {
1724 Known.resetAll();
1725 return;
1726 }
1727 // For undef elements, we don't know anything about the common state of
1728 // the shuffle result.
1729 APInt DemandedLHS, DemandedRHS;
1730 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1731 Known.resetAll();
1732 return;
1733 }
1734 Known.One.setAllBits();
1735 Known.Zero.setAllBits();
1736 if (!!DemandedLHS) {
1737 const Value *LHS = Shuf->getOperand(i_nocapture: 0);
1738 computeKnownBits(V: LHS, DemandedElts: DemandedLHS, Known, Depth: Depth + 1, Q);
1739 // If we don't know any bits, early out.
1740 if (Known.isUnknown())
1741 break;
1742 }
1743 if (!!DemandedRHS) {
1744 const Value *RHS = Shuf->getOperand(i_nocapture: 1);
1745 computeKnownBits(V: RHS, DemandedElts: DemandedRHS, Known&: Known2, Depth: Depth + 1, Q);
1746 Known = Known.intersectWith(RHS: Known2);
1747 }
1748 break;
1749 }
1750 case Instruction::InsertElement: {
1751 if (isa<ScalableVectorType>(Val: I->getType())) {
1752 Known.resetAll();
1753 return;
1754 }
1755 const Value *Vec = I->getOperand(i: 0);
1756 const Value *Elt = I->getOperand(i: 1);
1757 auto *CIdx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
1758 unsigned NumElts = DemandedElts.getBitWidth();
1759 APInt DemandedVecElts = DemandedElts;
1760 bool NeedsElt = true;
    // If we know the index we are inserting into, clear it from the Vec check.
1762 if (CIdx && CIdx->getValue().ult(RHS: NumElts)) {
1763 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
1764 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1765 }
1766
1767 Known.One.setAllBits();
1768 Known.Zero.setAllBits();
1769 if (NeedsElt) {
1770 computeKnownBits(V: Elt, Known, Depth: Depth + 1, Q);
1771 // If we don't know any bits, early out.
1772 if (Known.isUnknown())
1773 break;
1774 }
1775
1776 if (!DemandedVecElts.isZero()) {
1777 computeKnownBits(V: Vec, DemandedElts: DemandedVecElts, Known&: Known2, Depth: Depth + 1, Q);
1778 Known = Known.intersectWith(RHS: Known2);
1779 }
1780 break;
1781 }
1782 case Instruction::ExtractElement: {
    // Look through extractelement. If the index is non-constant or
    // out-of-range, demand all elements; otherwise demand just the extracted
    // element.
1785 const Value *Vec = I->getOperand(i: 0);
1786 const Value *Idx = I->getOperand(i: 1);
1787 auto *CIdx = dyn_cast<ConstantInt>(Val: Idx);
1788 if (isa<ScalableVectorType>(Val: Vec->getType())) {
1789 // FIXME: there's probably *something* we can do with scalable vectors
1790 Known.resetAll();
1791 break;
1792 }
1793 unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
1794 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
1795 if (CIdx && CIdx->getValue().ult(RHS: NumElts))
1796 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
1797 computeKnownBits(V: Vec, DemandedElts: DemandedVecElts, Known, Depth: Depth + 1, Q);
1798 break;
1799 }
1800 case Instruction::ExtractValue:
1801 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I->getOperand(i: 0))) {
1802 const ExtractValueInst *EVI = cast<ExtractValueInst>(Val: I);
1803 if (EVI->getNumIndices() != 1) break;
1804 if (EVI->getIndices()[0] == 0) {
1805 switch (II->getIntrinsicID()) {
1806 default: break;
1807 case Intrinsic::uadd_with_overflow:
1808 case Intrinsic::sadd_with_overflow:
1809 computeKnownBitsAddSub(
1810 Add: true, Op0: II->getArgOperand(i: 0), Op1: II->getArgOperand(i: 1), /*NSW=*/false,
1811 /* NUW=*/false, DemandedElts, KnownOut&: Known, Known2, Depth, Q);
1812 break;
1813 case Intrinsic::usub_with_overflow:
1814 case Intrinsic::ssub_with_overflow:
1815 computeKnownBitsAddSub(
1816 Add: false, Op0: II->getArgOperand(i: 0), Op1: II->getArgOperand(i: 1), /*NSW=*/false,
1817 /* NUW=*/false, DemandedElts, KnownOut&: Known, Known2, Depth, Q);
1818 break;
1819 case Intrinsic::umul_with_overflow:
1820 case Intrinsic::smul_with_overflow:
1821 computeKnownBitsMul(Op0: II->getArgOperand(i: 0), Op1: II->getArgOperand(i: 1), NSW: false,
1822 DemandedElts, Known, Known2, Depth, Q);
1823 break;
1824 }
1825 }
1826 }
1827 break;
1828 case Instruction::Freeze:
1829 if (isGuaranteedNotToBePoison(V: I->getOperand(i: 0), AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT,
1830 Depth: Depth + 1))
1831 computeKnownBits(V: I->getOperand(i: 0), Known, Depth: Depth + 1, Q);
1832 break;
1833 }
1834}
1835
1836/// Determine which bits of V are known to be either zero or one and return
1837/// them.
1838KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
1839 unsigned Depth, const SimplifyQuery &Q) {
1840 KnownBits Known(getBitWidth(Ty: V->getType(), DL: Q.DL));
1841 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
1842 return Known;
1843}
1844
1845/// Determine which bits of V are known to be either zero or one and return
1846/// them.
1847KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
1848 const SimplifyQuery &Q) {
1849 KnownBits Known(getBitWidth(Ty: V->getType(), DL: Q.DL));
1850 computeKnownBits(V, Known, Depth, Q);
1851 return Known;
1852}
1853
1854/// Determine which bits of V are known to be either zero or one and return
1855/// them in the Known bit set.
1856///
1857/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
1858/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero. If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
1861/// Because instcombine aggressively folds operations with undef args anyway,
1862/// this won't lose us code quality.
1863///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers. In the case where V is a vector, the known
/// zero and known one values are the same width as the vector element, and a
/// bit is set only if it is true for all of the demanded elements in the
/// vector specified by DemandedElts.
1869void computeKnownBits(const Value *V, const APInt &DemandedElts,
1870 KnownBits &Known, unsigned Depth,
1871 const SimplifyQuery &Q) {
1872 if (!DemandedElts) {
1873 // No demanded elts, better to assume we don't know anything.
1874 Known.resetAll();
1875 return;
1876 }
1877
1878 assert(V && "No Value?");
1879 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
1880
1881#ifndef NDEBUG
1882 Type *Ty = V->getType();
1883 unsigned BitWidth = Known.getBitWidth();
1884
1885 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
1886 "Not integer or pointer type!");
1887
1888 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: Ty)) {
1889 assert(
1890 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
1891 "DemandedElt width should equal the fixed vector number of elements");
1892 } else {
1893 assert(DemandedElts == APInt(1, 1) &&
1894 "DemandedElt width should be 1 for scalars or scalable vectors");
1895 }
1896
1897 Type *ScalarTy = Ty->getScalarType();
1898 if (ScalarTy->isPointerTy()) {
1899 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
1900 "V and Known should have same BitWidth");
1901 } else {
1902 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
1903 "V and Known should have same BitWidth");
1904 }
1905#endif
1906
1907 const APInt *C;
1908 if (match(V, P: m_APInt(Res&: C))) {
1909 // We know all of the bits for a scalar constant or a splat vector constant!
1910 Known = KnownBits::makeConstant(C: *C);
1911 return;
1912 }
1913 // Null and aggregate-zero are all-zeros.
1914 if (isa<ConstantPointerNull>(Val: V) || isa<ConstantAggregateZero>(Val: V)) {
1915 Known.setAllZero();
1916 return;
1917 }
1918 // Handle a constant vector by taking the intersection of the known bits of
1919 // each element.
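  // For example (illustrative): for <2 x i8> <i8 3, i8 5>, the elements agree
  // that bit 0 is one and bits 3-7 are zero, so Known.One = 0x01 and
  // Known.Zero = 0xF8, while bits 1 and 2 stay unknown.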
1920 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(Val: V)) {
1921 assert(!isa<ScalableVectorType>(V->getType()));
1922 // We know that CDV must be a vector of integers. Take the intersection of
1923 // each element.
1924 Known.Zero.setAllBits(); Known.One.setAllBits();
1925 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
1926 if (!DemandedElts[i])
1927 continue;
1928 APInt Elt = CDV->getElementAsAPInt(i);
1929 Known.Zero &= ~Elt;
1930 Known.One &= Elt;
1931 }
1932 if (Known.hasConflict())
1933 Known.resetAll();
1934 return;
1935 }
1936
1937 if (const auto *CV = dyn_cast<ConstantVector>(Val: V)) {
1938 assert(!isa<ScalableVectorType>(V->getType()));
1939 // We know that CV must be a vector of integers. Take the intersection of
1940 // each element.
1941 Known.Zero.setAllBits(); Known.One.setAllBits();
1942 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1943 if (!DemandedElts[i])
1944 continue;
1945 Constant *Element = CV->getAggregateElement(Elt: i);
1946 if (isa<PoisonValue>(Val: Element))
1947 continue;
1948 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Val: Element);
1949 if (!ElementCI) {
1950 Known.resetAll();
1951 return;
1952 }
1953 const APInt &Elt = ElementCI->getValue();
1954 Known.Zero &= ~Elt;
1955 Known.One &= Elt;
1956 }
1957 if (Known.hasConflict())
1958 Known.resetAll();
1959 return;
1960 }
1961
1962 // Start out not knowing anything.
1963 Known.resetAll();
1964
1965 // We can't imply anything about undefs.
1966 if (isa<UndefValue>(Val: V))
1967 return;
1968
1969 // There's no point in looking through other users of ConstantData for
1970 // assumptions. Confirm that we've handled them all.
1971 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
1972
1973 if (const auto *A = dyn_cast<Argument>(Val: V))
1974 if (std::optional<ConstantRange> Range = A->getRange())
1975 Known = Range->toKnownBits();
1976
1977 // All recursive calls that increase depth must come after this.
1978 if (Depth == MaxAnalysisRecursionDepth)
1979 return;
1980
1981 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
1982 // the bits of its aliasee.
1983 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: V)) {
1984 if (!GA->isInterposable())
1985 computeKnownBits(V: GA->getAliasee(), Known, Depth: Depth + 1, Q);
1986 return;
1987 }
1988
1989 if (const Operator *I = dyn_cast<Operator>(Val: V))
1990 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
1991 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) {
1992 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
1993 Known = CR->toKnownBits();
1994 }
1995
  // Aligned pointers have trailing zeros; refine the Known.Zero set.
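  // For example (illustrative): a pointer known to be aligned to 16 bytes has
  // Log2(16) = 4 trailing zero bits.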
1997 if (isa<PointerType>(Val: V->getType())) {
1998 Align Alignment = V->getPointerAlignment(DL: Q.DL);
1999 Known.Zero.setLowBits(Log2(A: Alignment));
2000 }
2001
  // computeKnownBitsFromContext strictly refines Known.
  // Therefore, we run it after computeKnownBitsFromOperator.
2004
2005 // Check whether we can determine known bits from context such as assumes.
2006 computeKnownBitsFromContext(V, Known, Depth, Q);
2007
2008 assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
2009}
2010
/// Try to detect a recurrence in which the value of the induction variable is
/// always a power of two (or zero).
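/// For example (illustrative):
///   %p = phi i32 [ 8, %entry ], [ %p.next, %loop ]
///   %p.next = shl nuw i32 %p, 1
/// starts at a power of two and doubles without unsigned wrap, so each
/// non-poison value of %p is a power of two.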
2013static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2014 unsigned Depth, SimplifyQuery &Q) {
2015 BinaryOperator *BO = nullptr;
2016 Value *Start = nullptr, *Step = nullptr;
2017 if (!matchSimpleRecurrence(P: PN, BO, Start, Step))
2018 return false;
2019
2020 // Initial value must be a power of two.
2021 for (const Use &U : PN->operands()) {
2022 if (U.get() == Start) {
      // The initial value comes from a different BB, so we need to adjust the
      // context instruction for the analysis.
2025 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2026 if (!isKnownToBeAPowerOfTwo(V: Start, OrZero, Depth, Q))
2027 return false;
2028 }
2029 }
2030
2031 // Except for Mul, the induction variable must be on the left side of the
2032 // increment expression, otherwise its value can be arbitrary.
2033 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(i_nocapture: 1) != Step)
2034 return false;
2035
2036 Q.CxtI = BO->getParent()->getTerminator();
2037 switch (BO->getOpcode()) {
2038 case Instruction::Mul:
2039 // Power of two is closed under multiplication.
2040 return (OrZero || Q.IIQ.hasNoUnsignedWrap(Op: BO) ||
2041 Q.IIQ.hasNoSignedWrap(Op: BO)) &&
2042 isKnownToBeAPowerOfTwo(V: Step, OrZero, Depth, Q);
2043 case Instruction::SDiv:
2044 // Start value must not be signmask for signed division, so simply being a
2045 // power of two is not sufficient, and it has to be a constant.
2046 if (!match(V: Start, P: m_Power2()) || match(V: Start, P: m_SignMask()))
2047 return false;
2048 [[fallthrough]];
2049 case Instruction::UDiv:
    // The divisor must be a power of two.
    // If OrZero is false, we cannot guarantee that the induction variable is
    // non-zero after the division (same for Shr), unless it is an exact
    // division.
2053 return (OrZero || Q.IIQ.isExact(Op: BO)) &&
2054 isKnownToBeAPowerOfTwo(V: Step, OrZero: false, Depth, Q);
2055 case Instruction::Shl:
2056 return OrZero || Q.IIQ.hasNoUnsignedWrap(Op: BO) || Q.IIQ.hasNoSignedWrap(Op: BO);
2057 case Instruction::AShr:
2058 if (!match(V: Start, P: m_Power2()) || match(V: Start, P: m_SignMask()))
2059 return false;
2060 [[fallthrough]];
2061 case Instruction::LShr:
2062 return OrZero || Q.IIQ.isExact(Op: BO);
2063 default:
2064 return false;
2065 }
2066}
2067
2068/// Return true if the given value is known to have exactly one
2069/// bit set when defined. For vectors return true if every element is known to
2070/// be a power of two when defined. Supports values with integer or pointer
2071/// types and vectors of integers.
2072bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2073 const SimplifyQuery &Q) {
2074 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2075
2076 if (isa<Constant>(Val: V))
2077 return OrZero ? match(V, P: m_Power2OrZero()) : match(V, P: m_Power2());
2078
2079 // i1 is by definition a power of 2 or zero.
2080 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2081 return true;
2082
2083 auto *I = dyn_cast<Instruction>(Val: V);
2084 if (!I)
2085 return false;
2086
2087 if (Q.CxtI && match(V, P: m_VScale())) {
2088 const Function *F = Q.CxtI->getFunction();
2089 // The vscale_range indicates vscale is a power-of-two.
2090 return F->hasFnAttribute(Attribute::VScaleRange);
2091 }
2092
2093 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2094 // it is shifted off the end then the result is undefined.
2095 if (match(V: I, P: m_Shl(L: m_One(), R: m_Value())))
2096 return true;
2097
2098 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2099 // the bottom. If it is shifted off the bottom then the result is undefined.
2100 if (match(V: I, P: m_LShr(L: m_SignMask(), R: m_Value())))
2101 return true;
2102
2103 // The remaining tests are all recursive, so bail out if we hit the limit.
2104 if (Depth++ == MaxAnalysisRecursionDepth)
2105 return false;
2106
2107 switch (I->getOpcode()) {
2108 case Instruction::ZExt:
2109 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Depth, Q);
2110 case Instruction::Trunc:
2111 return OrZero && isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Depth, Q);
2112 case Instruction::Shl:
2113 if (OrZero || Q.IIQ.hasNoUnsignedWrap(Op: I) || Q.IIQ.hasNoSignedWrap(Op: I))
2114 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Depth, Q);
2115 return false;
2116 case Instruction::LShr:
2117 if (OrZero || Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)))
2118 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Depth, Q);
2119 return false;
2120 case Instruction::UDiv:
2121 if (Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)))
2122 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Depth, Q);
2123 return false;
2124 case Instruction::Mul:
2125 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), OrZero, Depth, Q) &&
2126 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Depth, Q) &&
2127 (OrZero || isKnownNonZero(V: I, Q, Depth));
2128 case Instruction::And:
2129 // A power of two and'd with anything is a power of two or zero.
2130 if (OrZero &&
2131 (isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), /*OrZero*/ true, Depth, Q) ||
2132 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), /*OrZero*/ true, Depth, Q)))
2133 return true;
2134 // X & (-X) is always a power of two or zero.
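    // For example (illustrative): x = 0b0110 gives -x = 0b1010 and
    // x & -x = 0b0010, i.e. the lowest set bit of x.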
2135 if (match(V: I->getOperand(i: 0), P: m_Neg(V: m_Specific(V: I->getOperand(i: 1)))) ||
2136 match(V: I->getOperand(i: 1), P: m_Neg(V: m_Specific(V: I->getOperand(i: 0)))))
2137 return OrZero || isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2138 return false;
2139 case Instruction::Add: {
2140 // Adding a power-of-two or zero to the same power-of-two or zero yields
2141 // either the original power-of-two, a larger power-of-two or zero.
2142 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(Val: V);
2143 if (OrZero || Q.IIQ.hasNoUnsignedWrap(Op: VOBO) ||
2144 Q.IIQ.hasNoSignedWrap(Op: VOBO)) {
2145 if (match(V: I->getOperand(i: 0),
2146 P: m_c_And(L: m_Specific(V: I->getOperand(i: 1)), R: m_Value())) &&
2147 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), OrZero, Depth, Q))
2148 return true;
2149 if (match(V: I->getOperand(i: 1),
2150 P: m_c_And(L: m_Specific(V: I->getOperand(i: 0)), R: m_Value())) &&
2151 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Depth, Q))
2152 return true;
2153
2154 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2155 KnownBits LHSBits(BitWidth);
2156 computeKnownBits(V: I->getOperand(i: 0), Known&: LHSBits, Depth, Q);
2157
2158 KnownBits RHSBits(BitWidth);
2159 computeKnownBits(V: I->getOperand(i: 1), Known&: RHSBits, Depth, Q);
2160 // If i8 V is a power of two or zero:
2161 // ZeroBits: 1 1 1 0 1 1 1 1
2162 // ~ZeroBits: 0 0 0 1 0 0 0 0
2163 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2164 // If OrZero isn't set, we cannot give back a zero result.
2165 // Make sure either the LHS or RHS has a bit set.
2166 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2167 return true;
2168 }
2169 return false;
2170 }
2171 case Instruction::Select:
2172 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), OrZero, Depth, Q) &&
2173 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 2), OrZero, Depth, Q);
2174 case Instruction::PHI: {
    // A PHI node is a power of two if all incoming values are powers of two,
    // or if it is an induction variable whose value is a power of two at each
    // step.
2178 auto *PN = cast<PHINode>(Val: I);
2179 SimplifyQuery RecQ = Q;
2180
2181 // Check if it is an induction variable and always power of two.
2182 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, Q&: RecQ))
2183 return true;
2184
2185 // Recursively check all incoming values. Limit recursion to 2 levels, so
2186 // that search complexity is limited to number of operands^2.
2187 unsigned NewDepth = std::max(a: Depth, b: MaxAnalysisRecursionDepth - 1);
2188 return llvm::all_of(Range: PN->operands(), P: [&](const Use &U) {
2189 // Value is power of 2 if it is coming from PHI node itself by induction.
2190 if (U.get() == PN)
2191 return true;
2192
2193 // Change the context instruction to the incoming block where it is
2194 // evaluated.
2195 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2196 return isKnownToBeAPowerOfTwo(V: U.get(), OrZero, Depth: NewDepth, Q: RecQ);
2197 });
2198 }
2199 case Instruction::Invoke:
2200 case Instruction::Call: {
2201 if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
2202 switch (II->getIntrinsicID()) {
2203 case Intrinsic::umax:
2204 case Intrinsic::smax:
2205 case Intrinsic::umin:
2206 case Intrinsic::smin:
2207 return isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 1), OrZero, Depth, Q) &&
2208 isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), OrZero, Depth, Q);
      // bswap/bitreverse just move bits around, but don't change any 1s/0s,
      // and thus don't change pow2/non-pow2 status.
2211 case Intrinsic::bitreverse:
2212 case Intrinsic::bswap:
2213 return isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), OrZero, Depth, Q);
2214 case Intrinsic::fshr:
2215 case Intrinsic::fshl:
2216 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2217 if (II->getArgOperand(i: 0) == II->getArgOperand(i: 1))
2218 return isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), OrZero, Depth, Q);
2219 break;
2220 default:
2221 break;
2222 }
2223 }
2224 return false;
2225 }
2226 default:
2227 return false;
2228 }
2229}
2230
2231/// Test whether a GEP's result is known to be non-null.
2232///
2233/// Uses properties inherent in a GEP to try to determine whether it is known
2234/// to be non-null.
2235///
2236/// Currently this routine does not support vector GEPs.
2237static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2238 const SimplifyQuery &Q) {
2239 const Function *F = nullptr;
2240 if (const Instruction *I = dyn_cast<Instruction>(Val: GEP))
2241 F = I->getFunction();
2242
2243 if (!GEP->isInBounds() ||
2244 NullPointerIsDefined(F, AS: GEP->getPointerAddressSpace()))
2245 return false;
2246
2247 // FIXME: Support vector-GEPs.
2248 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2249
2250 // If the base pointer is non-null, we cannot walk to a null address with an
2251 // inbounds GEP in address space zero.
2252 if (isKnownNonZero(V: GEP->getPointerOperand(), Q, Depth))
2253 return true;
2254
2255 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2256 // If so, then the GEP cannot produce a null pointer, as doing so would
2257 // inherently violate the inbounds contract within address space zero.
2258 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2259 GTI != GTE; ++GTI) {
2260 // Struct types are easy -- they must always be indexed by a constant.
2261 if (StructType *STy = GTI.getStructTypeOrNull()) {
2262 ConstantInt *OpC = cast<ConstantInt>(Val: GTI.getOperand());
2263 unsigned ElementIdx = OpC->getZExtValue();
2264 const StructLayout *SL = Q.DL.getStructLayout(Ty: STy);
2265 uint64_t ElementOffset = SL->getElementOffset(Idx: ElementIdx);
2266 if (ElementOffset > 0)
2267 return true;
2268 continue;
2269 }
2270
2271 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2272 if (GTI.getSequentialElementStride(DL: Q.DL).isZero())
2273 continue;
2274
2275 // Fast path the constant operand case both for efficiency and so we don't
2276 // increment Depth when just zipping down an all-constant GEP.
2277 if (ConstantInt *OpC = dyn_cast<ConstantInt>(Val: GTI.getOperand())) {
2278 if (!OpC->isZero())
2279 return true;
2280 continue;
2281 }
2282
2283 // We post-increment Depth here because while isKnownNonZero increments it
2284 // as well, when we pop back up that increment won't persist. We don't want
2285 // to recurse 10k times just because we have 10k GEP operands. We don't
2286 // bail completely out because we want to handle constant GEPs regardless
2287 // of depth.
2288 if (Depth++ >= MaxAnalysisRecursionDepth)
2289 continue;
2290
2291 if (isKnownNonZero(V: GTI.getOperand(), Q, Depth))
2292 return true;
2293 }
2294
2295 return false;
2296}
2297
2298static bool isKnownNonNullFromDominatingCondition(const Value *V,
2299 const Instruction *CtxI,
2300 const DominatorTree *DT) {
2301 assert(!isa<Constant>(V) && "Called for constant?");
2302
2303 if (!CtxI || !DT)
2304 return false;
2305
2306 unsigned NumUsesExplored = 0;
2307 for (const auto *U : V->users()) {
2308 // Avoid massive lists
2309 if (NumUsesExplored >= DomConditionsMaxUses)
2310 break;
2311 NumUsesExplored++;
2312
2313 // If the value is used as an argument to a call or invoke, then argument
2314 // attributes may provide an answer about null-ness.
2315 if (const auto *CB = dyn_cast<CallBase>(Val: U))
2316 if (auto *CalledFunc = CB->getCalledFunction())
2317 for (const Argument &Arg : CalledFunc->args())
2318 if (CB->getArgOperand(i: Arg.getArgNo()) == V &&
2319 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2320 DT->dominates(Def: CB, User: CtxI))
2321 return true;
2322
    // If the value is used as a load/store address, then the pointer must be
    // non-null.
2324 if (V == getLoadStorePointerOperand(V: U)) {
2325 const Instruction *I = cast<Instruction>(Val: U);
2326 if (!NullPointerIsDefined(F: I->getFunction(),
2327 AS: V->getType()->getPointerAddressSpace()) &&
2328 DT->dominates(Def: I, User: CtxI))
2329 return true;
2330 }
2331
2332 if ((match(V: U, P: m_IDiv(L: m_Value(), R: m_Specific(V))) ||
2333 match(V: U, P: m_IRem(L: m_Value(), R: m_Specific(V)))) &&
2334 isValidAssumeForContext(Inv: cast<Instruction>(Val: U), CxtI: CtxI, DT))
2335 return true;
2336
2337 // Consider only compare instructions uniquely controlling a branch
2338 Value *RHS;
2339 CmpInst::Predicate Pred;
2340 if (!match(V: U, P: m_c_ICmp(Pred, L: m_Specific(V), R: m_Value(V&: RHS))))
2341 continue;
2342
2343 bool NonNullIfTrue;
2344 if (cmpExcludesZero(Pred, RHS))
2345 NonNullIfTrue = true;
2346 else if (cmpExcludesZero(Pred: CmpInst::getInversePredicate(pred: Pred), RHS))
2347 NonNullIfTrue = false;
2348 else
2349 continue;
2350
2351 SmallVector<const User *, 4> WorkList;
2352 SmallPtrSet<const User *, 4> Visited;
2353 for (const auto *CmpU : U->users()) {
2354 assert(WorkList.empty() && "Should be!");
2355 if (Visited.insert(Ptr: CmpU).second)
2356 WorkList.push_back(Elt: CmpU);
2357
2358 while (!WorkList.empty()) {
2359 auto *Curr = WorkList.pop_back_val();
2360
        // If a user is an AND, add all its users to the work list. We only
        // propagate the "pred != null" condition through AND because it is
        // only correct to assume that all conditions of an AND are met in
        // the true branch.
2364 // TODO: Support similar logic of OR and EQ predicate?
2365 if (NonNullIfTrue)
2366 if (match(V: Curr, P: m_LogicalAnd(L: m_Value(), R: m_Value()))) {
2367 for (const auto *CurrU : Curr->users())
2368 if (Visited.insert(Ptr: CurrU).second)
2369 WorkList.push_back(Elt: CurrU);
2370 continue;
2371 }
2372
2373 if (const BranchInst *BI = dyn_cast<BranchInst>(Val: Curr)) {
2374 assert(BI->isConditional() && "uses a comparison!");
2375
2376 BasicBlock *NonNullSuccessor =
2377 BI->getSuccessor(i: NonNullIfTrue ? 0 : 1);
2378 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2379 if (Edge.isSingleEdge() && DT->dominates(BBE: Edge, BB: CtxI->getParent()))
2380 return true;
2381 } else if (NonNullIfTrue && isGuard(U: Curr) &&
2382 DT->dominates(Def: cast<Instruction>(Val: Curr), User: CtxI)) {
2383 return true;
2384 }
2385 }
2386 }
2387 }
2388
2389 return false;
2390}
2391
/// Does the 'Ranges' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never equal to Value?
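/// For example (illustrative): !range metadata describing the single range
/// [1, 10) excludes the value 0, while a range such as [0, 5) does not.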
2395static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2396 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2397 assert(NumRanges >= 1);
2398 for (unsigned i = 0; i < NumRanges; ++i) {
2399 ConstantInt *Lower =
2400 mdconst::extract<ConstantInt>(MD: Ranges->getOperand(I: 2 * i + 0));
2401 ConstantInt *Upper =
2402 mdconst::extract<ConstantInt>(MD: Ranges->getOperand(I: 2 * i + 1));
2403 ConstantRange Range(Lower->getValue(), Upper->getValue());
2404 if (Range.contains(Val: Value))
2405 return false;
2406 }
2407 return true;
2408}
2409
2410/// Try to detect a recurrence that monotonically increases/decreases from a
2411/// non-zero starting value. These are common as induction variables.
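/// For example (illustrative):
///   %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
///   %iv.next = add nuw i32 %iv, 1
/// starts non-zero and, because the add cannot wrap unsigned, can never
/// reach zero.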
2412static bool isNonZeroRecurrence(const PHINode *PN) {
2413 BinaryOperator *BO = nullptr;
2414 Value *Start = nullptr, *Step = nullptr;
2415 const APInt *StartC, *StepC;
2416 if (!matchSimpleRecurrence(P: PN, BO, Start, Step) ||
2417 !match(V: Start, P: m_APInt(Res&: StartC)) || StartC->isZero())
2418 return false;
2419
2420 switch (BO->getOpcode()) {
2421 case Instruction::Add:
2422 // Starting from non-zero and stepping away from zero can never wrap back
2423 // to zero.
2424 return BO->hasNoUnsignedWrap() ||
2425 (BO->hasNoSignedWrap() && match(V: Step, P: m_APInt(Res&: StepC)) &&
2426 StartC->isNegative() == StepC->isNegative());
2427 case Instruction::Mul:
2428 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2429 match(V: Step, P: m_APInt(Res&: StepC)) && !StepC->isZero();
2430 case Instruction::Shl:
2431 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2432 case Instruction::AShr:
2433 case Instruction::LShr:
2434 return BO->isExact();
2435 default:
2436 return false;
2437 }
2438}
2439
2440static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2441 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2442 Value *Y, bool NSW, bool NUW) {
2443 if (NUW)
2444 return isKnownNonZero(V: Y, DemandedElts, Q, Depth) ||
2445 isKnownNonZero(V: X, DemandedElts, Q, Depth);
2446
2447 KnownBits XKnown = computeKnownBits(V: X, DemandedElts, Depth, Q);
2448 KnownBits YKnown = computeKnownBits(V: Y, DemandedElts, Depth, Q);
2449
2450 // If X and Y are both non-negative (as signed values) then their sum is not
2451 // zero unless both X and Y are zero.
2452 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2453 if (isKnownNonZero(V: Y, DemandedElts, Q, Depth) ||
2454 isKnownNonZero(V: X, DemandedElts, Q, Depth))
2455 return true;
2456
2457 // If X and Y are both negative (as signed values) then their sum is not
2458 // zero unless both X and Y equal INT_MIN.
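  // For example (illustrative): in i8, the only pair of negative values whose
  // sum wraps to zero is (-128) + (-128), so any additional known-one bit
  // below the sign bit of X or Y rules out a zero result.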
2459 if (XKnown.isNegative() && YKnown.isNegative()) {
2460 APInt Mask = APInt::getSignedMaxValue(numBits: BitWidth);
2461 // The sign bit of X is set. If some other bit is set then X is not equal
2462 // to INT_MIN.
2463 if (XKnown.One.intersects(RHS: Mask))
2464 return true;
2465 // The sign bit of Y is set. If some other bit is set then Y is not equal
2466 // to INT_MIN.
2467 if (YKnown.One.intersects(RHS: Mask))
2468 return true;
2469 }
2470
2471 // The sum of a non-negative number and a power of two is not zero.
2472 if (XKnown.isNonNegative() &&
2473 isKnownToBeAPowerOfTwo(V: Y, /*OrZero*/ false, Depth, Q))
2474 return true;
2475 if (YKnown.isNonNegative() &&
2476 isKnownToBeAPowerOfTwo(V: X, /*OrZero*/ false, Depth, Q))
2477 return true;
2478
2479 return KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, LHS: XKnown, RHS: YKnown)
2480 .isNonZero();
2481}
2482
2483static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2484 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2485 Value *Y) {
2486 // TODO: Move this case into isKnownNonEqual().
2487 if (auto *C = dyn_cast<Constant>(Val: X))
2488 if (C->isNullValue() && isKnownNonZero(V: Y, DemandedElts, Q, Depth))
2489 return true;
2490
2491 return ::isKnownNonEqual(V1: X, V2: Y, Depth, Q);
2492}
2493
2494static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2495 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2496 Value *Y, bool NSW, bool NUW) {
2497 // If X and Y are non-zero then so is X * Y as long as the multiplication
2498 // does not overflow.
2499 if (NSW || NUW)
2500 return isKnownNonZero(V: X, DemandedElts, Q, Depth) &&
2501 isKnownNonZero(V: Y, DemandedElts, Q, Depth);
2502
2503 // If either X or Y is odd, then if the other is non-zero the result can't
2504 // be zero.
2505 KnownBits XKnown = computeKnownBits(V: X, DemandedElts, Depth, Q);
2506 if (XKnown.One[0])
2507 return isKnownNonZero(V: Y, DemandedElts, Q, Depth);
2508
2509 KnownBits YKnown = computeKnownBits(V: Y, DemandedElts, Depth, Q);
2510 if (YKnown.One[0])
2511 return XKnown.isNonZero() || isKnownNonZero(V: X, DemandedElts, Q, Depth);
2512
  // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY is
  // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
  // the lowest known one of X and Y. If they are non-zero, the result
  // must be non-zero. We can check that LSB(X) * LSB(Y) != 0 by checking
  // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
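  // For example (illustrative): in i8, if X has at most 2 trailing zeros and
  // Y has at most 3, their lowest set bits multiply to a power of two no
  // higher than bit 5, so the product cannot wrap to zero in 8 bits.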
2518 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2519 BitWidth;
2520}
2521
2522static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2523 unsigned Depth, const SimplifyQuery &Q,
2524 const KnownBits &KnownVal) {
2525 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2526 switch (I->getOpcode()) {
2527 case Instruction::Shl:
2528 return Lhs.shl(ShiftAmt: Rhs);
2529 case Instruction::LShr:
2530 return Lhs.lshr(ShiftAmt: Rhs);
2531 case Instruction::AShr:
2532 return Lhs.ashr(ShiftAmt: Rhs);
2533 default:
2534 llvm_unreachable("Unknown Shift Opcode");
2535 }
2536 };
2537
2538 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2539 switch (I->getOpcode()) {
2540 case Instruction::Shl:
2541 return Lhs.lshr(ShiftAmt: Rhs);
2542 case Instruction::LShr:
2543 case Instruction::AShr:
2544 return Lhs.shl(ShiftAmt: Rhs);
2545 default:
2546 llvm_unreachable("Unknown Shift Opcode");
2547 }
2548 };
2549
2550 if (KnownVal.isUnknown())
2551 return false;
2552
2553 KnownBits KnownCnt =
2554 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Depth, Q);
2555 APInt MaxShift = KnownCnt.getMaxValue();
2556 unsigned NumBits = KnownVal.getBitWidth();
2557 if (MaxShift.uge(RHS: NumBits))
2558 return false;
2559
2560 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2561 return true;
2562
2563 // If all of the bits shifted out are known to be zero, and Val is known
2564 // non-zero then at least one non-zero bit must remain.
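  // For example (illustrative): for an i8 shl with a shift amount of at most
  // 3, if the top 3 bits of the operand are known zero and the operand is
  // known non-zero, its lowest set bit is in bits 0-4 and survives the shift,
  // so the result is non-zero.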
2565 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2566 .eq(RHS: InvShiftOp(APInt::getAllOnes(numBits: NumBits), NumBits - MaxShift)) &&
2567 isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth))
2568 return true;
2569
2570 return false;
2571}
2572
2573static bool isKnownNonZeroFromOperator(const Operator *I,
2574 const APInt &DemandedElts,
2575 unsigned Depth, const SimplifyQuery &Q) {
2576 unsigned BitWidth = getBitWidth(Ty: I->getType()->getScalarType(), DL: Q.DL);
2577 switch (I->getOpcode()) {
2578 case Instruction::Alloca:
2579 // Alloca never returns null, malloc might.
2580 return I->getType()->getPointerAddressSpace() == 0;
2581 case Instruction::GetElementPtr:
2582 if (I->getType()->isPointerTy())
2583 return isGEPKnownNonNull(GEP: cast<GEPOperator>(Val: I), Depth, Q);
2584 break;
2585 case Instruction::BitCast: {
    // We need to be a bit careful here. We can only peek through the bitcast
    // if the scalar size of elements in the operand is smaller than, and
    // evenly divides, the scalar size they are being cast to. Take three
    // cases:
2589 //
2590 // 1) Unsafe:
2591 // bitcast <2 x i16> %NonZero to <4 x i8>
2592 //
    //     %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
    //     <4 x i8> requires that all 4 i8 elements be non-zero, which isn't
    //     guaranteed (imagine just the sign bit set in the 2 i16 elements).
2596 //
2597 // 2) Unsafe:
2598 // bitcast <4 x i3> %NonZero to <3 x i4>
2599 //
    //     Even though the scalar size of the src (`i3`) is smaller than the
    //     scalar size of the dst (`i4`), because `i3` does not evenly divide
    //     `i4` it is possible for the `3 x i4` elements to be zero: some
    //     elements in the destination don't contain any full src element.
2605 //
2606 // 3) Safe:
2607 // bitcast <4 x i8> %NonZero to <2 x i16>
2608 //
    //     This is always safe as non-zero in the 4 i8 elements implies
    //     non-zero in the combination of any two adjacent ones. Since i16 is
    //     a multiple of i8, each i16 is guaranteed to contain 2 full i8
    //     elements.
2612 // This all implies the 2 i16 elements are non-zero.
2613 Type *FromTy = I->getOperand(i: 0)->getType();
2614 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2615 (BitWidth % getBitWidth(Ty: FromTy->getScalarType(), DL: Q.DL)) == 0)
2616 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2617 } break;
2618 case Instruction::IntToPtr:
2619 // Note that we have to take special care to avoid looking through
2620 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2621 // as casts that can alter the value, e.g., AddrSpaceCasts.
2622 if (!isa<ScalableVectorType>(Val: I->getType()) &&
2623 Q.DL.getTypeSizeInBits(Ty: I->getOperand(i: 0)->getType()).getFixedValue() <=
2624 Q.DL.getTypeSizeInBits(Ty: I->getType()).getFixedValue())
2625 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2626 break;
2627 case Instruction::PtrToInt:
2628 // Similar to int2ptr above, we can look through ptr2int here if the cast
2629 // is a no-op or an extend and not a truncate.
2630 if (!isa<ScalableVectorType>(Val: I->getType()) &&
2631 Q.DL.getTypeSizeInBits(Ty: I->getOperand(i: 0)->getType()).getFixedValue() <=
2632 Q.DL.getTypeSizeInBits(Ty: I->getType()).getFixedValue())
2633 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2634 break;
2635 case Instruction::Trunc:
2636 // nuw/nsw trunc preserves zero/non-zero status of input.
2637 if (auto *TI = dyn_cast<TruncInst>(Val: I))
2638 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
2639 return isKnownNonZero(V: TI->getOperand(i_nocapture: 0), Q, Depth);
2640 break;
2641
2642 case Instruction::Sub:
2643 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, X: I->getOperand(i: 0),
2644 Y: I->getOperand(i: 1));
2645 case Instruction::Or:
2646 // X | Y != 0 if X != 0 or Y != 0.
2647 return isKnownNonZero(V: I->getOperand(i: 1), DemandedElts, Q, Depth) ||
2648 isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
2649 case Instruction::SExt:
2650 case Instruction::ZExt:
2651 // ext X != 0 if X != 0.
2652 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2653
2654 case Instruction::Shl: {
2655 // shl nsw/nuw can't remove any non-zero bits.
2656 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(Val: I);
2657 if (Q.IIQ.hasNoUnsignedWrap(Op: BO) || Q.IIQ.hasNoSignedWrap(Op: BO))
2658 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2659
2660 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2661 // if the lowest bit is shifted off the end.
2662 KnownBits Known(BitWidth);
2663 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Depth, Q);
2664 if (Known.One[0])
2665 return true;
2666
2667 return isNonZeroShift(I, DemandedElts, Depth, Q, KnownVal: Known);
2668 }
2669 case Instruction::LShr:
2670 case Instruction::AShr: {
2671 // shr exact can only shift out zero bits.
2672 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(Val: I);
2673 if (BO->isExact())
2674 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2675
2676 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
2677 // defined if the sign bit is shifted off the end.
2678 KnownBits Known =
2679 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Depth, Q);
2680 if (Known.isNegative())
2681 return true;
2682
2683 return isNonZeroShift(I, DemandedElts, Depth, Q, KnownVal: Known);
2684 }
2685 case Instruction::UDiv:
2686 case Instruction::SDiv: {
2687 // X / Y
2688 // div exact can only produce a zero if the dividend is zero.
2689 if (cast<PossiblyExactOperator>(Val: I)->isExact())
2690 return isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
2691
2692 KnownBits XKnown =
2693 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Depth, Q);
    // If X is fully unknown we won't be able to figure anything out, so don't
    // bother computing known bits for Y.
2696 if (XKnown.isUnknown())
2697 return false;
2698
2699 KnownBits YKnown =
2700 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Depth, Q);
2701 if (I->getOpcode() == Instruction::SDiv) {
      // For signed division we need to compare the absolute values of the
      // operands.
2703 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
2704 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
2705 }
    // If X u>= Y then the div is non-zero (0/0 is UB).
2707 std::optional<bool> XUgeY = KnownBits::uge(LHS: XKnown, RHS: YKnown);
    // If X is totally unknown or X u< Y, we won't be able to prove non-zero
    // with computeKnownBits, so just return early.
2710 return XUgeY && *XUgeY;
2711 }
2712 case Instruction::Add: {
2713 // X + Y.
2714
    // If the add has the nuw flag, then if either X or Y is non-zero the
    // result is non-zero.
2717 auto *BO = cast<OverflowingBinaryOperator>(Val: I);
2718 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, X: I->getOperand(i: 0),
2719 Y: I->getOperand(i: 1), NSW: Q.IIQ.hasNoSignedWrap(Op: BO),
2720 NUW: Q.IIQ.hasNoUnsignedWrap(Op: BO));
2721 }
2722 case Instruction::Mul: {
2723 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(Val: I);
2724 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, X: I->getOperand(i: 0),
2725 Y: I->getOperand(i: 1), NSW: Q.IIQ.hasNoSignedWrap(Op: BO),
2726 NUW: Q.IIQ.hasNoUnsignedWrap(Op: BO));
2727 }
2728 case Instruction::Select: {
2729 // (C ? X : Y) != 0 if X != 0 and Y != 0.
2730
2731 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
2732 // then see if the select condition implies the arm is non-zero. For example
2733 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
2734 // dominated by `X != 0`.
2735 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
2736 Value *Op;
2737 Op = IsTrueArm ? I->getOperand(i: 1) : I->getOperand(i: 2);
2738 // Op is trivially non-zero.
2739 if (isKnownNonZero(V: Op, DemandedElts, Q, Depth))
2740 return true;
2741
2742 // The condition of the select dominates the true/false arm. Check if the
2743 // condition implies that a given arm is non-zero.
2744 Value *X;
2745 CmpInst::Predicate Pred;
2746 if (!match(V: I->getOperand(i: 0), P: m_c_ICmp(Pred, L: m_Specific(V: Op), R: m_Value(V&: X))))
2747 return false;
2748
2749 if (!IsTrueArm)
2750 Pred = ICmpInst::getInversePredicate(pred: Pred);
2751
2752 return cmpExcludesZero(Pred, RHS: X);
2753 };
2754
2755 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
2756 SelectArmIsNonZero(/* IsTrueArm */ false))
2757 return true;
2758 break;
2759 }
2760 case Instruction::PHI: {
2761 auto *PN = cast<PHINode>(Val: I);
2762 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
2763 return true;
2764
2765 // Check if all incoming values are non-zero using recursion.
2766 SimplifyQuery RecQ = Q;
2767 unsigned NewDepth = std::max(a: Depth, b: MaxAnalysisRecursionDepth - 1);
2768 return llvm::all_of(Range: PN->operands(), P: [&](const Use &U) {
2769 if (U.get() == PN)
2770 return true;
2771 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2772 // Check if the branch on the phi excludes zero.
2773 ICmpInst::Predicate Pred;
2774 Value *X;
2775 BasicBlock *TrueSucc, *FalseSucc;
2776 if (match(V: RecQ.CxtI,
2777 P: m_Br(C: m_c_ICmp(Pred, L: m_Specific(V: U.get()), R: m_Value(V&: X)),
2778 T: m_BasicBlock(V&: TrueSucc), F: m_BasicBlock(V&: FalseSucc)))) {
2779 // Check for cases of duplicate successors.
2780 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
2781 // If we're using the false successor, invert the predicate.
2782 if (FalseSucc == PN->getParent())
2783 Pred = CmpInst::getInversePredicate(pred: Pred);
2784 if (cmpExcludesZero(Pred, RHS: X))
2785 return true;
2786 }
2787 }
2788 // Finally recurse on the edge and check it directly.
2789 return isKnownNonZero(V: U.get(), DemandedElts, Q: RecQ, Depth: NewDepth);
2790 });
2791 }
2792 case Instruction::InsertElement: {
2793 if (isa<ScalableVectorType>(Val: I->getType()))
2794 break;
2795
2796 const Value *Vec = I->getOperand(i: 0);
2797 const Value *Elt = I->getOperand(i: 1);
2798 auto *CIdx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
2799
2800 unsigned NumElts = DemandedElts.getBitWidth();
2801 APInt DemandedVecElts = DemandedElts;
2802 bool SkipElt = false;
2803    // If we know the index we are inserting to, clear that bit from the Vec check.
2804 if (CIdx && CIdx->getValue().ult(RHS: NumElts)) {
2805 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
2806 SkipElt = !DemandedElts[CIdx->getZExtValue()];
2807 }
2808
2809    // Result is non-zero if Elt is non-zero and the rest of the demanded elts
2810    // in Vec are non-zero.
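    // For example (illustrative IR): insertelement <2 x i32> %v, i32 %e, i64 0
    // is known non-zero if %e is non-zero and element 1 of %v (when demanded)
    // is non-zero.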
2811 return (SkipElt || isKnownNonZero(V: Elt, Q, Depth)) &&
2812 (DemandedVecElts.isZero() ||
2813 isKnownNonZero(V: Vec, DemandedElts: DemandedVecElts, Q, Depth));
2814 }
2815 case Instruction::ExtractElement:
2816 if (const auto *EEI = dyn_cast<ExtractElementInst>(Val: I)) {
2817 const Value *Vec = EEI->getVectorOperand();
2818 const Value *Idx = EEI->getIndexOperand();
2819 auto *CIdx = dyn_cast<ConstantInt>(Val: Idx);
2820 if (auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType())) {
2821 unsigned NumElts = VecTy->getNumElements();
2822 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
2823 if (CIdx && CIdx->getValue().ult(RHS: NumElts))
2824 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
2825 return isKnownNonZero(V: Vec, DemandedElts: DemandedVecElts, Q, Depth);
2826 }
2827 }
2828 break;
2829 case Instruction::ShuffleVector: {
2830 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: I);
2831 if (!Shuf)
2832 break;
2833 APInt DemandedLHS, DemandedRHS;
2834 // For undef elements, we don't know anything about the common state of
2835 // the shuffle result.
2836 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
2837 break;
2838 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
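    // For example (illustrative IR):
    //   shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 3>
    // is non-zero if element 0 of %a and element 1 of %b are both non-zero.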
2839 return (DemandedRHS.isZero() ||
2840 isKnownNonZero(V: Shuf->getOperand(i_nocapture: 1), DemandedElts: DemandedRHS, Q, Depth)) &&
2841 (DemandedLHS.isZero() ||
2842 isKnownNonZero(V: Shuf->getOperand(i_nocapture: 0), DemandedElts: DemandedLHS, Q, Depth));
2843 }
2844 case Instruction::Freeze:
2845 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth) &&
2846 isGuaranteedNotToBePoison(V: I->getOperand(i: 0), AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT,
2847 Depth);
2848 case Instruction::Load: {
2849 auto *LI = cast<LoadInst>(Val: I);
2850    // A load tagged with nonnull metadata, or with dereferenceable metadata in
2851    // an address space where null is not a defined pointer value, is never null.
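    // For example (illustrative IR): %p = load ptr, ptr %q, !nonnull !0 is
    // known to be non-null.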
2852 if (auto *PtrT = dyn_cast<PointerType>(Val: I->getType())) {
2853 if (Q.IIQ.getMetadata(I: LI, KindID: LLVMContext::MD_nonnull) ||
2854 (Q.IIQ.getMetadata(I: LI, KindID: LLVMContext::MD_dereferenceable) &&
2855 !NullPointerIsDefined(F: LI->getFunction(), AS: PtrT->getAddressSpace())))
2856 return true;
2857 } else if (MDNode *Ranges = Q.IIQ.getMetadata(I: LI, KindID: LLVMContext::MD_range)) {
2858 return rangeMetadataExcludesValue(Ranges, Value: APInt::getZero(numBits: BitWidth));
2859 }
2860
2861 // No need to fall through to computeKnownBits as range metadata is already
2862 // handled in isKnownNonZero.
2863 return false;
2864 }
2865 case Instruction::ExtractValue: {
2866 const WithOverflowInst *WO;
2867 if (match(V: I, P: m_ExtractValue<0>(V: m_WithOverflowInst(I&: WO)))) {
2868 switch (WO->getBinaryOp()) {
2869 default:
2870 break;
2871 case Instruction::Add:
2872 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
2873 X: WO->getArgOperand(i: 0), Y: WO->getArgOperand(i: 1),
2874 /*NSW=*/false,
2875 /*NUW=*/false);
2876 case Instruction::Sub:
2877 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
2878 X: WO->getArgOperand(i: 0), Y: WO->getArgOperand(i: 1));
2879 case Instruction::Mul:
2880 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
2881 X: WO->getArgOperand(i: 0), Y: WO->getArgOperand(i: 1),
2882 /*NSW=*/false, /*NUW=*/false);
2884 }
2885 }
2886 break;
2887 }
2888 case Instruction::Call:
2889 case Instruction::Invoke: {
2890 const auto *Call = cast<CallBase>(Val: I);
2891 if (I->getType()->isPointerTy()) {
2892 if (Call->isReturnNonNull())
2893 return true;
2894 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, MustPreserveNullness: true))
2895 return isKnownNonZero(V: RP, Q, Depth);
2896 } else {
2897 if (MDNode *Ranges = Q.IIQ.getMetadata(I: Call, KindID: LLVMContext::MD_range))
2898 return rangeMetadataExcludesValue(Ranges, Value: APInt::getZero(numBits: BitWidth));
2899 if (std::optional<ConstantRange> Range = Call->getRange()) {
2900 const APInt ZeroValue(Range->getBitWidth(), 0);
2901 if (!Range->contains(Val: ZeroValue))
2902 return true;
2903 }
2904 if (const Value *RV = Call->getReturnedArgOperand())
2905 if (RV->getType() == I->getType() && isKnownNonZero(V: RV, Q, Depth))
2906 return true;
2907 }
2908
2909 if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
2910 switch (II->getIntrinsicID()) {
2911 case Intrinsic::sshl_sat:
2912 case Intrinsic::ushl_sat:
2913 case Intrinsic::abs:
2914 case Intrinsic::bitreverse:
2915 case Intrinsic::bswap:
2916 case Intrinsic::ctpop:
2917 return isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
2918      // NB: We don't handle usub_sat here because in any case where we can
2919      // prove it is non-zero, we will fold it to `sub nuw` in InstCombine.
2920 case Intrinsic::ssub_sat:
2921 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
2922 X: II->getArgOperand(i: 0), Y: II->getArgOperand(i: 1));
2923 case Intrinsic::sadd_sat:
2924 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
2925 X: II->getArgOperand(i: 0), Y: II->getArgOperand(i: 1),
2926                            /*NSW=*/true, /*NUW=*/false);
2927      // or/umax/umin/smax/smin of all non-zero elements is always non-zero.
2928 case Intrinsic::vector_reduce_or:
2929 case Intrinsic::vector_reduce_umax:
2930 case Intrinsic::vector_reduce_umin:
2931 case Intrinsic::vector_reduce_smax:
2932 case Intrinsic::vector_reduce_smin:
2933 return isKnownNonZero(V: II->getArgOperand(i: 0), Q, Depth);
2934 case Intrinsic::umax:
2935 case Intrinsic::uadd_sat:
2936 return isKnownNonZero(V: II->getArgOperand(i: 1), DemandedElts, Q, Depth) ||
2937 isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
2938 case Intrinsic::smax: {
2939 // If either arg is strictly positive the result is non-zero. Otherwise
2940 // the result is non-zero if both ops are non-zero.
2941 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
2942 const KnownBits &OpKnown) {
2943 if (!OpNonZero.has_value())
2944 OpNonZero = OpKnown.isNonZero() ||
2945 isKnownNonZero(V: Op, DemandedElts, Q, Depth);
2946 return *OpNonZero;
2947 };
2948 // Avoid re-computing isKnownNonZero.
2949 std::optional<bool> Op0NonZero, Op1NonZero;
2950 KnownBits Op1Known =
2951 computeKnownBits(V: II->getArgOperand(i: 1), DemandedElts, Depth, Q);
2952 if (Op1Known.isNonNegative() &&
2953 IsNonZero(II->getArgOperand(i: 1), Op1NonZero, Op1Known))
2954 return true;
2955 KnownBits Op0Known =
2956 computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Depth, Q);
2957 if (Op0Known.isNonNegative() &&
2958 IsNonZero(II->getArgOperand(i: 0), Op0NonZero, Op0Known))
2959 return true;
2960 return IsNonZero(II->getArgOperand(i: 1), Op1NonZero, Op1Known) &&
2961 IsNonZero(II->getArgOperand(i: 0), Op0NonZero, Op0Known);
2962 }
2963 case Intrinsic::smin: {
2964 // If either arg is negative the result is non-zero. Otherwise
2965 // the result is non-zero if both ops are non-zero.
2966 KnownBits Op1Known =
2967 computeKnownBits(V: II->getArgOperand(i: 1), DemandedElts, Depth, Q);
2968 if (Op1Known.isNegative())
2969 return true;
2970 KnownBits Op0Known =
2971 computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Depth, Q);
2972 if (Op0Known.isNegative())
2973 return true;
2974
2975 if (Op1Known.isNonZero() && Op0Known.isNonZero())
2976 return true;
2977 }
2978 [[fallthrough]];
2979 case Intrinsic::umin:
2980 return isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth) &&
2981 isKnownNonZero(V: II->getArgOperand(i: 1), DemandedElts, Q, Depth);
2982 case Intrinsic::cttz:
2983 return computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Depth, Q)
2984 .Zero[0];
2985 case Intrinsic::ctlz:
2986 return computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Depth, Q)
2987 .isNonNegative();
2988 case Intrinsic::fshr:
2989 case Intrinsic::fshl:
2990 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
2991 if (II->getArgOperand(i: 0) == II->getArgOperand(i: 1))
2992 return isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
2993 break;
2994 case Intrinsic::vscale:
2995 return true;
2996 case Intrinsic::experimental_get_vector_length:
2997 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2998 default:
2999 break;
3000 }
3001 break;
3002 }
3003
3004 return false;
3005 }
3006 }
3007
3008 KnownBits Known(BitWidth);
3009 computeKnownBits(V: I, DemandedElts, Known, Depth, Q);
3010 return Known.One != 0;
3011}
3012
3013/// Return true if the given value is known to be non-zero when defined. For
3014/// vectors, return true if every demanded element is known to be non-zero when
3015/// defined. For pointers, if the context instruction and dominator tree are
3016/// specified, perform context-sensitive analysis and return true if the
3017/// pointer couldn't possibly be null at the specified instruction.
3018/// Supports values with integer or pointer type and vectors of integers.
3019bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3020 const SimplifyQuery &Q, unsigned Depth) {
3021 Type *Ty = V->getType();
3022
3023#ifndef NDEBUG
3024 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3025
3026 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: Ty)) {
3027 assert(
3028 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3029 "DemandedElt width should equal the fixed vector number of elements");
3030 } else {
3031 assert(DemandedElts == APInt(1, 1) &&
3032 "DemandedElt width should be 1 for scalars");
3033 }
3034#endif
3035
3036 if (auto *C = dyn_cast<Constant>(Val: V)) {
3037 if (C->isNullValue())
3038 return false;
3039 if (isa<ConstantInt>(Val: C))
3040 // Must be non-zero due to null test above.
3041 return true;
3042
3043 // For constant vectors, check that all elements are poison or known
3044 // non-zero to determine that the whole vector is known non-zero.
3045 if (auto *VecTy = dyn_cast<FixedVectorType>(Val: Ty)) {
3046 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3047 if (!DemandedElts[i])
3048 continue;
3049 Constant *Elt = C->getAggregateElement(Elt: i);
3050 if (!Elt || Elt->isNullValue())
3051 return false;
3052 if (!isa<PoisonValue>(Val: Elt) && !isa<ConstantInt>(Val: Elt))
3053 return false;
3054 }
3055 return true;
3056 }
3057
3058    // A global variable in address space 0 is non-null unless it is extern weak
3059 // or an absolute symbol reference. Other address spaces may have null as a
3060 // valid address for a global, so we can't assume anything.
3061 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) {
3062 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3063 GV->getType()->getAddressSpace() == 0)
3064 return true;
3065 }
3066
3067 // For constant expressions, fall through to the Operator code below.
3068 if (!isa<ConstantExpr>(Val: V))
3069 return false;
3070 }
3071
3072 if (const auto *A = dyn_cast<Argument>(Val: V))
3073 if (std::optional<ConstantRange> Range = A->getRange()) {
3074 const APInt ZeroValue(Range->getBitWidth(), 0);
3075 if (!Range->contains(Val: ZeroValue))
3076 return true;
3077 }
3078
3079 if (!isa<Constant>(Val: V) && isKnownNonZeroFromAssume(V, Q))
3080 return true;
3081
3082 // Some of the tests below are recursive, so bail out if we hit the limit.
3083 if (Depth++ >= MaxAnalysisRecursionDepth)
3084 return false;
3085
3086 // Check for pointer simplifications.
3087
3088 if (PointerType *PtrTy = dyn_cast<PointerType>(Val: Ty)) {
3089    // A byval or inalloca argument is never null if null is not a defined
3090    // pointer value in its address space. A nonnull argument is assumed never 0.
3091 if (const Argument *A = dyn_cast<Argument>(Val: V)) {
3092 if (((A->hasPassPointeeByValueCopyAttr() &&
3093 !NullPointerIsDefined(F: A->getParent(), AS: PtrTy->getAddressSpace())) ||
3094 A->hasNonNullAttr()))
3095 return true;
3096 }
3097 }
3098
3099 if (const auto *I = dyn_cast<Operator>(Val: V))
3100 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3101 return true;
3102
3103 if (!isa<Constant>(Val: V) &&
3104 isKnownNonNullFromDominatingCondition(V, CtxI: Q.CxtI, DT: Q.DT))
3105 return true;
3106
3107 return false;
3108}
3109
3110bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3111 unsigned Depth) {
3112 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
3113 APInt DemandedElts =
3114 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
3115 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3116}
3117
3118/// If the pair of operators are the same invertible function, return the
3119/// operands of the function corresponding to each input. Otherwise,
3120/// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3121/// every input value to exactly one output value. This is equivalent to
3122/// saying that Op1 and Op2 are equal exactly when the specified pair of
3123/// operands are equal, (except that Op1 and Op2 may be poison more often.)
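/// For example (illustrative IR): given Op1 = add i8 %a, %c and
/// Op2 = add i8 %b, %c, this returns the pair (%a, %b), since x -> x + %c is a
/// bijection on i8, so the adds are equal exactly when %a == %b.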
3124static std::optional<std::pair<Value*, Value*>>
3125getInvertibleOperands(const Operator *Op1,
3126 const Operator *Op2) {
3127 if (Op1->getOpcode() != Op2->getOpcode())
3128 return std::nullopt;
3129
3130 auto getOperands = [&](unsigned OpNum) -> auto {
3131 return std::make_pair(x: Op1->getOperand(i: OpNum), y: Op2->getOperand(i: OpNum));
3132 };
3133
3134 switch (Op1->getOpcode()) {
3135 default:
3136 break;
3137 case Instruction::Or:
3138 if (!cast<PossiblyDisjointInst>(Val: Op1)->isDisjoint() ||
3139 !cast<PossiblyDisjointInst>(Val: Op2)->isDisjoint())
3140 break;
3141 [[fallthrough]];
3142 case Instruction::Xor:
3143 case Instruction::Add: {
3144 Value *Other;
3145 if (match(V: Op2, P: m_c_BinOp(L: m_Specific(V: Op1->getOperand(i: 0)), R: m_Value(V&: Other))))
3146 return std::make_pair(x: Op1->getOperand(i: 1), y&: Other);
3147 if (match(V: Op2, P: m_c_BinOp(L: m_Specific(V: Op1->getOperand(i: 1)), R: m_Value(V&: Other))))
3148 return std::make_pair(x: Op1->getOperand(i: 0), y&: Other);
3149 break;
3150 }
3151 case Instruction::Sub:
3152 if (Op1->getOperand(i: 0) == Op2->getOperand(i: 0))
3153 return getOperands(1);
3154 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1))
3155 return getOperands(0);
3156 break;
3157 case Instruction::Mul: {
3158    // invertible if A * B == (A * B) mod 2^N where A and B are integers
3159    // and N is the bitwidth. The nsw case is non-obvious, but proven by
3160 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
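    // For example (illustrative IR): if %p = mul nuw i8 %a, 6 and
    // %q = mul nuw i8 %b, 6 are equal, then %a == %b, because neither product
    // wraps and multiplication by a non-zero constant is injective over the
    // integers.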
3161 auto *OBO1 = cast<OverflowingBinaryOperator>(Val: Op1);
3162 auto *OBO2 = cast<OverflowingBinaryOperator>(Val: Op2);
3163 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3164 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3165 break;
3166
3167 // Assume operand order has been canonicalized
3168 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1) &&
3169 isa<ConstantInt>(Val: Op1->getOperand(i: 1)) &&
3170 !cast<ConstantInt>(Val: Op1->getOperand(i: 1))->isZero())
3171 return getOperands(0);
3172 break;
3173 }
3174 case Instruction::Shl: {
3175 // Same as multiplies, with the difference that we don't need to check
3176 // for a non-zero multiply. Shifts always multiply by non-zero.
3177 auto *OBO1 = cast<OverflowingBinaryOperator>(Val: Op1);
3178 auto *OBO2 = cast<OverflowingBinaryOperator>(Val: Op2);
3179 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3180 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3181 break;
3182
3183 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1))
3184 return getOperands(0);
3185 break;
3186 }
3187 case Instruction::AShr:
3188 case Instruction::LShr: {
3189 auto *PEO1 = cast<PossiblyExactOperator>(Val: Op1);
3190 auto *PEO2 = cast<PossiblyExactOperator>(Val: Op2);
3191 if (!PEO1->isExact() || !PEO2->isExact())
3192 break;
3193
3194 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1))
3195 return getOperands(0);
3196 break;
3197 }
3198 case Instruction::SExt:
3199 case Instruction::ZExt:
3200 if (Op1->getOperand(i: 0)->getType() == Op2->getOperand(i: 0)->getType())
3201 return getOperands(0);
3202 break;
3203 case Instruction::PHI: {
3204 const PHINode *PN1 = cast<PHINode>(Val: Op1);
3205 const PHINode *PN2 = cast<PHINode>(Val: Op2);
3206
3207 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3208 // are a single invertible function of the start values? Note that repeated
3209 // application of an invertible function is also invertible
3210 BinaryOperator *BO1 = nullptr;
3211 Value *Start1 = nullptr, *Step1 = nullptr;
3212 BinaryOperator *BO2 = nullptr;
3213 Value *Start2 = nullptr, *Step2 = nullptr;
3214 if (PN1->getParent() != PN2->getParent() ||
3215 !matchSimpleRecurrence(P: PN1, BO&: BO1, Start&: Start1, Step&: Step1) ||
3216 !matchSimpleRecurrence(P: PN2, BO&: BO2, Start&: Start2, Step&: Step2))
3217 break;
3218
3219 auto Values = getInvertibleOperands(Op1: cast<Operator>(Val: BO1),
3220 Op2: cast<Operator>(Val: BO2));
3221 if (!Values)
3222 break;
3223
3224 // We have to be careful of mutually defined recurrences here. Ex:
3225 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3226 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3227 // The invertibility of these is complicated, and not worth reasoning
3228 // about (yet?).
3229 if (Values->first != PN1 || Values->second != PN2)
3230 break;
3231
3232 return std::make_pair(x&: Start1, y&: Start2);
3233 }
3234 }
3235 return std::nullopt;
3236}
3237
3238/// Return true if V1 == (binop V2, X), where X is known non-zero.
3239/// Only handle a small subset of binops where (binop V2, X) with non-zero X
3240/// implies V2 != V1.
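/// For example (illustrative IR): %v1 = xor i8 %v2, %x with %x known non-zero
/// implies %v1 != %v2, since x ^ v2 == v2 only when x == 0.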
3241static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3242 unsigned Depth, const SimplifyQuery &Q) {
3243 const BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: V1);
3244 if (!BO)
3245 return false;
3246 switch (BO->getOpcode()) {
3247 default:
3248 break;
3249 case Instruction::Or:
3250 if (!cast<PossiblyDisjointInst>(Val: V1)->isDisjoint())
3251 break;
3252 [[fallthrough]];
3253 case Instruction::Xor:
3254 case Instruction::Add:
3255 Value *Op = nullptr;
3256 if (V2 == BO->getOperand(i_nocapture: 0))
3257 Op = BO->getOperand(i_nocapture: 1);
3258 else if (V2 == BO->getOperand(i_nocapture: 1))
3259 Op = BO->getOperand(i_nocapture: 0);
3260 else
3261 return false;
3262 return isKnownNonZero(V: Op, Q, Depth: Depth + 1);
3263 }
3264 return false;
3265}
3266
3267/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3268/// the multiplication is nuw or nsw.
3269static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
3270 const SimplifyQuery &Q) {
3271 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: V2)) {
3272 const APInt *C;
3273 return match(V: OBO, P: m_Mul(L: m_Specific(V: V1), R: m_APInt(Res&: C))) &&
3274 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3275 !C->isZero() && !C->isOne() && isKnownNonZero(V: V1, Q, Depth: Depth + 1);
3276 }
3277 return false;
3278}
3279
3280/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3281/// the shift is nuw or nsw.
3282static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
3283 const SimplifyQuery &Q) {
3284 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: V2)) {
3285 const APInt *C;
3286 return match(V: OBO, P: m_Shl(L: m_Specific(V: V1), R: m_APInt(Res&: C))) &&
3287 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3288 !C->isZero() && isKnownNonZero(V: V1, Q, Depth: Depth + 1);
3289 }
3290 return false;
3291}
3292
3293static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3294 unsigned Depth, const SimplifyQuery &Q) {
3295 // Check two PHIs are in same block.
3296 if (PN1->getParent() != PN2->getParent())
3297 return false;
3298
3299 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3300 bool UsedFullRecursion = false;
3301 for (const BasicBlock *IncomBB : PN1->blocks()) {
3302 if (!VisitedBBs.insert(Ptr: IncomBB).second)
3303 continue; // Don't reprocess blocks that we have dealt with already.
3304 const Value *IV1 = PN1->getIncomingValueForBlock(BB: IncomBB);
3305 const Value *IV2 = PN2->getIncomingValueForBlock(BB: IncomBB);
3306 const APInt *C1, *C2;
3307 if (match(V: IV1, P: m_APInt(Res&: C1)) && match(V: IV2, P: m_APInt(Res&: C2)) && *C1 != *C2)
3308 continue;
3309
3310 // Only one pair of phi operands is allowed for full recursion.
3311 if (UsedFullRecursion)
3312 return false;
3313
3314 SimplifyQuery RecQ = Q;
3315 RecQ.CxtI = IncomBB->getTerminator();
3316 if (!isKnownNonEqual(V1: IV1, V2: IV2, Depth: Depth + 1, Q: RecQ))
3317 return false;
3318 UsedFullRecursion = true;
3319 }
3320 return true;
3321}
3322
3323static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth,
3324 const SimplifyQuery &Q) {
3325 const SelectInst *SI1 = dyn_cast<SelectInst>(Val: V1);
3326 if (!SI1)
3327 return false;
3328
3329 if (const SelectInst *SI2 = dyn_cast<SelectInst>(Val: V2)) {
3330 const Value *Cond1 = SI1->getCondition();
3331 const Value *Cond2 = SI2->getCondition();
3332 if (Cond1 == Cond2)
3333 return isKnownNonEqual(V1: SI1->getTrueValue(), V2: SI2->getTrueValue(),
3334 Depth: Depth + 1, Q) &&
3335 isKnownNonEqual(V1: SI1->getFalseValue(), V2: SI2->getFalseValue(),
3336 Depth: Depth + 1, Q);
3337 }
3338 return isKnownNonEqual(V1: SI1->getTrueValue(), V2, Depth: Depth + 1, Q) &&
3339 isKnownNonEqual(V1: SI1->getFalseValue(), V2, Depth: Depth + 1, Q);
3340}
3341
3342// Check to see if A is both a GEP and the incoming value for a PHI in a loop,
3343// and B is either a ptr or another GEP. If the PHI has 2 incoming values, one
3344// being the recursive GEP A and the other a pointer with the same base as B,
3345// then A can never equal B when the start offset is already at or past B's and
3346// the recursive GEP only steps the pointer further in that same direction.
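// For example (illustrative IR):
//   loop:
//     %p = phi ptr [ %start, %entry ], [ %a, %loop ]
//     %a = getelementptr inbounds i8, ptr %p, i64 4
// If %start shares B's base with a constant offset already at or past B's,
// then %a moves further away from B on every iteration and can never equal B.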
3347static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3348 const SimplifyQuery &Q) {
3349 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3350 return false;
3351
3352 auto *GEPA = dyn_cast<GEPOperator>(Val: A);
3353 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(Val: GEPA->idx_begin()))
3354 return false;
3355
3356 // Handle 2 incoming PHI values with one being a recursive GEP.
3357 auto *PN = dyn_cast<PHINode>(Val: GEPA->getPointerOperand());
3358 if (!PN || PN->getNumIncomingValues() != 2)
3359 return false;
3360
3361 // Search for the recursive GEP as an incoming operand, and record that as
3362 // Step.
3363 Value *Start = nullptr;
3364 Value *Step = const_cast<Value *>(A);
3365 if (PN->getIncomingValue(i: 0) == Step)
3366 Start = PN->getIncomingValue(i: 1);
3367 else if (PN->getIncomingValue(i: 1) == Step)
3368 Start = PN->getIncomingValue(i: 0);
3369 else
3370 return false;
3371
3372  // The other incoming value's base should match B's base. A and B are
3373  // non-equal if either:
3374  //   StartOffset >= OffsetB && StepOffset > 0, or
3375  //   StartOffset <= OffsetB && StepOffset < 0.
3376 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3377 // optimisation to inbounds GEPs only.
3378 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Ty: Start->getType());
3379 APInt StartOffset(IndexWidth, 0);
3380 Start = Start->stripAndAccumulateInBoundsConstantOffsets(DL: Q.DL, Offset&: StartOffset);
3381 APInt StepOffset(IndexWidth, 0);
3382 Step = Step->stripAndAccumulateInBoundsConstantOffsets(DL: Q.DL, Offset&: StepOffset);
3383
3384 // Check if Base Pointer of Step matches the PHI.
3385 if (Step != PN)
3386 return false;
3387 APInt OffsetB(IndexWidth, 0);
3388 B = B->stripAndAccumulateInBoundsConstantOffsets(DL: Q.DL, Offset&: OffsetB);
3389 return Start == B &&
3390 ((StartOffset.sge(RHS: OffsetB) && StepOffset.isStrictlyPositive()) ||
3391 (StartOffset.sle(RHS: OffsetB) && StepOffset.isNegative()));
3392}
3393
3394/// Return true if it is known that V1 != V2.
3395static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
3396 const SimplifyQuery &Q) {
3397 if (V1 == V2)
3398 return false;
3399 if (V1->getType() != V2->getType())
3400 // We can't look through casts yet.
3401 return false;
3402
3403 if (Depth >= MaxAnalysisRecursionDepth)
3404 return false;
3405
3406 // See if we can recurse through (exactly one of) our operands. This
3407 // requires our operation be 1-to-1 and map every input value to exactly
3408 // one output value. Such an operation is invertible.
3409 auto *O1 = dyn_cast<Operator>(Val: V1);
3410 auto *O2 = dyn_cast<Operator>(Val: V2);
3411 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3412 if (auto Values = getInvertibleOperands(Op1: O1, Op2: O2))
3413 return isKnownNonEqual(V1: Values->first, V2: Values->second, Depth: Depth + 1, Q);
3414
3415 if (const PHINode *PN1 = dyn_cast<PHINode>(Val: V1)) {
3416 const PHINode *PN2 = cast<PHINode>(Val: V2);
3417 // FIXME: This is missing a generalization to handle the case where one is
3418 // a PHI and another one isn't.
3419 if (isNonEqualPHIs(PN1, PN2, Depth, Q))
3420 return true;
3421    }
3422 }
3423
3424 if (isModifyingBinopOfNonZero(V1, V2, Depth, Q) ||
3425 isModifyingBinopOfNonZero(V1: V2, V2: V1, Depth, Q))
3426 return true;
3427
3428 if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V1: V2, V2: V1, Depth, Q))
3429 return true;
3430
3431 if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V1: V2, V2: V1, Depth, Q))
3432 return true;
3433
3434 if (V1->getType()->isIntOrIntVectorTy()) {
3435 // Are any known bits in V1 contradictory to known bits in V2? If V1
3436 // has a known zero where V2 has a known one, they must not be equal.
3437 KnownBits Known1 = computeKnownBits(V: V1, Depth, Q);
3438 if (!Known1.isUnknown()) {
3439 KnownBits Known2 = computeKnownBits(V: V2, Depth, Q);
3440 if (Known1.Zero.intersects(RHS: Known2.One) ||
3441 Known2.Zero.intersects(RHS: Known1.One))
3442 return true;
3443 }
3444 }
3445
3446 if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V1: V2, V2: V1, Depth, Q))
3447 return true;
3448
3449 if (isNonEqualPointersWithRecursiveGEP(A: V1, B: V2, Q) ||
3450 isNonEqualPointersWithRecursiveGEP(A: V2, B: V1, Q))
3451 return true;
3452
3453 Value *A, *B;
3454 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3455 // Check PtrToInt type matches the pointer size.
3456 if (match(V: V1, P: m_PtrToIntSameSize(DL: Q.DL, Op: m_Value(V&: A))) &&
3457 match(V: V2, P: m_PtrToIntSameSize(DL: Q.DL, Op: m_Value(V&: B))))
3458 return isKnownNonEqual(V1: A, V2: B, Depth: Depth + 1, Q);
3459
3460 return false;
3461}
3462
3463// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
3464// Returns the input and lower/upper bounds.
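// For example (illustrative IR): smax(smin(%x, i32 255), i32 0) clamps %x to
// [0, 255], giving In = %x, CLow = 0, CHigh = 255.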
3465static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
3466 const APInt *&CLow, const APInt *&CHigh) {
3467 assert(isa<Operator>(Select) &&
3468 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
3469 "Input should be a Select!");
3470
3471 const Value *LHS = nullptr, *RHS = nullptr;
3472 SelectPatternFlavor SPF = matchSelectPattern(V: Select, LHS, RHS).Flavor;
3473 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
3474 return false;
3475
3476 if (!match(V: RHS, P: m_APInt(Res&: CLow)))
3477 return false;
3478
3479 const Value *LHS2 = nullptr, *RHS2 = nullptr;
3480 SelectPatternFlavor SPF2 = matchSelectPattern(V: LHS, LHS&: LHS2, RHS&: RHS2).Flavor;
3481 if (getInverseMinMaxFlavor(SPF) != SPF2)
3482 return false;
3483
3484 if (!match(V: RHS2, P: m_APInt(Res&: CHigh)))
3485 return false;
3486
3487 if (SPF == SPF_SMIN)
3488 std::swap(a&: CLow, b&: CHigh);
3489
3490 In = LHS2;
3491 return CLow->sle(RHS: *CHigh);
3492}
3493
3494static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
3495 const APInt *&CLow,
3496 const APInt *&CHigh) {
3497 assert((II->getIntrinsicID() == Intrinsic::smin ||
3498 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
3499
3500 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID());
3501 auto *InnerII = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0));
3502 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
3503 !match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: CLow)) ||
3504 !match(V: InnerII->getArgOperand(i: 1), P: m_APInt(Res&: CHigh)))
3505 return false;
3506
3507 if (II->getIntrinsicID() == Intrinsic::smin)
3508 std::swap(a&: CLow, b&: CHigh);
3509 return CLow->sle(RHS: *CHigh);
3510}
3511
3512/// For vector constants, loop over the elements and find the constant with the
3513/// minimum number of sign bits. Return 0 if the value is not a vector constant
3514/// or if any element was not analyzed; otherwise, return the count for the
3515/// element with the minimum number of sign bits.
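/// For example (illustrative): the constant <2 x i8> <i8 -1, i8 3> has
/// elements with 8 and 6 sign bits respectively, so this returns 6.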
3516static unsigned computeNumSignBitsVectorConstant(const Value *V,
3517 const APInt &DemandedElts,
3518 unsigned TyBits) {
3519 const auto *CV = dyn_cast<Constant>(Val: V);
3520 if (!CV || !isa<FixedVectorType>(Val: CV->getType()))
3521 return 0;
3522
3523 unsigned MinSignBits = TyBits;
3524 unsigned NumElts = cast<FixedVectorType>(Val: CV->getType())->getNumElements();
3525 for (unsigned i = 0; i != NumElts; ++i) {
3526 if (!DemandedElts[i])
3527 continue;
3528 // If we find a non-ConstantInt, bail out.
3529 auto *Elt = dyn_cast_or_null<ConstantInt>(Val: CV->getAggregateElement(Elt: i));
3530 if (!Elt)
3531 return 0;
3532
3533 MinSignBits = std::min(a: MinSignBits, b: Elt->getValue().getNumSignBits());
3534 }
3535
3536 return MinSignBits;
3537}
3538
3539static unsigned ComputeNumSignBitsImpl(const Value *V,
3540 const APInt &DemandedElts,
3541 unsigned Depth, const SimplifyQuery &Q);
3542
3543static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3544 unsigned Depth, const SimplifyQuery &Q) {
3545 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3546 assert(Result > 0 && "At least one sign bit needs to be present!");
3547 return Result;
3548}
3549
3550/// Return the number of times the sign bit of the register is replicated into
3551/// the other bits. We know that at least 1 bit is always equal to the sign bit
3552/// (itself), but other cases can give us information. For example, immediately
3553/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3554/// other, so we return 3. For vectors, return the number of sign bits for the
3555/// vector element with the minimum number of known sign bits of the demanded
3556/// elements in the vector specified by DemandedElts.
3557static unsigned ComputeNumSignBitsImpl(const Value *V,
3558 const APInt &DemandedElts,
3559 unsigned Depth, const SimplifyQuery &Q) {
3560 Type *Ty = V->getType();
3561#ifndef NDEBUG
3562 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3563
3564 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: Ty)) {
3565 assert(
3566 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3567 "DemandedElt width should equal the fixed vector number of elements");
3568 } else {
3569 assert(DemandedElts == APInt(1, 1) &&
3570 "DemandedElt width should be 1 for scalars");
3571 }
3572#endif
3573
3574 // We return the minimum number of sign bits that are guaranteed to be present
3575 // in V, so for undef we have to conservatively return 1. We don't have the
3576 // same behavior for poison though -- that's a FIXME today.
3577
3578 Type *ScalarTy = Ty->getScalarType();
3579 unsigned TyBits = ScalarTy->isPointerTy() ?
3580 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3581 Q.DL.getTypeSizeInBits(Ty: ScalarTy);
3582
3583 unsigned Tmp, Tmp2;
3584 unsigned FirstAnswer = 1;
3585
3586 // Note that ConstantInt is handled by the general computeKnownBits case
3587 // below.
3588
3589 if (Depth == MaxAnalysisRecursionDepth)
3590 return 1;
3591
3592 if (auto *U = dyn_cast<Operator>(Val: V)) {
3593 switch (Operator::getOpcode(V)) {
3594 default: break;
3595 case Instruction::SExt:
3596 Tmp = TyBits - U->getOperand(i: 0)->getType()->getScalarSizeInBits();
3597 return ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q) + Tmp;
3598
3599 case Instruction::SDiv: {
3600 const APInt *Denominator;
3601 // sdiv X, C -> adds log(C) sign bits.
3602 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: Denominator))) {
3603
3604 // Ignore non-positive denominator.
3605 if (!Denominator->isStrictlyPositive())
3606 break;
3607
3608 // Calculate the incoming numerator bits.
3609 unsigned NumBits = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3610
3611 // Add floor(log(C)) bits to the numerator bits.
3612 return std::min(a: TyBits, b: NumBits + Denominator->logBase2());
3613 }
3614 break;
3615 }
3616
3617 case Instruction::SRem: {
3618 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3619
3620 const APInt *Denominator;
3621 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3622      // positive constant. This lets us put a lower bound on the number of sign
3623 // bits.
3624 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: Denominator))) {
3625
3626 // Ignore non-positive denominator.
3627 if (Denominator->isStrictlyPositive()) {
3628 // Calculate the leading sign bit constraints by examining the
3629 // denominator. Given that the denominator is positive, there are two
3630 // cases:
3631 //
3632 // 1. The numerator is positive. The result range is [0,C) and
3633 // [0,C) u< (1 << ceilLogBase2(C)).
3634 //
3635 // 2. The numerator is negative. Then the result range is (-C,0] and
3636 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3637 //
3638 // Thus a lower bound on the number of sign bits is `TyBits -
3639 // ceilLogBase2(C)`.
3640
3641 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3642 Tmp = std::max(a: Tmp, b: ResBits);
3643 }
3644 }
3645 return Tmp;
3646 }
3647
3648 case Instruction::AShr: {
3649 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3650 // ashr X, C -> adds C sign bits. Vectors too.
3651 const APInt *ShAmt;
3652 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: ShAmt))) {
3653 if (ShAmt->uge(RHS: TyBits))
3654 break; // Bad shift.
3655 unsigned ShAmtLimited = ShAmt->getZExtValue();
3656 Tmp += ShAmtLimited;
3657 if (Tmp > TyBits) Tmp = TyBits;
3658 }
3659 return Tmp;
3660 }
3661 case Instruction::Shl: {
3662 const APInt *ShAmt;
3663 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: ShAmt))) {
3664 // shl destroys sign bits.
3665 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3666 if (ShAmt->uge(RHS: TyBits) || // Bad shift.
3667 ShAmt->uge(RHS: Tmp)) break; // Shifted all sign bits out.
3668 Tmp2 = ShAmt->getZExtValue();
3669 return Tmp - Tmp2;
3670 }
3671 break;
3672 }
3673 case Instruction::And:
3674 case Instruction::Or:
3675 case Instruction::Xor: // NOT is handled here.
3676      // Logical binary ops preserve the number of sign bits at worst.
3677 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3678 if (Tmp != 1) {
3679 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 1), Depth: Depth + 1, Q);
3680 FirstAnswer = std::min(a: Tmp, b: Tmp2);
3681 // We computed what we know about the sign bits as our first
3682 // answer. Now proceed to the generic code that uses
3683 // computeKnownBits, and pick whichever answer is better.
3684 }
3685 break;
3686
3687 case Instruction::Select: {
3688 // If we have a clamp pattern, we know that the number of sign bits will
3689 // be the minimum of the clamp min/max range.
3690 const Value *X;
3691 const APInt *CLow, *CHigh;
3692 if (isSignedMinMaxClamp(Select: U, In&: X, CLow, CHigh))
3693 return std::min(a: CLow->getNumSignBits(), b: CHigh->getNumSignBits());
3694
3695 Tmp = ComputeNumSignBits(V: U->getOperand(i: 1), Depth: Depth + 1, Q);
3696 if (Tmp == 1) break;
3697 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 2), Depth: Depth + 1, Q);
3698 return std::min(a: Tmp, b: Tmp2);
3699 }
3700
3701 case Instruction::Add:
3702 // Add can have at most one carry bit. Thus we know that the output
3703 // is, at worst, one more bit than the inputs.
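      // For example (illustrative): adding two i16 values that each have at
      // least 3 sign bits yields a result with at least min(3, 3) - 1 = 2
      // sign bits.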
3704 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3705 if (Tmp == 1) break;
3706
3707 // Special case decrementing a value (ADD X, -1):
3708 if (const auto *CRHS = dyn_cast<Constant>(Val: U->getOperand(i: 1)))
3709 if (CRHS->isAllOnesValue()) {
3710 KnownBits Known(TyBits);
3711 computeKnownBits(V: U->getOperand(i: 0), Known, Depth: Depth + 1, Q);
3712
3713 // If the input is known to be 0 or 1, the output is 0/-1, which is
3714 // all sign bits set.
3715 if ((Known.Zero | 1).isAllOnes())
3716 return TyBits;
3717
3718 // If we are subtracting one from a positive number, there is no carry
3719 // out of the result.
3720 if (Known.isNonNegative())
3721 return Tmp;
3722 }
3723
3724 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 1), Depth: Depth + 1, Q);
3725 if (Tmp2 == 1) break;
3726 return std::min(a: Tmp, b: Tmp2) - 1;
3727
3728 case Instruction::Sub:
3729 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 1), Depth: Depth + 1, Q);
3730 if (Tmp2 == 1) break;
3731
3732 // Handle NEG.
3733 if (const auto *CLHS = dyn_cast<Constant>(Val: U->getOperand(i: 0)))
3734 if (CLHS->isNullValue()) {
3735 KnownBits Known(TyBits);
3736 computeKnownBits(V: U->getOperand(i: 1), Known, Depth: Depth + 1, Q);
3737 // If the input is known to be 0 or 1, the output is 0/-1, which is
3738 // all sign bits set.
3739 if ((Known.Zero | 1).isAllOnes())
3740 return TyBits;
3741
3742 // If the input is known to be positive (the sign bit is known clear),
3743 // the output of the NEG has the same number of sign bits as the
3744 // input.
3745 if (Known.isNonNegative())
3746 return Tmp2;
3747
3748 // Otherwise, we treat this like a SUB.
3749 }
3750
3751 // Sub can have at most one carry bit. Thus we know that the output
3752 // is, at worst, one more bit than the inputs.
3753 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3754 if (Tmp == 1) break;
3755 return std::min(a: Tmp, b: Tmp2) - 1;
3756
3757 case Instruction::Mul: {
3758 // The output of the Mul can be at most twice the valid bits in the
3759 // inputs.
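      // For example (illustrative): for i16 operands with 10 sign bits each,
      // OutValidBits = (16 - 10 + 1) * 2 = 14, so the result has at least
      // 16 - 14 + 1 = 3 sign bits.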
3760 unsigned SignBitsOp0 = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3761 if (SignBitsOp0 == 1) break;
3762 unsigned SignBitsOp1 = ComputeNumSignBits(V: U->getOperand(i: 1), Depth: Depth + 1, Q);
3763 if (SignBitsOp1 == 1) break;
3764 unsigned OutValidBits =
3765 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
3766 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
3767 }
3768
3769 case Instruction::PHI: {
3770 const PHINode *PN = cast<PHINode>(Val: U);
3771 unsigned NumIncomingValues = PN->getNumIncomingValues();
3772 // Don't analyze large in-degree PHIs.
3773 if (NumIncomingValues > 4) break;
3774 // Unreachable blocks may have zero-operand PHI nodes.
3775 if (NumIncomingValues == 0) break;
3776
3777 // Take the minimum of all incoming values. This can't infinitely loop
3778 // because of our depth threshold.
3779 SimplifyQuery RecQ = Q;
3780 Tmp = TyBits;
3781 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
3782 if (Tmp == 1) return Tmp;
3783 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
3784 Tmp = std::min(
3785 a: Tmp, b: ComputeNumSignBits(V: PN->getIncomingValue(i), Depth: Depth + 1, Q: RecQ));
3786 }
3787 return Tmp;
3788 }
3789
3790 case Instruction::Trunc: {
3791 // If the input contained enough sign bits that some remain after the
3792 // truncation, then we can make use of that. Otherwise we don't know
3793 // anything.
3794 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3795 unsigned OperandTyBits = U->getOperand(i: 0)->getType()->getScalarSizeInBits();
3796 if (Tmp > (OperandTyBits - TyBits))
3797 return Tmp - (OperandTyBits - TyBits);
3798
3799 return 1;
3800 }
3801
3802 case Instruction::ExtractElement:
3803 // Look through extract element. At the moment we keep this simple and
3804 // skip tracking the specific element. But at least we might find
3805    // information valid for all elements of the vector (for example if the
3806    // vector is sign extended, shifted, etc).
3807 return ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3808
3809 case Instruction::ShuffleVector: {
3810 // Collect the minimum number of sign bits that are shared by every vector
3811 // element referenced by the shuffle.
3812 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: U);
3813 if (!Shuf) {
3814 // FIXME: Add support for shufflevector constant expressions.
3815 return 1;
3816 }
3817 APInt DemandedLHS, DemandedRHS;
3818 // For undef elements, we don't know anything about the common state of
3819 // the shuffle result.
3820 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3821 return 1;
3822 Tmp = std::numeric_limits<unsigned>::max();
3823 if (!!DemandedLHS) {
3824 const Value *LHS = Shuf->getOperand(i_nocapture: 0);
3825 Tmp = ComputeNumSignBits(V: LHS, DemandedElts: DemandedLHS, Depth: Depth + 1, Q);
3826 }
3827 // If we don't know anything, early out and try computeKnownBits
3828 // fall-back.
3829 if (Tmp == 1)
3830 break;
3831 if (!!DemandedRHS) {
3832 const Value *RHS = Shuf->getOperand(i_nocapture: 1);
3833 Tmp2 = ComputeNumSignBits(V: RHS, DemandedElts: DemandedRHS, Depth: Depth + 1, Q);
3834 Tmp = std::min(a: Tmp, b: Tmp2);
3835 }
3836 // If we don't know anything, early out and try computeKnownBits
3837 // fall-back.
3838 if (Tmp == 1)
3839 break;
3840 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
3841 return Tmp;
3842 }
3843 case Instruction::Call: {
3844 if (const auto *II = dyn_cast<IntrinsicInst>(Val: U)) {
3845 switch (II->getIntrinsicID()) {
3846 default: break;
3847 case Intrinsic::abs:
3848 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Depth: Depth + 1, Q);
3849 if (Tmp == 1) break;
3850
3851 // Absolute value reduces number of sign bits by at most 1.
3852 return Tmp - 1;
3853 case Intrinsic::smin:
3854 case Intrinsic::smax: {
3855 const APInt *CLow, *CHigh;
3856 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
3857 return std::min(a: CLow->getNumSignBits(), b: CHigh->getNumSignBits());
3858 }
3859 }
3860 }
3861 }
3862 }
3863 }
3864
3865 // Finally, if we can prove that the top bits of the result are 0's or 1's,
3866 // use this information.
3867
3868 // If we can examine all elements of a vector constant successfully, we're
3869 // done (we can't do any better than that). If not, keep trying.
3870 if (unsigned VecSignBits =
3871 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
3872 return VecSignBits;
3873
3874 KnownBits Known(TyBits);
3875 computeKnownBits(V, DemandedElts, Known, Depth, Q);
3876
3877 // If we know that the sign bit is either zero or one, determine the number of
3878 // identical bits in the top of the input value.
3879 return std::max(a: FirstAnswer, b: Known.countMinSignBits());
3880}
3881
3882Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
3883 const TargetLibraryInfo *TLI) {
3884 const Function *F = CB.getCalledFunction();
3885 if (!F)
3886 return Intrinsic::not_intrinsic;
3887
3888 if (F->isIntrinsic())
3889 return F->getIntrinsicID();
3890
3891 // We are going to infer semantics of a library function based on mapping it
3892 // to an LLVM intrinsic. Check that the library function is available from
3893 // this callbase and in this environment.
3894 LibFunc Func;
3895 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, F&: Func) ||
3896 !CB.onlyReadsMemory())
3897 return Intrinsic::not_intrinsic;
3898
3899 switch (Func) {
3900 default:
3901 break;
3902 case LibFunc_sin:
3903 case LibFunc_sinf:
3904 case LibFunc_sinl:
3905 return Intrinsic::sin;
3906 case LibFunc_cos:
3907 case LibFunc_cosf:
3908 case LibFunc_cosl:
3909 return Intrinsic::cos;
3910 case LibFunc_exp:
3911 case LibFunc_expf:
3912 case LibFunc_expl:
3913 return Intrinsic::exp;
3914 case LibFunc_exp2:
3915 case LibFunc_exp2f:
3916 case LibFunc_exp2l:
3917 return Intrinsic::exp2;
3918 case LibFunc_log:
3919 case LibFunc_logf:
3920 case LibFunc_logl:
3921 return Intrinsic::log;
3922 case LibFunc_log10:
3923 case LibFunc_log10f:
3924 case LibFunc_log10l:
3925 return Intrinsic::log10;
3926 case LibFunc_log2:
3927 case LibFunc_log2f:
3928 case LibFunc_log2l:
3929 return Intrinsic::log2;
3930 case LibFunc_fabs:
3931 case LibFunc_fabsf:
3932 case LibFunc_fabsl:
3933 return Intrinsic::fabs;
3934 case LibFunc_fmin:
3935 case LibFunc_fminf:
3936 case LibFunc_fminl:
3937 return Intrinsic::minnum;
3938 case LibFunc_fmax:
3939 case LibFunc_fmaxf:
3940 case LibFunc_fmaxl:
3941 return Intrinsic::maxnum;
3942 case LibFunc_copysign:
3943 case LibFunc_copysignf:
3944 case LibFunc_copysignl:
3945 return Intrinsic::copysign;
3946 case LibFunc_floor:
3947 case LibFunc_floorf:
3948 case LibFunc_floorl:
3949 return Intrinsic::floor;
3950 case LibFunc_ceil:
3951 case LibFunc_ceilf:
3952 case LibFunc_ceill:
3953 return Intrinsic::ceil;
3954 case LibFunc_trunc:
3955 case LibFunc_truncf:
3956 case LibFunc_truncl:
3957 return Intrinsic::trunc;
3958 case LibFunc_rint:
3959 case LibFunc_rintf:
3960 case LibFunc_rintl:
3961 return Intrinsic::rint;
3962 case LibFunc_nearbyint:
3963 case LibFunc_nearbyintf:
3964 case LibFunc_nearbyintl:
3965 return Intrinsic::nearbyint;
3966 case LibFunc_round:
3967 case LibFunc_roundf:
3968 case LibFunc_roundl:
3969 return Intrinsic::round;
3970 case LibFunc_roundeven:
3971 case LibFunc_roundevenf:
3972 case LibFunc_roundevenl:
3973 return Intrinsic::roundeven;
3974 case LibFunc_pow:
3975 case LibFunc_powf:
3976 case LibFunc_powl:
3977 return Intrinsic::pow;
3978 case LibFunc_sqrt:
3979 case LibFunc_sqrtf:
3980 case LibFunc_sqrtl:
3981 return Intrinsic::sqrt;
3982 }
3983
3984 return Intrinsic::not_intrinsic;
3985}
3986
3987/// Return true if it's possible to assume IEEE treatment of input denormals in
3988/// \p F for \p Ty.
3989static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
3990 Ty = Ty->getScalarType();
3991 return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE;
3992}
3993
3994static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
3995 Ty = Ty->getScalarType();
3996 DenormalMode Mode = F.getDenormalMode(FPType: Ty->getFltSemantics());
3997 return Mode.Input == DenormalMode::IEEE ||
3998 Mode.Input == DenormalMode::PositiveZero;
3999}
4000
4001static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4002 Ty = Ty->getScalarType();
4003 DenormalMode Mode = F.getDenormalMode(FPType: Ty->getFltSemantics());
4004 return Mode.Output == DenormalMode::IEEE ||
4005 Mode.Output == DenormalMode::PositiveZero;
4006}
4007
4008bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
4009 return isKnownNeverZero() &&
4010 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
4011}
4012
4013bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
4014 Type *Ty) const {
4015 return isKnownNeverNegZero() &&
4016 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
4017}
4018
4019bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
4020 Type *Ty) const {
4021 if (!isKnownNeverPosZero())
4022 return false;
4023
4024 // If we know there are no denormals, nothing can be flushed to zero.
4025 if (isKnownNeverSubnormal())
4026 return true;
4027
4028 DenormalMode Mode = F.getDenormalMode(FPType: Ty->getScalarType()->getFltSemantics());
4029 switch (Mode.Input) {
4030 case DenormalMode::IEEE:
4031 return true;
4032 case DenormalMode::PreserveSign:
4033 // Negative subnormal won't flush to +0
4034 return isKnownNeverPosSubnormal();
4035 case DenormalMode::PositiveZero:
4036 default:
4037 // Both positive and negative subnormal could flush to +0
4038 return false;
4039 }
4040
4041 llvm_unreachable("covered switch over denormal mode");
4042}
4043
4044void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
4045 Type *Ty) {
4046 KnownFPClasses = Src.KnownFPClasses;
4047  // If the source may already be a zero of either sign, a flushed denormal
4048  // input cannot add any new zero classes, so there is nothing to check.
4049 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
4050 return;
4051
4052 // If we know the input can't be a denormal, it can't be flushed to 0.
4053 if (Src.isKnownNeverSubnormal())
4054 return;
4055
4056 DenormalMode Mode = F.getDenormalMode(FPType: Ty->getScalarType()->getFltSemantics());
4057
4058 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
4059 KnownFPClasses |= fcPosZero;
4060
4061 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
4062 if (Mode != DenormalMode::getPositiveZero())
4063 KnownFPClasses |= fcNegZero;
4064
4065 if (Mode.Input == DenormalMode::PositiveZero ||
4066 Mode.Output == DenormalMode::PositiveZero ||
4067 Mode.Input == DenormalMode::Dynamic ||
4068 Mode.Output == DenormalMode::Dynamic)
4069 KnownFPClasses |= fcPosZero;
4070 }
4071}
4072
4073void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
4074 const Function &F, Type *Ty) {
4075 propagateDenormal(Src, F, Ty);
4076 propagateNaN(Src, /*PreserveSign=*/true);
4077}
4078
4079/// Given an exploded icmp instruction, return true if the comparison only
4080/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned to
4081/// whether the comparison is true when the input value is negative (sign bit set).
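/// For example (illustrative IR): icmp ugt i8 %x, 127 is a sign-bit check; it
/// is true exactly when the sign bit of %x is set, so TrueIfSigned is true.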
4082bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4083 bool &TrueIfSigned) {
4084 switch (Pred) {
4085 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4086 TrueIfSigned = true;
4087 return RHS.isZero();
4088 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4089 TrueIfSigned = true;
4090 return RHS.isAllOnes();
4091 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4092 TrueIfSigned = false;
4093 return RHS.isAllOnes();
4094 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4095 TrueIfSigned = false;
4096 return RHS.isZero();
4097 case ICmpInst::ICMP_UGT:
4098 // True if LHS u> RHS and RHS == sign-bit-mask - 1
4099 TrueIfSigned = true;
4100 return RHS.isMaxSignedValue();
4101 case ICmpInst::ICMP_UGE:
4102 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4103 TrueIfSigned = true;
4104 return RHS.isMinSignedValue();
4105 case ICmpInst::ICMP_ULT:
4106 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4107 TrueIfSigned = false;
4108 return RHS.isMinSignedValue();
4109 case ICmpInst::ICMP_ULE:
4110 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4111 TrueIfSigned = false;
4112 return RHS.isMaxSignedValue();
4113 default:
4114 return false;
4115 }
4116}
4117
4118/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4119/// same result as an fcmp with the given operands.
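/// For example (illustrative IR): fcmp olt float %x, 0.0 corresponds to
/// llvm.is.fpclass(%x, fcNegSubnormal | fcNegNormal | fcNegInf) when input
/// denormals are treated as IEEE.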
4120std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
4121 const Function &F,
4122 Value *LHS, Value *RHS,
4123 bool LookThroughSrc) {
4124 const APFloat *ConstRHS;
4125 if (!match(V: RHS, P: m_APFloatAllowPoison(Res&: ConstRHS)))
4126 return {nullptr, fcAllFlags};
4127
4128 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
4129}
4130
4131std::pair<Value *, FPClassTest>
4132llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
4133 const APFloat *ConstRHS, bool LookThroughSrc) {
4134
4135 auto [Src, ClassIfTrue, ClassIfFalse] =
4136 fcmpImpliesClass(Pred, F, LHS, RHS: *ConstRHS, LookThroughSrc);
4137 if (Src && ClassIfTrue == ~ClassIfFalse)
4138 return {Src, ClassIfTrue};
4139 return {nullptr, fcAllFlags};
4140}
4141
4142/// Return the return value for fcmpImpliesClass for a compare that produces an
4143/// exact class test.
4144static std::tuple<Value *, FPClassTest, FPClassTest> exactClass(Value *V,
4145 FPClassTest M) {
4146 return {V, M, ~M};
4147}
4148
4149std::tuple<Value *, FPClassTest, FPClassTest>
4150llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4151 FPClassTest RHSClass, bool LookThroughSrc) {
4152 assert(RHSClass != fcNone);
4153 Value *Src = LHS;
4154
4155 if (Pred == FCmpInst::FCMP_TRUE)
4156 return exactClass(V: Src, M: fcAllFlags);
4157
4158 if (Pred == FCmpInst::FCMP_FALSE)
4159 return exactClass(V: Src, M: fcNone);
4160
4161 const FPClassTest OrigClass = RHSClass;
4162
4163 const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass;
4164 const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass;
4165 const bool IsNaN = (RHSClass & ~fcNan) == fcNone;
4166
4167 if (IsNaN) {
4168 // fcmp o__ x, nan -> false
4169 // fcmp u__ x, nan -> true
4170 return exactClass(V: Src, M: CmpInst::isOrdered(predicate: Pred) ? fcNone : fcAllFlags);
4171 }
4172
4173 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
4174 if (Pred == FCmpInst::FCMP_ORD)
4175 return exactClass(V: Src, M: ~fcNan);
4176
4177 // fcmp uno x, zero|normal|subnormal|inf -> fcNan
4178 if (Pred == FCmpInst::FCMP_UNO)
4179 return exactClass(V: Src, M: fcNan);
4180
4181 const bool IsFabs = LookThroughSrc && match(V: LHS, P: m_FAbs(Op0: m_Value(V&: Src)));
4182 if (IsFabs)
4183 RHSClass = llvm::inverse_fabs(Mask: RHSClass);
4184
4185 const bool IsZero = (OrigClass & fcZero) == OrigClass;
4186 if (IsZero) {
4187 assert(Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO);
4188    // A compare against zero is only an exact class test for fcZero if input
4189    // denormals are not flushed (a flushed denormal also compares equal to 0.0).
4190 // TODO: Handle DAZ by expanding masks to cover subnormal cases.
4191 if (!inputDenormalIsIEEE(F, Ty: LHS->getType()))
4192 return {nullptr, fcAllFlags, fcAllFlags};
4193
4194 switch (Pred) {
4195 case FCmpInst::FCMP_OEQ: // Match x == 0.0
4196 return exactClass(V: Src, M: fcZero);
4197 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
4198 return exactClass(V: Src, M: fcZero | fcNan);
4199 case FCmpInst::FCMP_UNE: // Match (x != 0.0)
4200 return exactClass(V: Src, M: ~fcZero);
4201 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
4202 return exactClass(V: Src, M: ~fcNan & ~fcZero);
4203 case FCmpInst::FCMP_ORD:
4204 // Canonical form of ord/uno is with a zero. We could also handle
4205 // non-canonical other non-NaN constants or LHS == RHS.
4206 return exactClass(V: Src, M: ~fcNan);
4207 case FCmpInst::FCMP_UNO:
4208 return exactClass(V: Src, M: fcNan);
4209 case FCmpInst::FCMP_OGT: // x > 0
4210 return exactClass(V: Src, M: fcPosSubnormal | fcPosNormal | fcPosInf);
4211 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
4212 return exactClass(V: Src, M: fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
4213 case FCmpInst::FCMP_OGE: // x >= 0
4214 return exactClass(V: Src, M: fcPositive | fcNegZero);
4215 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
4216 return exactClass(V: Src, M: fcPositive | fcNegZero | fcNan);
4217 case FCmpInst::FCMP_OLT: // x < 0
4218 return exactClass(V: Src, M: fcNegSubnormal | fcNegNormal | fcNegInf);
4219 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
4220 return exactClass(V: Src, M: fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
4221 case FCmpInst::FCMP_OLE: // x <= 0
4222 return exactClass(V: Src, M: fcNegative | fcPosZero);
4223 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
4224 return exactClass(V: Src, M: fcNegative | fcPosZero | fcNan);
4225 default:
4226 llvm_unreachable("all compare types are handled");
4227 }
4228
4229 return {nullptr, fcAllFlags, fcAllFlags};
4230 }
4231
4232 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
4233
4234 const bool IsInf = (OrigClass & fcInf) == OrigClass;
4235 if (IsInf) {
4236 FPClassTest Mask = fcAllFlags;
4237
4238 switch (Pred) {
4239 case FCmpInst::FCMP_OEQ:
4240 case FCmpInst::FCMP_UNE: {
4241 // Match __builtin_isinf patterns
4242 //
4243 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf
4244 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
4245 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf
4246 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
4247 //
4248 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf
4249 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
4250 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf
4251 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true
4252 if (IsNegativeRHS) {
4253 Mask = fcNegInf;
4254 if (IsFabs)
4255 Mask = fcNone;
4256 } else {
4257 Mask = fcPosInf;
4258 if (IsFabs)
4259 Mask |= fcNegInf;
4260 }
4261 break;
4262 }
4263 case FCmpInst::FCMP_ONE:
4264 case FCmpInst::FCMP_UEQ: {
4265 // Match __builtin_isinf patterns
      // fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
      // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
      // fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
      // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
4270 //
4271 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
4272 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
4273 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
4274 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
4275 if (IsNegativeRHS) {
4276 Mask = ~fcNegInf & ~fcNan;
4277 if (IsFabs)
4278 Mask = ~fcNan;
4279 } else {
4280 Mask = ~fcPosInf & ~fcNan;
4281 if (IsFabs)
4282 Mask &= ~fcNegInf;
4283 }
4284
4285 break;
4286 }
4287 case FCmpInst::FCMP_OLT:
4288 case FCmpInst::FCMP_UGE: {
4289 if (IsNegativeRHS) {
        // No value is both ordered and less than negative infinity.
        // Every value is either unordered with -inf or at least -inf.
4292 // fcmp olt x, -inf -> false
4293 // fcmp uge x, -inf -> true
4294 Mask = fcNone;
4295 break;
4296 }
4297
4298 // fcmp olt fabs(x), +inf -> fcFinite
4299 // fcmp uge fabs(x), +inf -> ~fcFinite
4300 // fcmp olt x, +inf -> fcFinite|fcNegInf
4301 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
4302 Mask = fcFinite;
4303 if (!IsFabs)
4304 Mask |= fcNegInf;
4305 break;
4306 }
4307 case FCmpInst::FCMP_OGE:
4308 case FCmpInst::FCMP_ULT: {
4309 if (IsNegativeRHS) {
4310 // fcmp oge x, -inf -> ~fcNan
4311 // fcmp oge fabs(x), -inf -> ~fcNan
4312 // fcmp ult x, -inf -> fcNan
4313 // fcmp ult fabs(x), -inf -> fcNan
4314 Mask = ~fcNan;
4315 break;
4316 }
4317
4318 // fcmp oge fabs(x), +inf -> fcInf
4319 // fcmp oge x, +inf -> fcPosInf
4320 // fcmp ult fabs(x), +inf -> ~fcInf
4321 // fcmp ult x, +inf -> ~fcPosInf
4322 Mask = fcPosInf;
4323 if (IsFabs)
4324 Mask |= fcNegInf;
4325 break;
4326 }
4327 case FCmpInst::FCMP_OGT:
4328 case FCmpInst::FCMP_ULE: {
4329 if (IsNegativeRHS) {
4330 // fcmp ogt x, -inf -> fcmp one x, -inf
4331 // fcmp ogt fabs(x), -inf -> fcmp ord x, x
4332 // fcmp ule x, -inf -> fcmp ueq x, -inf
4333 // fcmp ule fabs(x), -inf -> fcmp uno x, x
4334 Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
4335 break;
4336 }
4337
4338 // No value is ordered and greater than infinity.
4339 Mask = fcNone;
4340 break;
4341 }
4342 case FCmpInst::FCMP_OLE:
4343 case FCmpInst::FCMP_UGT: {
      if (IsNegativeRHS) {
        // fcmp ole x, -inf -> fcmp oeq x, -inf
        // fcmp ole fabs(x), -inf -> false
        Mask = IsFabs ? fcNone : fcNegInf;
        break;
      }

      // fcmp ole x, +inf -> fcmp ord x, x
      // fcmp ole fabs(x), +inf -> fcmp ord x, x
      Mask = ~fcNan;
4354 break;
4355 }
4356 default:
4357 llvm_unreachable("all compare types are handled");
4358 }
4359
4360 // Invert the comparison for the unordered cases.
4361 if (FCmpInst::isUnordered(predicate: Pred))
4362 Mask = ~Mask;
4363
4364 return exactClass(V: Src, M: Mask);
4365 }
4366
4367 if (Pred == FCmpInst::FCMP_OEQ)
4368 return {Src, RHSClass, fcAllFlags};
4369
4370 if (Pred == FCmpInst::FCMP_UEQ) {
4371 FPClassTest Class = RHSClass | fcNan;
4372 return {Src, Class, ~fcNan};
4373 }
4374
4375 if (Pred == FCmpInst::FCMP_ONE)
4376 return {Src, ~fcNan, RHSClass | fcNan};
4377
4378 if (Pred == FCmpInst::FCMP_UNE)
4379 return {Src, fcAllFlags, RHSClass};
4380
4381 assert((RHSClass == fcNone || RHSClass == fcPosNormal ||
4382 RHSClass == fcNegNormal || RHSClass == fcNormal ||
4383 RHSClass == fcPosSubnormal || RHSClass == fcNegSubnormal ||
4384 RHSClass == fcSubnormal) &&
4385 "should have been recognized as an exact class test");
4386
4387 if (IsNegativeRHS) {
4388 // TODO: Handle fneg(fabs)
4389 if (IsFabs) {
4390 // fabs(x) o> -k -> fcmp ord x, x
4391 // fabs(x) u> -k -> true
4392 // fabs(x) o< -k -> false
4393 // fabs(x) u< -k -> fcmp uno x, x
4394 switch (Pred) {
4395 case FCmpInst::FCMP_OGT:
4396 case FCmpInst::FCMP_OGE:
4397 return {Src, ~fcNan, fcNan};
4398 case FCmpInst::FCMP_UGT:
4399 case FCmpInst::FCMP_UGE:
4400 return {Src, fcAllFlags, fcNone};
4401 case FCmpInst::FCMP_OLT:
4402 case FCmpInst::FCMP_OLE:
4403 return {Src, fcNone, fcAllFlags};
4404 case FCmpInst::FCMP_ULT:
4405 case FCmpInst::FCMP_ULE:
4406 return {Src, fcNan, ~fcNan};
4407 default:
4408 break;
4409 }
4410
4411 return {nullptr, fcAllFlags, fcAllFlags};
4412 }
4413
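    // The constant is a negative normal or subnormal value. Partition the
    // classes into those always <= such a constant and those always >= it.
    // E.g. for 'fcmp ogt x, -1.0':
    //   true  -> x is positive, a zero, a negative subnormal, or a negative
    //            normal greater than -1.0
    //   false -> x is NaN, -inf, or a negative normal <= -1.0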
4414 FPClassTest ClassesLE = fcNegInf | fcNegNormal;
4415 FPClassTest ClassesGE = fcPositive | fcNegZero | fcNegSubnormal;
4416
4417 if (IsDenormalRHS)
4418 ClassesLE |= fcNegSubnormal;
4419 else
4420 ClassesGE |= fcNegNormal;
4421
4422 switch (Pred) {
4423 case FCmpInst::FCMP_OGT:
4424 case FCmpInst::FCMP_OGE:
4425 return {Src, ClassesGE, ~ClassesGE | RHSClass};
4426 case FCmpInst::FCMP_UGT:
4427 case FCmpInst::FCMP_UGE:
4428 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
4429 case FCmpInst::FCMP_OLT:
4430 case FCmpInst::FCMP_OLE:
4431 return {Src, ClassesLE, ~ClassesLE | RHSClass};
4432 case FCmpInst::FCMP_ULT:
4433 case FCmpInst::FCMP_ULE:
4434 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
4435 default:
4436 break;
4437 }
4438 } else if (IsPositiveRHS) {
4439 FPClassTest ClassesGE = fcPosNormal | fcPosInf;
4440 FPClassTest ClassesLE = fcNegative | fcPosZero | fcPosSubnormal;
4441 if (IsDenormalRHS)
4442 ClassesGE |= fcPosSubnormal;
4443 else
4444 ClassesLE |= fcPosNormal;
4445
4446 if (IsFabs) {
4447 ClassesGE = llvm::inverse_fabs(Mask: ClassesGE);
4448 ClassesLE = llvm::inverse_fabs(Mask: ClassesLE);
4449 }
4450
4451 switch (Pred) {
4452 case FCmpInst::FCMP_OGT:
4453 case FCmpInst::FCMP_OGE:
4454 return {Src, ClassesGE, ~ClassesGE | RHSClass};
4455 case FCmpInst::FCMP_UGT:
4456 case FCmpInst::FCMP_UGE:
4457 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
4458 case FCmpInst::FCMP_OLT:
4459 case FCmpInst::FCMP_OLE:
4460 return {Src, ClassesLE, ~ClassesLE | RHSClass};
4461 case FCmpInst::FCMP_ULT:
4462 case FCmpInst::FCMP_ULE:
4463 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
4464 default:
4465 break;
4466 }
4467 }
4468
4469 return {nullptr, fcAllFlags, fcAllFlags};
4470}
4471
4472std::tuple<Value *, FPClassTest, FPClassTest>
4473llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4474 const APFloat &ConstRHS, bool LookThroughSrc) {
4475 // We can refine checks against smallest normal / largest denormal to an
4476 // exact class test.
4477 if (!ConstRHS.isNegative() && ConstRHS.isSmallestNormalized()) {
4478 Value *Src = LHS;
4479 const bool IsFabs = LookThroughSrc && match(V: LHS, P: m_FAbs(Op0: m_Value(V&: Src)));
4480
4481 FPClassTest Mask;
4482 // Match pattern that's used in __builtin_isnormal.
4483 switch (Pred) {
4484 case FCmpInst::FCMP_OLT:
4485 case FCmpInst::FCMP_UGE: {
4486 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero
4487 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero
4488 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf
4489 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero)
4490 Mask = fcZero | fcSubnormal;
4491 if (!IsFabs)
4492 Mask |= fcNegNormal | fcNegInf;
4493
4494 break;
4495 }
4496 case FCmpInst::FCMP_OGE:
4497 case FCmpInst::FCMP_ULT: {
4498 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf
4499 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal
4500 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf)
4501 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal)
4502 Mask = fcPosInf | fcPosNormal;
4503 if (IsFabs)
4504 Mask |= fcNegInf | fcNegNormal;
4505 break;
4506 }
4507 default:
4508 return fcmpImpliesClass(Pred, F, LHS, RHSClass: ConstRHS.classify(),
4509 LookThroughSrc);
4510 }
4511
4512 // Invert the comparison for the unordered cases.
4513 if (FCmpInst::isUnordered(predicate: Pred))
4514 Mask = ~Mask;
4515
4516 return exactClass(V: Src, M: Mask);
4517 }
4518
4519 return fcmpImpliesClass(Pred, F, LHS, RHSClass: ConstRHS.classify(), LookThroughSrc);
4520}
4521
4522std::tuple<Value *, FPClassTest, FPClassTest>
4523llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4524 Value *RHS, bool LookThroughSrc) {
4525 const APFloat *ConstRHS;
4526 if (!match(V: RHS, P: m_APFloatAllowPoison(Res&: ConstRHS)))
4527 return {nullptr, fcAllFlags, fcAllFlags};
4528
4529 // TODO: Just call computeKnownFPClass for RHS to handle non-constants.
4530 return fcmpImpliesClass(Pred, F, LHS, ConstRHS: *ConstRHS, LookThroughSrc);
4531}
4532
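// Refine KnownFromContext with what the condition Cond being CondIsTrue
// implies about the FP class of V. Handles an fcmp against a constant, an
// llvm.is.fpclass test, and a sign-bit check of an integer bitcast of V.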
4533static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
4534 bool CondIsTrue,
4535 const Instruction *CxtI,
4536 KnownFPClass &KnownFromContext) {
4537 CmpInst::Predicate Pred;
4538 Value *LHS;
4539 uint64_t ClassVal = 0;
4540 const APFloat *CRHS;
4541 const APInt *RHS;
4542 if (match(V: Cond, P: m_FCmp(Pred, L: m_Value(V&: LHS), R: m_APFloat(Res&: CRHS)))) {
4543 auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass(
4544 Pred, F: *CxtI->getParent()->getParent(), LHS, ConstRHS: *CRHS, LookThroughSrc: LHS != V);
4545 if (CmpVal == V)
4546 KnownFromContext.knownNot(RuleOut: ~(CondIsTrue ? MaskIfTrue : MaskIfFalse));
4547 } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(
4548 m_Value(LHS), m_ConstantInt(ClassVal)))) {
4549 FPClassTest Mask = static_cast<FPClassTest>(ClassVal);
4550 KnownFromContext.knownNot(RuleOut: CondIsTrue ? ~Mask : Mask);
4551 } else if (match(V: Cond, P: m_ICmp(Pred, L: m_ElementWiseBitCast(Op: m_Value(V&: LHS)),
4552 R: m_APInt(Res&: RHS)))) {
4553 bool TrueIfSigned;
4554 if (!isSignBitCheck(Pred, RHS: *RHS, TrueIfSigned))
4555 return;
4556 if (TrueIfSigned == CondIsTrue)
4557 KnownFromContext.signBitMustBeOne();
4558 else
4559 KnownFromContext.signBitMustBeZero();
4560 }
4561}
4562
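// Collect everything known about the FP class of V at the context instruction
// Q.CxtI, from dominating branch conditions and from llvm.assume calls.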
4563static KnownFPClass computeKnownFPClassFromContext(const Value *V,
4564 const SimplifyQuery &Q) {
4565 KnownFPClass KnownFromContext;
4566
4567 if (!Q.CxtI)
4568 return KnownFromContext;
4569
4570 if (Q.DC && Q.DT) {
4571 // Handle dominating conditions.
4572 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
4573 Value *Cond = BI->getCondition();
4574
4575 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(i: 0));
4576 if (Q.DT->dominates(BBE: Edge0, BB: Q.CxtI->getParent()))
4577 computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/true, CxtI: Q.CxtI,
4578 KnownFromContext);
4579
4580 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(i: 1));
4581 if (Q.DT->dominates(BBE: Edge1, BB: Q.CxtI->getParent()))
4582 computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/false, CxtI: Q.CxtI,
4583 KnownFromContext);
4584 }
4585 }
4586
4587 if (!Q.AC)
4588 return KnownFromContext;
4589
4590 // Try to restrict the floating-point classes based on information from
4591 // assumptions.
4592 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
4593 if (!AssumeVH)
4594 continue;
4595 CallInst *I = cast<CallInst>(Val&: AssumeVH);
4596
4597 assert(I->getFunction() == Q.CxtI->getParent()->getParent() &&
4598 "Got assumption for the wrong function!");
4599 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
4600 "must be an assume intrinsic");
4601
4602 if (!isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
4603 continue;
4604
4605 computeKnownFPClassFromCond(V, Cond: I->getArgOperand(i: 0), /*CondIsTrue=*/true,
4606 CxtI: Q.CxtI, KnownFromContext);
4607 }
4608
4609 return KnownFromContext;
4610}
4611
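// Forward declaration of the main implementation so the helpers below can
// recurse into it.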
4612void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
4613 FPClassTest InterestedClasses, KnownFPClass &Known,
4614 unsigned Depth, const SimplifyQuery &Q);
4615
4616static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
4617 FPClassTest InterestedClasses, unsigned Depth,
4618 const SimplifyQuery &Q) {
4619 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
4620 APInt DemandedElts =
4621 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
4622 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q);
4623}
4624
4625static void computeKnownFPClassForFPTrunc(const Operator *Op,
4626 const APInt &DemandedElts,
4627 FPClassTest InterestedClasses,
4628 KnownFPClass &Known, unsigned Depth,
4629 const SimplifyQuery &Q) {
4630 if ((InterestedClasses &
4631 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
4632 return;
4633
4634 KnownFPClass KnownSrc;
4635 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses,
4636 Known&: KnownSrc, Depth: Depth + 1, Q);
4637
  // The sign is preserved through the truncation.
  // TODO: Also handle the cannot-be-ordered-greater-than-zero case.
4640 if (KnownSrc.cannotBeOrderedLessThanZero())
4641 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
4642
4643 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
4644
4645 // Infinity needs a range check.
4646}
4647
4648void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
4649 FPClassTest InterestedClasses, KnownFPClass &Known,
4650 unsigned Depth, const SimplifyQuery &Q) {
4651 assert(Known.isUnknown() && "should not be called with known information");
4652
4653 if (!DemandedElts) {
4654 // No demanded elts, better to assume we don't know anything.
4655 Known.resetAll();
4656 return;
4657 }
4658
4659 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
4660
4661 if (auto *CFP = dyn_cast<ConstantFP>(Val: V)) {
4662 Known.KnownFPClasses = CFP->getValueAPF().classify();
4663 Known.SignBit = CFP->isNegative();
4664 return;
4665 }
4666
4667 if (isa<ConstantAggregateZero>(Val: V)) {
4668 Known.KnownFPClasses = fcPosZero;
4669 Known.SignBit = false;
4670 return;
4671 }
4672
4673 if (isa<PoisonValue>(Val: V)) {
4674 Known.KnownFPClasses = fcNone;
4675 Known.SignBit = false;
4676 return;
4677 }
4678
4679 // Try to handle fixed width vector constants
4680 auto *VFVTy = dyn_cast<FixedVectorType>(Val: V->getType());
4681 const Constant *CV = dyn_cast<Constant>(Val: V);
4682 if (VFVTy && CV) {
4683 Known.KnownFPClasses = fcNone;
4684 bool SignBitAllZero = true;
4685 bool SignBitAllOne = true;
4686
    // For vectors, merge the known classes and sign information of every
    // demanded element.
4688 unsigned NumElts = VFVTy->getNumElements();
4689 for (unsigned i = 0; i != NumElts; ++i) {
4690 if (!DemandedElts[i])
4691 continue;
4692
4693 Constant *Elt = CV->getAggregateElement(Elt: i);
4694 if (!Elt) {
4695 Known = KnownFPClass();
4696 return;
4697 }
4698 if (isa<UndefValue>(Val: Elt))
4699 continue;
4700 auto *CElt = dyn_cast<ConstantFP>(Val: Elt);
4701 if (!CElt) {
4702 Known = KnownFPClass();
4703 return;
4704 }
4705
4706 const APFloat &C = CElt->getValueAPF();
4707 Known.KnownFPClasses |= C.classify();
4708 if (C.isNegative())
4709 SignBitAllZero = false;
4710 else
4711 SignBitAllOne = false;
4712 }
4713 if (SignBitAllOne != SignBitAllZero)
4714 Known.SignBit = SignBitAllOne;
4715 return;
4716 }
4717
4718 FPClassTest KnownNotFromFlags = fcNone;
4719 if (const auto *CB = dyn_cast<CallBase>(Val: V))
4720 KnownNotFromFlags |= CB->getRetNoFPClass();
4721 else if (const auto *Arg = dyn_cast<Argument>(Val: V))
4722 KnownNotFromFlags |= Arg->getNoFPClass();
4723
4724 const Operator *Op = dyn_cast<Operator>(Val: V);
4725 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Val: Op)) {
4726 if (FPOp->hasNoNaNs())
4727 KnownNotFromFlags |= fcNan;
4728 if (FPOp->hasNoInfs())
4729 KnownNotFromFlags |= fcInf;
4730 }
4731
4732 KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q);
4733 KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses;
4734
4735 // We no longer need to find out about these bits from inputs if we can
4736 // assume this from flags/attributes.
4737 InterestedClasses &= ~KnownNotFromFlags;
4738
4739 auto ClearClassesFromFlags = make_scope_exit(F: [=, &Known] {
4740 Known.knownNot(RuleOut: KnownNotFromFlags);
4741 if (!Known.SignBit && AssumedClasses.SignBit) {
4742 if (*AssumedClasses.SignBit)
4743 Known.signBitMustBeOne();
4744 else
4745 Known.signBitMustBeZero();
4746 }
4747 });
4748
4749 if (!Op)
4750 return;
4751
4752 // All recursive calls that increase depth must come after this.
4753 if (Depth == MaxAnalysisRecursionDepth)
4754 return;
4755
4756 const unsigned Opc = Op->getOpcode();
4757 switch (Opc) {
4758 case Instruction::FNeg: {
4759 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses,
4760 Known, Depth: Depth + 1, Q);
4761 Known.fneg();
4762 break;
4763 }
4764 case Instruction::Select: {
4765 Value *Cond = Op->getOperand(i: 0);
4766 Value *LHS = Op->getOperand(i: 1);
4767 Value *RHS = Op->getOperand(i: 2);
4768
4769 FPClassTest FilterLHS = fcAllFlags;
4770 FPClassTest FilterRHS = fcAllFlags;
4771
4772 Value *TestedValue = nullptr;
4773 FPClassTest MaskIfTrue = fcAllFlags;
4774 FPClassTest MaskIfFalse = fcAllFlags;
4775 uint64_t ClassVal = 0;
4776 const Function *F = cast<Instruction>(Val: Op)->getFunction();
4777 CmpInst::Predicate Pred;
4778 Value *CmpLHS, *CmpRHS;
4779 if (F && match(V: Cond, P: m_FCmp(Pred, L: m_Value(V&: CmpLHS), R: m_Value(V&: CmpRHS)))) {
4780 // If the select filters out a value based on the class, it no longer
4781 // participates in the class of the result
4782
4783 // TODO: In some degenerate cases we can infer something if we try again
4784 // without looking through sign operations.
4785 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
4786 std::tie(args&: TestedValue, args&: MaskIfTrue, args&: MaskIfFalse) =
4787 fcmpImpliesClass(Pred, F: *F, LHS: CmpLHS, RHS: CmpRHS, LookThroughSrc: LookThroughFAbsFNeg);
4788 } else if (match(Cond,
4789 m_Intrinsic<Intrinsic::is_fpclass>(
4790 m_Value(TestedValue), m_ConstantInt(ClassVal)))) {
4791 FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal);
4792 MaskIfTrue = TestedMask;
4793 MaskIfFalse = ~TestedMask;
4794 }
4795
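    // Example: in 'select (fcmp one x, 0.0), x, y' the true arm is x with
    // fcNan and fcZero filtered out, so those classes can only come from y.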
4796 if (TestedValue == LHS) {
4797 // match !isnan(x) ? x : y
4798 FilterLHS = MaskIfTrue;
4799 } else if (TestedValue == RHS) { // && IsExactClass
4800 // match !isnan(x) ? y : x
4801 FilterRHS = MaskIfFalse;
4802 }
4803
4804 KnownFPClass Known2;
4805 computeKnownFPClass(V: LHS, DemandedElts, InterestedClasses: InterestedClasses & FilterLHS, Known,
4806 Depth: Depth + 1, Q);
4807 Known.KnownFPClasses &= FilterLHS;
4808
4809 computeKnownFPClass(V: RHS, DemandedElts, InterestedClasses: InterestedClasses & FilterRHS,
4810 Known&: Known2, Depth: Depth + 1, Q);
4811 Known2.KnownFPClasses &= FilterRHS;
4812
4813 Known |= Known2;
4814 break;
4815 }
4816 case Instruction::Call: {
4817 const CallInst *II = cast<CallInst>(Val: Op);
4818 const Intrinsic::ID IID = II->getIntrinsicID();
4819 switch (IID) {
4820 case Intrinsic::fabs: {
4821 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
4822 // If we only care about the sign bit we don't need to inspect the
4823 // operand.
4824 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts,
4825 InterestedClasses, Known, Depth: Depth + 1, Q);
4826 }
4827
4828 Known.fabs();
4829 break;
4830 }
4831 case Intrinsic::copysign: {
4832 KnownFPClass KnownSign;
4833
4834 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
4835 Known, Depth: Depth + 1, Q);
4836 computeKnownFPClass(V: II->getArgOperand(i: 1), DemandedElts, InterestedClasses,
4837 Known&: KnownSign, Depth: Depth + 1, Q);
4838 Known.copysign(Sign: KnownSign);
4839 break;
4840 }
4841 case Intrinsic::fma:
4842 case Intrinsic::fmuladd: {
4843 if ((InterestedClasses & fcNegative) == fcNone)
4844 break;
4845
4846 if (II->getArgOperand(i: 0) != II->getArgOperand(i: 1))
4847 break;
4848
4849 // The multiply cannot be -0 and therefore the add can't be -0
4850 Known.knownNot(RuleOut: fcNegZero);
4851
4852 // x * x + y is non-negative if y is non-negative.
4853 KnownFPClass KnownAddend;
4854 computeKnownFPClass(V: II->getArgOperand(i: 2), DemandedElts, InterestedClasses,
4855 Known&: KnownAddend, Depth: Depth + 1, Q);
4856
4857 if (KnownAddend.cannotBeOrderedLessThanZero())
4858 Known.knownNot(RuleOut: fcNegative);
4859 break;
4860 }
4861 case Intrinsic::sqrt:
4862 case Intrinsic::experimental_constrained_sqrt: {
4863 KnownFPClass KnownSrc;
4864 FPClassTest InterestedSrcs = InterestedClasses;
4865 if (InterestedClasses & fcNan)
4866 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
4867
4868 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
4869 Known&: KnownSrc, Depth: Depth + 1, Q);
4870
4871 if (KnownSrc.isKnownNeverPosInfinity())
4872 Known.knownNot(RuleOut: fcPosInf);
4873 if (KnownSrc.isKnownNever(Mask: fcSNan))
4874 Known.knownNot(RuleOut: fcSNan);
4875
4876 // Any negative value besides -0 returns a nan.
4877 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
4878 Known.knownNot(RuleOut: fcNan);
4879
4880 // The only negative value that can be returned is -0 for -0 inputs.
4881 Known.knownNot(RuleOut: fcNegInf | fcNegSubnormal | fcNegNormal);
4882
4883 // If the input denormal mode could be PreserveSign, a negative
4884 // subnormal input could produce a negative zero output.
4885 const Function *F = II->getFunction();
4886 if (Q.IIQ.hasNoSignedZeros(Op: II) ||
4887 (F && KnownSrc.isKnownNeverLogicalNegZero(F: *F, Ty: II->getType()))) {
4888 Known.knownNot(RuleOut: fcNegZero);
4889 if (KnownSrc.isKnownNeverNaN())
4890 Known.signBitMustBeZero();
4891 }
4892
4893 break;
4894 }
4895 case Intrinsic::sin:
4896 case Intrinsic::cos: {
4897 // Return NaN on infinite inputs.
4898 KnownFPClass KnownSrc;
4899 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
4900 Known&: KnownSrc, Depth: Depth + 1, Q);
4901 Known.knownNot(RuleOut: fcInf);
4902 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
4903 Known.knownNot(RuleOut: fcNan);
4904 break;
4905 }
4906 case Intrinsic::maxnum:
4907 case Intrinsic::minnum:
4908 case Intrinsic::minimum:
4909 case Intrinsic::maximum: {
4910 KnownFPClass KnownLHS, KnownRHS;
4911 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
4912 Known&: KnownLHS, Depth: Depth + 1, Q);
4913 computeKnownFPClass(V: II->getArgOperand(i: 1), DemandedElts, InterestedClasses,
4914 Known&: KnownRHS, Depth: Depth + 1, Q);
4915
4916 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
4917 Known = KnownLHS | KnownRHS;
4918
    // If either operand is not NaN, minnum/maxnum return the other operand,
    // so the result is not NaN.
4920 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
4921 Known.knownNot(RuleOut: fcNan);
4922
4923 if (IID == Intrinsic::maxnum) {
      // If at least one operand is known to be non-NaN and not ordered less
      // than zero, maxnum cannot return a value ordered less than zero.
4926 if ((KnownLHS.cannotBeOrderedLessThanZero() &&
4927 KnownLHS.isKnownNeverNaN()) ||
4928 (KnownRHS.cannotBeOrderedLessThanZero() &&
4929 KnownRHS.isKnownNeverNaN()))
4930 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
4931 } else if (IID == Intrinsic::maximum) {
      // maximum is no smaller than either operand (unless it returns NaN), so
      // if one operand cannot be ordered less than zero, neither can the
      // result.
4934 if (KnownLHS.cannotBeOrderedLessThanZero() ||
4935 KnownRHS.cannotBeOrderedLessThanZero())
4936 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
4937 } else if (IID == Intrinsic::minnum) {
      // If at least one operand is known to be non-NaN and not ordered
      // greater than zero, minnum cannot return a value ordered greater than
      // zero.
4940 if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
4941 KnownLHS.isKnownNeverNaN()) ||
4942 (KnownRHS.cannotBeOrderedGreaterThanZero() &&
4943 KnownRHS.isKnownNeverNaN()))
4944 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
4945 } else {
      // minimum is no larger than either operand (unless it returns NaN), so
      // if one operand cannot be ordered greater than zero, neither can the
      // result.
4948 if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
4949 KnownRHS.cannotBeOrderedGreaterThanZero())
4950 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
4951 }
4952
4953 // Fixup zero handling if denormals could be returned as a zero.
4954 //
4955 // As there's no spec for denormal flushing, be conservative with the
4956 // treatment of denormals that could be flushed to zero. For older
4957 // subtargets on AMDGPU the min/max instructions would not flush the
4958 // output and return the original value.
4959 //
4960 if ((Known.KnownFPClasses & fcZero) != fcNone &&
4961 !Known.isKnownNeverSubnormal()) {
4962 const Function *Parent = II->getFunction();
4963 if (!Parent)
4964 break;
4965
4966 DenormalMode Mode = Parent->getDenormalMode(
4967 FPType: II->getType()->getScalarType()->getFltSemantics());
4968 if (Mode != DenormalMode::getIEEE())
4969 Known.KnownFPClasses |= fcZero;
4970 }
4971
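    // Try to deduce the sign bit when the result cannot be NaN. If both
    // operands have the same known sign, the result shares it. Otherwise a
    // single known sign can still decide the result for max/min provided a
    // +0 vs -0 ambiguity is impossible: maximum/minimum define -0 < +0, and
    // for maxnum/minnum the zero signs are checked explicitly.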
4972 if (Known.isKnownNeverNaN()) {
4973 if (KnownLHS.SignBit && KnownRHS.SignBit &&
4974 *KnownLHS.SignBit == *KnownRHS.SignBit) {
4975 if (*KnownLHS.SignBit)
4976 Known.signBitMustBeOne();
4977 else
4978 Known.signBitMustBeZero();
4979 } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) ||
4980 ((KnownLHS.isKnownNeverNegZero() ||
4981 KnownRHS.isKnownNeverPosZero()) &&
4982 (KnownLHS.isKnownNeverPosZero() ||
4983 KnownRHS.isKnownNeverNegZero()))) {
4984 if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) &&
4985 (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
4986 Known.signBitMustBeZero();
4987 else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) &&
4988 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
4989 Known.signBitMustBeOne();
4990 }
4991 }
4992 break;
4993 }
4994 case Intrinsic::canonicalize: {
4995 KnownFPClass KnownSrc;
4996 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
4997 Known&: KnownSrc, Depth: Depth + 1, Q);
4998
4999 // This is essentially a stronger form of
5000 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
5001 // actually have an IR canonicalization guarantee.
5002
5003 // Canonicalize may flush denormals to zero, so we have to consider the
5004 // denormal mode to preserve known-not-0 knowledge.
5005 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
5006
5007 // Stronger version of propagateNaN
5008 // Canonicalize is guaranteed to quiet signaling nans.
5009 if (KnownSrc.isKnownNeverNaN())
5010 Known.knownNot(RuleOut: fcNan);
5011 else
5012 Known.knownNot(RuleOut: fcSNan);
5013
5014 const Function *F = II->getFunction();
5015 if (!F)
5016 break;
5017
5018 // If the parent function flushes denormals, the canonical output cannot
5019 // be a denormal.
5020 const fltSemantics &FPType =
5021 II->getType()->getScalarType()->getFltSemantics();
5022 DenormalMode DenormMode = F->getDenormalMode(FPType);
5023 if (DenormMode == DenormalMode::getIEEE()) {
5024 if (KnownSrc.isKnownNever(Mask: fcPosZero))
5025 Known.knownNot(RuleOut: fcPosZero);
5026 if (KnownSrc.isKnownNever(Mask: fcNegZero))
5027 Known.knownNot(RuleOut: fcNegZero);
5028 break;
5029 }
5030
5031 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
5032 Known.knownNot(RuleOut: fcSubnormal);
5033
5034 if (DenormMode.Input == DenormalMode::PositiveZero ||
5035 (DenormMode.Output == DenormalMode::PositiveZero &&
5036 DenormMode.Input == DenormalMode::IEEE))
5037 Known.knownNot(RuleOut: fcNegZero);
5038
5039 break;
5040 }
5041 case Intrinsic::vector_reduce_fmax:
5042 case Intrinsic::vector_reduce_fmin:
5043 case Intrinsic::vector_reduce_fmaximum:
5044 case Intrinsic::vector_reduce_fminimum: {
    // The min/max reductions choose one of the vector elements, so we can
    // infer any class information that is common to all elements.
5047 Known = computeKnownFPClass(V: II->getArgOperand(i: 0), FMF: II->getFastMathFlags(),
5048 InterestedClasses, Depth: Depth + 1, SQ: Q);
5049 // Can only propagate sign if output is never NaN.
5050 if (!Known.isKnownNeverNaN())
5051 Known.SignBit.reset();
5052 break;
5053 }
5054 case Intrinsic::trunc:
5055 case Intrinsic::floor:
5056 case Intrinsic::ceil:
5057 case Intrinsic::rint:
5058 case Intrinsic::nearbyint:
5059 case Intrinsic::round:
5060 case Intrinsic::roundeven: {
5061 KnownFPClass KnownSrc;
5062 FPClassTest InterestedSrcs = InterestedClasses;
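    // The propagation below reasons about the positive/negative finite groups
    // of the source as a whole (rounding can move a value between the zero,
    // subnormal and normal classes), so widen any partial interest to the
    // full group.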
5063 if (InterestedSrcs & fcPosFinite)
5064 InterestedSrcs |= fcPosFinite;
5065 if (InterestedSrcs & fcNegFinite)
5066 InterestedSrcs |= fcNegFinite;
5067 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
5068 Known&: KnownSrc, Depth: Depth + 1, Q);
5069
5070 // Integer results cannot be subnormal.
5071 Known.knownNot(RuleOut: fcSubnormal);
5072
5073 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
5074
5075 // Pass through infinities, except PPC_FP128 is a special case for
5076 // intrinsics other than trunc.
5077 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
5078 if (KnownSrc.isKnownNeverPosInfinity())
5079 Known.knownNot(RuleOut: fcPosInf);
5080 if (KnownSrc.isKnownNeverNegInfinity())
5081 Known.knownNot(RuleOut: fcNegInf);
5082 }
5083
    // A negative value that rounds up to zero produces -0, which is still in
    // fcNegFinite, so sign-specific finiteness propagates from the source.
5085 if (KnownSrc.isKnownNever(Mask: fcPosFinite))
5086 Known.knownNot(RuleOut: fcPosFinite);
5087 if (KnownSrc.isKnownNever(Mask: fcNegFinite))
5088 Known.knownNot(RuleOut: fcNegFinite);
5089
5090 break;
5091 }
5092 case Intrinsic::exp:
5093 case Intrinsic::exp2:
5094 case Intrinsic::exp10: {
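    // exp/exp2/exp10 never return a negative value: underflow gives +0 and
    // exp(-inf) is +0, so only non-negative results (or NaN) are possible.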
5095 Known.knownNot(RuleOut: fcNegative);
5096 if ((InterestedClasses & fcNan) == fcNone)
5097 break;
5098
5099 KnownFPClass KnownSrc;
5100 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
5101 Known&: KnownSrc, Depth: Depth + 1, Q);
5102 if (KnownSrc.isKnownNeverNaN()) {
5103 Known.knownNot(RuleOut: fcNan);
5104 Known.signBitMustBeZero();
5105 }
5106
5107 break;
5108 }
5109 case Intrinsic::fptrunc_round: {
5110 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5111 Depth, Q);
5112 break;
5113 }
5114 case Intrinsic::log:
5115 case Intrinsic::log10:
5116 case Intrinsic::log2:
5117 case Intrinsic::experimental_constrained_log:
5118 case Intrinsic::experimental_constrained_log10:
5119 case Intrinsic::experimental_constrained_log2: {
5120 // log(+inf) -> +inf
5121 // log([+-]0.0) -> -inf
5122 // log(-inf) -> nan
5123 // log(-x) -> nan
5124 if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
5125 break;
5126
5127 FPClassTest InterestedSrcs = InterestedClasses;
5128 if ((InterestedClasses & fcNegInf) != fcNone)
5129 InterestedSrcs |= fcZero | fcSubnormal;
5130 if ((InterestedClasses & fcNan) != fcNone)
5131 InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
5132
5133 KnownFPClass KnownSrc;
5134 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
5135 Known&: KnownSrc, Depth: Depth + 1, Q);
5136
5137 if (KnownSrc.isKnownNeverPosInfinity())
5138 Known.knownNot(RuleOut: fcPosInf);
5139
5140 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
5141 Known.knownNot(RuleOut: fcNan);
5142
5143 const Function *F = II->getFunction();
5144 if (F && KnownSrc.isKnownNeverLogicalZero(F: *F, Ty: II->getType()))
5145 Known.knownNot(RuleOut: fcNegInf);
5146
5147 break;
5148 }
5149 case Intrinsic::powi: {
5150 if ((InterestedClasses & fcNegative) == fcNone)
5151 break;
5152
5153 const Value *Exp = II->getArgOperand(i: 1);
5154 Type *ExpTy = Exp->getType();
5155 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
5156 KnownBits ExponentKnownBits(BitWidth);
5157 computeKnownBits(V: Exp, DemandedElts: isa<VectorType>(Val: ExpTy) ? DemandedElts : APInt(1, 1),
5158 Known&: ExponentKnownBits, Depth: Depth + 1, Q);
5159
5160 if (ExponentKnownBits.Zero[0]) { // Is even
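      // An even power is a square (or the reciprocal of a square) and can
      // never be negative, regardless of the sign of the base.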
5161 Known.knownNot(RuleOut: fcNegative);
5162 break;
5163 }
5164
5165 // Given that exp is an integer, here are the
5166 // ways that pow can return a negative value:
5167 //
5168 // pow(-x, exp) --> negative if exp is odd and x is negative.
5169 // pow(-0, exp) --> -inf if exp is negative odd.
5170 // pow(-0, exp) --> -0 if exp is positive odd.
5171 // pow(-inf, exp) --> -0 if exp is negative odd.
5172 // pow(-inf, exp) --> -inf if exp is positive odd.
5173 KnownFPClass KnownSrc;
5174 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: fcNegative,
5175 Known&: KnownSrc, Depth: Depth + 1, Q);
5176 if (KnownSrc.isKnownNever(Mask: fcNegative))
5177 Known.knownNot(RuleOut: fcNegative);
5178 break;
5179 }
5180 case Intrinsic::ldexp: {
5181 KnownFPClass KnownSrc;
5182 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
5183 Known&: KnownSrc, Depth: Depth + 1, Q);
    Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
5185
5186 // Sign is preserved, but underflows may produce zeroes.
5187 if (KnownSrc.isKnownNever(Mask: fcNegative))
5188 Known.knownNot(RuleOut: fcNegative);
5189 else if (KnownSrc.cannotBeOrderedLessThanZero())
5190 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
5191
5192 if (KnownSrc.isKnownNever(Mask: fcPositive))
5193 Known.knownNot(RuleOut: fcPositive);
5194 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
5195 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
5196
5197 // Can refine inf/zero handling based on the exponent operand.
5198 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
5199 if ((InterestedClasses & ExpInfoMask) == fcNone)
5200 break;
5201 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
5202 break;
5203
5204 const fltSemantics &Flt =
5205 II->getType()->getScalarType()->getFltSemantics();
5206 unsigned Precision = APFloat::semanticsPrecision(Flt);
5207 const Value *ExpArg = II->getArgOperand(i: 1);
5208 ConstantRange ExpRange = computeConstantRange(
5209 V: ExpArg, ForSigned: true, UseInstrInfo: Q.IIQ.UseInstrInfo, AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT, Depth: Depth + 1);
5210
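    // Scaling up by at least the number of mantissa bits moves even the
    // smallest subnormal to at least the smallest normal value, so the result
    // cannot be subnormal.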
5211 const int MantissaBits = Precision - 1;
5212 if (ExpRange.getSignedMin().sge(RHS: static_cast<int64_t>(MantissaBits)))
5213 Known.knownNot(RuleOut: fcSubnormal);
5214
5215 const Function *F = II->getFunction();
5216 const APInt *ConstVal = ExpRange.getSingleElement();
5217 if (ConstVal && ConstVal->isZero()) {
5218 // ldexp(x, 0) -> x, so propagate everything.
5219 Known.propagateCanonicalizingSrc(Src: KnownSrc, F: *F, Ty: II->getType());
5220 } else if (ExpRange.isAllNegative()) {
      // If we know the exponent is negative (scaling down), ldexp cannot
      // introduce an infinity.
5222 if (KnownSrc.isKnownNeverPosInfinity())
5223 Known.knownNot(RuleOut: fcPosInf);
5224 if (KnownSrc.isKnownNeverNegInfinity())
5225 Known.knownNot(RuleOut: fcNegInf);
5226 } else if (ExpRange.isAllNonNegative()) {
      // If we know the exponent is non-negative (scaling up), ldexp cannot
      // introduce a subnormal or a zero.
5228 if (KnownSrc.isKnownNeverPosSubnormal())
5229 Known.knownNot(RuleOut: fcPosSubnormal);
5230 if (KnownSrc.isKnownNeverNegSubnormal())
5231 Known.knownNot(RuleOut: fcNegSubnormal);
5232 if (F && KnownSrc.isKnownNeverLogicalPosZero(F: *F, Ty: II->getType()))
5233 Known.knownNot(RuleOut: fcPosZero);
5234 if (F && KnownSrc.isKnownNeverLogicalNegZero(F: *F, Ty: II->getType()))
5235 Known.knownNot(RuleOut: fcNegZero);
5236 }
5237
5238 break;
5239 }
5240 case Intrinsic::arithmetic_fence: {
5241 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
5242 Known, Depth: Depth + 1, Q);
5243 break;
5244 }
5245 case Intrinsic::experimental_constrained_sitofp:
5246 case Intrinsic::experimental_constrained_uitofp:
5247 // Cannot produce nan
5248 Known.knownNot(RuleOut: fcNan);
5249
5250 // sitofp and uitofp turn into +0.0 for zero.
5251 Known.knownNot(RuleOut: fcNegZero);
5252
5253 // Integers cannot be subnormal
5254 Known.knownNot(RuleOut: fcSubnormal);
5255
5256 if (IID == Intrinsic::experimental_constrained_uitofp)
5257 Known.signBitMustBeZero();
5258
5259 // TODO: Copy inf handling from instructions
5260 break;
5261 default:
5262 break;
5263 }
5264
5265 break;
5266 }
5267 case Instruction::FAdd:
5268 case Instruction::FSub: {
5269 KnownFPClass KnownLHS, KnownRHS;
5270 bool WantNegative =
5271 Op->getOpcode() == Instruction::FAdd &&
5272 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
5273 bool WantNaN = (InterestedClasses & fcNan) != fcNone;
5274 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
5275
5276 if (!WantNaN && !WantNegative && !WantNegZero)
5277 break;
5278
5279 FPClassTest InterestedSrcs = InterestedClasses;
5280 if (WantNegative)
5281 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
5282 if (InterestedClasses & fcNan)
5283 InterestedSrcs |= fcInf;
5284 computeKnownFPClass(V: Op->getOperand(i: 1), DemandedElts, InterestedClasses: InterestedSrcs,
5285 Known&: KnownRHS, Depth: Depth + 1, Q);
5286
5287 if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
5288 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
5289 WantNegZero || Opc == Instruction::FSub) {
5290
      // Constants are canonicalized to the RHS, so it is typically the
      // cheaper operand to analyze; only inspect the LHS when doing so can
      // still refine the result.
5293 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
5294 Known&: KnownLHS, Depth: Depth + 1, Q);
5295 // Adding positive and negative infinity produces NaN.
5296 // TODO: Check sign of infinities.
5297 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5298 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
5299 Known.knownNot(RuleOut: fcNan);
5300
5301 // FIXME: Context function should always be passed in separately
5302 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5303
5304 if (Op->getOpcode() == Instruction::FAdd) {
5305 if (KnownLHS.cannotBeOrderedLessThanZero() &&
5306 KnownRHS.cannotBeOrderedLessThanZero())
5307 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
5308 if (!F)
5309 break;
5310
5311 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
5312 if ((KnownLHS.isKnownNeverLogicalNegZero(F: *F, Ty: Op->getType()) ||
5313 KnownRHS.isKnownNeverLogicalNegZero(F: *F, Ty: Op->getType())) &&
5314 // Make sure output negative denormal can't flush to -0
5315 outputDenormalIsIEEEOrPosZero(F: *F, Ty: Op->getType()))
5316 Known.knownNot(RuleOut: fcNegZero);
5317 } else {
5318 if (!F)
5319 break;
5320
5321 // Only fsub -0, +0 can return -0
5322 if ((KnownLHS.isKnownNeverLogicalNegZero(F: *F, Ty: Op->getType()) ||
5323 KnownRHS.isKnownNeverLogicalPosZero(F: *F, Ty: Op->getType())) &&
5324 // Make sure output negative denormal can't flush to -0
5325 outputDenormalIsIEEEOrPosZero(F: *F, Ty: Op->getType()))
5326 Known.knownNot(RuleOut: fcNegZero);
5327 }
5328 }
5329
5330 break;
5331 }
5332 case Instruction::FMul: {
5333 // X * X is always non-negative or a NaN.
5334 if (Op->getOperand(i: 0) == Op->getOperand(i: 1))
5335 Known.knownNot(RuleOut: fcNegative);
5336
5337 if ((InterestedClasses & fcNan) != fcNan)
5338 break;
5339
5340 // fcSubnormal is only needed in case of DAZ.
5341 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
5342
5343 KnownFPClass KnownLHS, KnownRHS;
5344 computeKnownFPClass(V: Op->getOperand(i: 1), DemandedElts, InterestedClasses: NeedForNan, Known&: KnownRHS,
5345 Depth: Depth + 1, Q);
5346 if (!KnownRHS.isKnownNeverNaN())
5347 break;
5348
5349 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses: NeedForNan, Known&: KnownLHS,
5350 Depth: Depth + 1, Q);
5351 if (!KnownLHS.isKnownNeverNaN())
5352 break;
5353
5354 if (KnownLHS.SignBit && KnownRHS.SignBit) {
5355 if (*KnownLHS.SignBit == *KnownRHS.SignBit)
5356 Known.signBitMustBeZero();
5357 else
5358 Known.signBitMustBeOne();
5359 }
5360
    // 0 * +/-inf is the only way a multiply of non-NaN operands produces a
    // NaN; if neither operand can be an infinity, the product is never NaN.
5362 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
5363 Known.knownNot(RuleOut: fcNan);
5364 break;
5365 }
5366
5367 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5368 if (!F)
5369 break;
5370
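    // A NaN can now only come from 0 * +/-inf. Rule it out if, for each
    // operand that might be an infinity, the other operand cannot be a
    // logical zero (i.e. a zero even after denormal flushing).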
5371 if ((KnownRHS.isKnownNeverInfinity() ||
5372 KnownLHS.isKnownNeverLogicalZero(F: *F, Ty: Op->getType())) &&
5373 (KnownLHS.isKnownNeverInfinity() ||
5374 KnownRHS.isKnownNeverLogicalZero(F: *F, Ty: Op->getType())))
5375 Known.knownNot(RuleOut: fcNan);
5376
5377 break;
5378 }
5379 case Instruction::FDiv:
5380 case Instruction::FRem: {
5381 if (Op->getOperand(i: 0) == Op->getOperand(i: 1)) {
5382 // TODO: Could filter out snan if we inspect the operand
5383 if (Op->getOpcode() == Instruction::FDiv) {
5384 // X / X is always exactly 1.0 or a NaN.
5385 Known.KnownFPClasses = fcNan | fcPosNormal;
5386 } else {
5387 // X % X is always exactly [+-]0.0 or a NaN.
5388 Known.KnownFPClasses = fcNan | fcZero;
5389 }
5390
5391 break;
5392 }
5393
5394 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
5395 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
5396 const bool WantPositive =
5397 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
5398 if (!WantNan && !WantNegative && !WantPositive)
5399 break;
5400
5401 KnownFPClass KnownLHS, KnownRHS;
5402
5403 computeKnownFPClass(V: Op->getOperand(i: 1), DemandedElts,
5404 InterestedClasses: fcNan | fcInf | fcZero | fcNegative, Known&: KnownRHS,
5405 Depth: Depth + 1, Q);
5406
5407 bool KnowSomethingUseful =
5408 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(Mask: fcNegative);
5409
5410 if (KnowSomethingUseful || WantPositive) {
5411 const FPClassTest InterestedLHS =
5412 WantPositive ? fcAllFlags
5413 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
5414
5415 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts,
5416 InterestedClasses: InterestedClasses & InterestedLHS, Known&: KnownLHS,
5417 Depth: Depth + 1, Q);
5418 }
5419
5420 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5421
5422 if (Op->getOpcode() == Instruction::FDiv) {
5423 // Only 0/0, Inf/Inf produce NaN.
5424 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5425 (KnownLHS.isKnownNeverInfinity() ||
5426 KnownRHS.isKnownNeverInfinity()) &&
5427 ((F && KnownLHS.isKnownNeverLogicalZero(F: *F, Ty: Op->getType())) ||
5428 (F && KnownRHS.isKnownNeverLogicalZero(F: *F, Ty: Op->getType())))) {
5429 Known.knownNot(RuleOut: fcNan);
5430 }
5431
5432 // X / -0.0 is -Inf (or NaN).
5433 // +X / +X is +X
5434 if (KnownLHS.isKnownNever(Mask: fcNegative) && KnownRHS.isKnownNever(Mask: fcNegative))
5435 Known.knownNot(RuleOut: fcNegative);
5436 } else {
5437 // Inf REM x and x REM 0 produce NaN.
5438 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5439 KnownLHS.isKnownNeverInfinity() && F &&
5440 KnownRHS.isKnownNeverLogicalZero(F: *F, Ty: Op->getType())) {
5441 Known.knownNot(RuleOut: fcNan);
5442 }
5443
5444 // The sign for frem is the same as the first operand.
5445 if (KnownLHS.cannotBeOrderedLessThanZero())
5446 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
5447 if (KnownLHS.cannotBeOrderedGreaterThanZero())
5448 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
5449
5450 // See if we can be more aggressive about the sign of 0.
5451 if (KnownLHS.isKnownNever(Mask: fcNegative))
5452 Known.knownNot(RuleOut: fcNegative);
5453 if (KnownLHS.isKnownNever(Mask: fcPositive))
5454 Known.knownNot(RuleOut: fcPositive);
5455 }
5456
5457 break;
5458 }
5459 case Instruction::FPExt: {
5460 // Infinity, nan and zero propagate from source.
5461 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses,
5462 Known, Depth: Depth + 1, Q);
5463
5464 const fltSemantics &DstTy =
5465 Op->getType()->getScalarType()->getFltSemantics();
5466 const fltSemantics &SrcTy =
5467 Op->getOperand(i: 0)->getType()->getScalarType()->getFltSemantics();
5468
5469 // All subnormal inputs should be in the normal range in the result type.
5470 if (APFloat::isRepresentableAsNormalIn(Src: SrcTy, Dst: DstTy)) {
5471 if (Known.KnownFPClasses & fcPosSubnormal)
5472 Known.KnownFPClasses |= fcPosNormal;
5473 if (Known.KnownFPClasses & fcNegSubnormal)
5474 Known.KnownFPClasses |= fcNegNormal;
5475 Known.knownNot(RuleOut: fcSubnormal);
5476 }
5477
5478 // Sign bit of a nan isn't guaranteed.
5479 if (!Known.isKnownNeverNaN())
5480 Known.SignBit = std::nullopt;
5481 break;
5482 }
5483 case Instruction::FPTrunc: {
5484 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5485 Depth, Q);
5486 break;
5487 }
5488 case Instruction::SIToFP:
5489 case Instruction::UIToFP: {
5490 // Cannot produce nan
5491 Known.knownNot(RuleOut: fcNan);
5492
5493 // Integers cannot be subnormal
5494 Known.knownNot(RuleOut: fcSubnormal);
5495
5496 // sitofp and uitofp turn into +0.0 for zero.
5497 Known.knownNot(RuleOut: fcNegZero);
5498 if (Op->getOpcode() == Instruction::UIToFP)
5499 Known.signBitMustBeZero();
5500
5501 if (InterestedClasses & fcInf) {
5502 // Get width of largest magnitude integer (remove a bit if signed).
5503 // This still works for a signed minimum value because the largest FP
5504 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
5505 int IntSize = Op->getOperand(i: 0)->getType()->getScalarSizeInBits();
5506 if (Op->getOpcode() == Instruction::SIToFP)
5507 --IntSize;
5508
5509 // If the exponent of the largest finite FP value can hold the largest
5510 // integer, the result of the cast must be finite.
5511 Type *FPTy = Op->getType()->getScalarType();
5512 if (ilogb(Arg: APFloat::getLargest(Sem: FPTy->getFltSemantics())) >= IntSize)
5513 Known.knownNot(RuleOut: fcInf);
5514 }
5515
5516 break;
5517 }
5518 case Instruction::ExtractElement: {
    // Look through extractelement. If the index is non-constant or
    // out-of-range, demand all elements; otherwise demand only the extracted
    // element.
5521 const Value *Vec = Op->getOperand(i: 0);
5522 const Value *Idx = Op->getOperand(i: 1);
5523 auto *CIdx = dyn_cast<ConstantInt>(Val: Idx);
5524
5525 if (auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType())) {
5526 unsigned NumElts = VecTy->getNumElements();
5527 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
5528 if (CIdx && CIdx->getValue().ult(RHS: NumElts))
5529 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
5530 return computeKnownFPClass(V: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known,
5531 Depth: Depth + 1, Q);
5532 }
5533
5534 break;
5535 }
5536 case Instruction::InsertElement: {
5537 if (isa<ScalableVectorType>(Val: Op->getType()))
5538 return;
5539
5540 const Value *Vec = Op->getOperand(i: 0);
5541 const Value *Elt = Op->getOperand(i: 1);
5542 auto *CIdx = dyn_cast<ConstantInt>(Val: Op->getOperand(i: 2));
5543 unsigned NumElts = DemandedElts.getBitWidth();
5544 APInt DemandedVecElts = DemandedElts;
5545 bool NeedsElt = true;
    // If we know the index we are inserting to, clear that bit from the
    // demanded elements of Vec.
5547 if (CIdx && CIdx->getValue().ult(RHS: NumElts)) {
5548 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
5549 NeedsElt = DemandedElts[CIdx->getZExtValue()];
5550 }
5551
5552 // Do we demand the inserted element?
5553 if (NeedsElt) {
5554 computeKnownFPClass(V: Elt, Known, InterestedClasses, Depth: Depth + 1, Q);
5555 // If we don't know any bits, early out.
5556 if (Known.isUnknown())
5557 break;
5558 } else {
5559 Known.KnownFPClasses = fcNone;
5560 }
5561
    // Do we need any more elements from Vec?
5563 if (!DemandedVecElts.isZero()) {
5564 KnownFPClass Known2;
5565 computeKnownFPClass(V: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known&: Known2,
5566 Depth: Depth + 1, Q);
5567 Known |= Known2;
5568 }
5569
5570 break;
5571 }
5572 case Instruction::ShuffleVector: {
5573 // For undef elements, we don't know anything about the common state of
5574 // the shuffle result.
5575 APInt DemandedLHS, DemandedRHS;
5576 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Op);
5577 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
5578 return;
5579
5580 if (!!DemandedLHS) {
5581 const Value *LHS = Shuf->getOperand(i_nocapture: 0);
5582 computeKnownFPClass(V: LHS, DemandedElts: DemandedLHS, InterestedClasses, Known,
5583 Depth: Depth + 1, Q);
5584
5585 // If we don't know any bits, early out.
5586 if (Known.isUnknown())
5587 break;
5588 } else {
5589 Known.KnownFPClasses = fcNone;
5590 }
5591
5592 if (!!DemandedRHS) {
5593 KnownFPClass Known2;
5594 const Value *RHS = Shuf->getOperand(i_nocapture: 1);
5595 computeKnownFPClass(V: RHS, DemandedElts: DemandedRHS, InterestedClasses, Known&: Known2,
5596 Depth: Depth + 1, Q);
5597 Known |= Known2;
5598 }
5599
5600 break;
5601 }
5602 case Instruction::ExtractValue: {
5603 const ExtractValueInst *Extract = cast<ExtractValueInst>(Val: Op);
5604 ArrayRef<unsigned> Indices = Extract->getIndices();
5605 const Value *Src = Extract->getAggregateOperand();
5606 if (isa<StructType>(Val: Src->getType()) && Indices.size() == 1 &&
5607 Indices[0] == 0) {
5608 if (const auto *II = dyn_cast<IntrinsicInst>(Val: Src)) {
5609 switch (II->getIntrinsicID()) {
5610 case Intrinsic::frexp: {
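          // This is the fraction result of frexp. For finite non-zero inputs
          // its magnitude is in [0.5, 1), so it is never subnormal; zeros,
          // infinities and NaNs are handled below from the source's classes.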
5611 Known.knownNot(RuleOut: fcSubnormal);
5612
5613 KnownFPClass KnownSrc;
5614 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts,
5615 InterestedClasses, Known&: KnownSrc, Depth: Depth + 1, Q);
5616
5617 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5618
5619 if (KnownSrc.isKnownNever(Mask: fcNegative))
5620 Known.knownNot(RuleOut: fcNegative);
5621 else {
5622 if (F && KnownSrc.isKnownNeverLogicalNegZero(F: *F, Ty: Op->getType()))
5623 Known.knownNot(RuleOut: fcNegZero);
5624 if (KnownSrc.isKnownNever(Mask: fcNegInf))
5625 Known.knownNot(RuleOut: fcNegInf);
5626 }
5627
5628 if (KnownSrc.isKnownNever(Mask: fcPositive))
5629 Known.knownNot(RuleOut: fcPositive);
5630 else {
5631 if (F && KnownSrc.isKnownNeverLogicalPosZero(F: *F, Ty: Op->getType()))
5632 Known.knownNot(RuleOut: fcPosZero);
5633 if (KnownSrc.isKnownNever(Mask: fcPosInf))
5634 Known.knownNot(RuleOut: fcPosInf);
5635 }
5636
5637 Known.propagateNaN(Src: KnownSrc);
5638 return;
5639 }
5640 default:
5641 break;
5642 }
5643 }
5644 }
5645
5646 computeKnownFPClass(V: Src, DemandedElts, InterestedClasses, Known, Depth: Depth + 1,
5647 Q);
5648 break;
5649 }
5650 case Instruction::PHI: {
5651 const PHINode *P = cast<PHINode>(Val: Op);
5652 // Unreachable blocks may have zero-operand PHI nodes.
5653 if (P->getNumIncomingValues() == 0)
5654 break;
5655
    // Otherwise take the union of the known FP classes of the incoming
    // values, taking conservative care to avoid excessive recursion.
5658 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
5659
5660 if (Depth < PhiRecursionLimit) {
      // Skip if the PHI folds to undef (every incoming value is either the
      // PHI itself or undef).
5662 if (isa_and_nonnull<UndefValue>(Val: P->hasConstantValue()))
5663 break;
5664
5665 bool First = true;
5666
5667 for (const Use &U : P->operands()) {
5668 Value *IncValue = U.get();
5669 // Skip direct self references.
5670 if (IncValue == P)
5671 continue;
5672
5673 KnownFPClass KnownSrc;
5674 // Recurse, but cap the recursion to two levels, because we don't want
5675 // to waste time spinning around in loops. We need at least depth 2 to
5676 // detect known sign bits.
5677 computeKnownFPClass(
5678 V: IncValue, DemandedElts, InterestedClasses, Known&: KnownSrc,
5679 Depth: PhiRecursionLimit,
5680 Q: Q.getWithInstruction(I: P->getIncomingBlock(U)->getTerminator()));
5681
5682 if (First) {
5683 Known = KnownSrc;
5684 First = false;
5685 } else {
5686 Known |= KnownSrc;
5687 }
5688
5689 if (Known.KnownFPClasses == fcAllFlags)
5690 break;
5691 }
5692 }
5693
5694 break;
5695 }
5696 default:
5697 break;
5698 }
5699}
5700
5701KnownFPClass llvm::computeKnownFPClass(const Value *V,
5702 const APInt &DemandedElts,
5703 FPClassTest InterestedClasses,
5704 unsigned Depth,
5705 const SimplifyQuery &SQ) {
5706 KnownFPClass KnownClasses;
5707 ::computeKnownFPClass(V, DemandedElts, InterestedClasses, Known&: KnownClasses, Depth,
5708 Q: SQ);
5709 return KnownClasses;
5710}
5711
5712KnownFPClass llvm::computeKnownFPClass(const Value *V,
5713 FPClassTest InterestedClasses,
5714 unsigned Depth,
5715 const SimplifyQuery &SQ) {
5716 KnownFPClass Known;
5717 ::computeKnownFPClass(V, Known, InterestedClasses, Depth, Q: SQ);
5718 return Known;
5719}
5720
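// If the bytes of V, as stored to memory, all have the same value, return
// that byte as an i8 (or the i8 value itself); undef bytes act as wildcards.
// Returns nullptr if V is not known to be a repeated byte pattern.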
5721Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
5722
5723 // All byte-wide stores are splatable, even of arbitrary variables.
5724 if (V->getType()->isIntegerTy(Bitwidth: 8))
5725 return V;
5726
5727 LLVMContext &Ctx = V->getContext();
5728
  // Undef values are don't-cares.
5730 auto *UndefInt8 = UndefValue::get(T: Type::getInt8Ty(C&: Ctx));
5731 if (isa<UndefValue>(Val: V))
5732 return UndefInt8;
5733
5734 // Return Undef for zero-sized type.
5735 if (DL.getTypeStoreSize(Ty: V->getType()).isZero())
5736 return UndefInt8;
5737
5738 Constant *C = dyn_cast<Constant>(Val: V);
5739 if (!C) {
5740 // Conceptually, we could handle things like:
5741 // %a = zext i8 %X to i16
5742 // %b = shl i16 %a, 8
5743 // %c = or i16 %a, %b
5744 // but until there is an example that actually needs this, it doesn't seem
5745 // worth worrying about.
5746 return nullptr;
5747 }
5748
  // Handle 'null' constants such as ConstantAggregateZero.
5750 if (C->isNullValue())
5751 return Constant::getNullValue(Ty: Type::getInt8Ty(C&: Ctx));
5752
5753 // Constant floating-point values can be handled as integer values if the
5754 // corresponding integer value is "byteable". An important case is 0.0.
5755 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) {
5756 Type *Ty = nullptr;
5757 if (CFP->getType()->isHalfTy())
5758 Ty = Type::getInt16Ty(C&: Ctx);
5759 else if (CFP->getType()->isFloatTy())
5760 Ty = Type::getInt32Ty(C&: Ctx);
5761 else if (CFP->getType()->isDoubleTy())
5762 Ty = Type::getInt64Ty(C&: Ctx);
5763 // Don't handle long double formats, which have strange constraints.
5764 return Ty ? isBytewiseValue(V: ConstantExpr::getBitCast(C: CFP, Ty), DL)
5765 : nullptr;
5766 }
5767
  // We can handle constant integers whose width is a multiple of 8 bits.
5769 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: C)) {
5770 if (CI->getBitWidth() % 8 == 0) {
5771 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
5772 if (!CI->getValue().isSplat(SplatSizeInBits: 8))
5773 return nullptr;
5774 return ConstantInt::get(Context&: Ctx, V: CI->getValue().trunc(width: 8));
5775 }
5776 }
5777
5778 if (auto *CE = dyn_cast<ConstantExpr>(Val: C)) {
5779 if (CE->getOpcode() == Instruction::IntToPtr) {
5780 if (auto *PtrTy = dyn_cast<PointerType>(Val: CE->getType())) {
5781 unsigned BitWidth = DL.getPointerSizeInBits(AS: PtrTy->getAddressSpace());
5782 if (Constant *Op = ConstantFoldIntegerCast(
5783 C: CE->getOperand(i_nocapture: 0), DestTy: Type::getIntNTy(C&: Ctx, N: BitWidth), IsSigned: false, DL))
5784 return isBytewiseValue(V: Op, DL);
5785 }
5786 }
5787 }
5788
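  // Combine the byte patterns of two sub-values: equal values merge, undef
  // acts as a wildcard, and anything else means there is no common byte.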
5789 auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
5790 if (LHS == RHS)
5791 return LHS;
5792 if (!LHS || !RHS)
5793 return nullptr;
5794 if (LHS == UndefInt8)
5795 return RHS;
5796 if (RHS == UndefInt8)
5797 return LHS;
5798 return nullptr;
5799 };
5800
5801 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(Val: C)) {
5802 Value *Val = UndefInt8;
5803 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
5804 if (!(Val = Merge(Val, isBytewiseValue(V: CA->getElementAsConstant(i: I), DL))))
5805 return nullptr;
5806 return Val;
5807 }
5808
5809 if (isa<ConstantAggregate>(Val: C)) {
5810 Value *Val = UndefInt8;
5811 for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
5812 if (!(Val = Merge(Val, isBytewiseValue(V: C->getOperand(i: I), DL))))
5813 return nullptr;
5814 return Val;
5815 }
5816
5817 // Don't try to handle the handful of other constants.
5818 return nullptr;
5819}
5820
5821// This is the recursive version of BuildSubAggregate. It takes a few different
5822// arguments. Idxs is the index within the nested struct From that we are
5823// looking at now (which is of type IndexedType). IdxSkip is the number of
5824// indices from Idxs that should be left out when inserting into the resulting
5825// struct. To is the result struct built so far, new insertvalue instructions
5826// build on that.
5827static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType,
5828 SmallVectorImpl<unsigned> &Idxs,
5829 unsigned IdxSkip,
5830 BasicBlock::iterator InsertBefore) {
5831 StructType *STy = dyn_cast<StructType>(Val: IndexedType);
5832 if (STy) {
5833 // Save the original To argument so we can modify it
5834 Value *OrigTo = To;
5835 // General case, the type indexed by Idxs is a struct
5836 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5837 // Process each struct element recursively
5838 Idxs.push_back(Elt: i);
5839 Value *PrevTo = To;
5840 To = BuildSubAggregate(From, To, IndexedType: STy->getElementType(N: i), Idxs, IdxSkip,
5841 InsertBefore);
5842 Idxs.pop_back();
5843 if (!To) {
5844 // Couldn't find any inserted value for this index? Cleanup
5845 while (PrevTo != OrigTo) {
5846 InsertValueInst* Del = cast<InsertValueInst>(Val: PrevTo);
5847 PrevTo = Del->getAggregateOperand();
5848 Del->eraseFromParent();
5849 }
5850 // Stop processing elements
5851 break;
5852 }
5853 }
5854 // If we successfully found a value for each of our subaggregates
5855 if (To)
5856 return To;
5857 }
  // Base case: the type indexed by Idxs is not a struct, or not all of the
  // struct's elements had a value that was inserted directly. In the latter
  // case, perhaps we can't determine each of the subelements individually, but
  // we might be able to find the complete struct somewhere.
5862
5863 // Find the value that is at that particular spot
5864 Value *V = FindInsertedValue(V: From, idx_range: Idxs);
5865
5866 if (!V)
5867 return nullptr;
5868
5869 // Insert the value in the new (sub) aggregate
5870 return InsertValueInst::Create(Agg: To, Val: V, Idxs: ArrayRef(Idxs).slice(N: IdxSkip), NameStr: "tmp",
5871 InsertBefore);
5872}
5873
5874// This helper takes a nested struct and extracts a part of it (which is again a
5875// struct) into a new value. For example, given the struct:
5876// { a, { b, { c, d }, e } }
5877// and the indices "1, 1" this returns
5878// { c, d }.
5879//
5880// It does this by inserting an insertvalue for each element in the resulting
5881// struct, as opposed to just inserting a single struct. This will only work if
// each of the elements of the substruct is known (i.e., inserted into From by
5883// insertvalue instruction somewhere).
5884//
5885// All inserted insertvalue instructions are inserted before InsertBefore
5886static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
5887 BasicBlock::iterator InsertBefore) {
5888 Type *IndexedType = ExtractValueInst::getIndexedType(Agg: From->getType(),
5889 Idxs: idx_range);
5890 Value *To = PoisonValue::get(T: IndexedType);
5891 SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
5892 unsigned IdxSkip = Idxs.size();
5893
5894 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
5895}
5896
5897/// Given an aggregate and a sequence of indices, see if the scalar value
5898/// indexed is already around as a register, for example if it was inserted
5899/// directly into the aggregate.
5900///
5901/// If InsertBefore is not null, this function will duplicate (modified)
5902/// insertvalues when a part of a nested struct is extracted.
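///
/// An illustrative (hypothetical) IR example:
///   %agg = insertvalue { i32, i32 } poison, i32 42, 1
/// FindInsertedValue(%agg, {1}) returns the i32 42 that was inserted; if the
/// requested element was not inserted directly, the search continues into the
/// aggregate operand.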
5903Value *
5904llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
5905 std::optional<BasicBlock::iterator> InsertBefore) {
5906 // Nothing to index? Just return V then (this is useful at the end of our
5907 // recursion).
5908 if (idx_range.empty())
5909 return V;
5910 // We have indices, so V should have an indexable type.
5911 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
5912 "Not looking at a struct or array?");
5913 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
5914 "Invalid indices for type?");
5915
5916 if (Constant *C = dyn_cast<Constant>(Val: V)) {
5917 C = C->getAggregateElement(Elt: idx_range[0]);
5918 if (!C) return nullptr;
5919 return FindInsertedValue(V: C, idx_range: idx_range.slice(N: 1), InsertBefore);
5920 }
5921
5922 if (InsertValueInst *I = dyn_cast<InsertValueInst>(Val: V)) {
5923 // Loop the indices for the insertvalue instruction in parallel with the
5924 // requested indices
5925 const unsigned *req_idx = idx_range.begin();
5926 for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
5927 i != e; ++i, ++req_idx) {
5928 if (req_idx == idx_range.end()) {
5929 // We can't handle this without inserting insertvalues
5930 if (!InsertBefore)
5931 return nullptr;
5932
5933 // The requested index identifies a part of a nested aggregate. Handle
5934 // this specially. For example,
5935 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
5936 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
5937 // %C = extractvalue {i32, { i32, i32 } } %B, 1
5938 // This can be changed into
5939 // %A = insertvalue {i32, i32 } undef, i32 10, 0
5940 // %C = insertvalue {i32, i32 } %A, i32 11, 1
5941 // which allows the unused 0,0 element from the nested struct to be
5942 // removed.
5943 return BuildSubAggregate(From: V, idx_range: ArrayRef(idx_range.begin(), req_idx),
5944 InsertBefore: *InsertBefore);
5945 }
5946
      // This insertvalue inserts something other than what we are looking
      // for. See if the (aggregate) value inserted into has the value we are
      // looking for, then.
5950 if (*req_idx != *i)
5951 return FindInsertedValue(V: I->getAggregateOperand(), idx_range,
5952 InsertBefore);
5953 }
5954 // If we end up here, the indices of the insertvalue match with those
5955 // requested (though possibly only partially). Now we recursively look at
5956 // the inserted value, passing any remaining indices.
5957 return FindInsertedValue(V: I->getInsertedValueOperand(),
5958 idx_range: ArrayRef(req_idx, idx_range.end()), InsertBefore);
5959 }
5960
5961 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(Val: V)) {
5962 // If we're extracting a value from an aggregate that was extracted from
5963 // something else, we can extract from that something else directly instead.
5964 // However, we will need to chain I's indices with the requested indices.
5965
5966 // Calculate the number of indices required
5967 unsigned size = I->getNumIndices() + idx_range.size();
5968 // Allocate some space to put the new indices in
5969 SmallVector<unsigned, 5> Idxs;
5970 Idxs.reserve(N: size);
5971 // Add indices from the extract value instruction
5972 Idxs.append(in_start: I->idx_begin(), in_end: I->idx_end());
5973
5974 // Add requested indices
5975 Idxs.append(in_start: idx_range.begin(), in_end: idx_range.end());
5976
5977 assert(Idxs.size() == size
5978 && "Number of indices added not correct?");
5979
5980 return FindInsertedValue(V: I->getAggregateOperand(), idx_range: Idxs, InsertBefore);
5981 }
  // Otherwise, we don't know (e.g., extracting from a function return value
  // or a load instruction).
5984 return nullptr;
5985}
5986
5987bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
5988 unsigned CharSize) {
5989 // Make sure the GEP has exactly three arguments.
5990 if (GEP->getNumOperands() != 3)
5991 return false;
5992
  // Make sure the pointer being indexed points to an array of \p CharSize-bit
  // integers.
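  // For illustration, with CharSize == 8 a GEP of the form
  //   getelementptr [12 x i8], ptr %base, i64 0, i64 %idx
  // passes these checks, while a GEP with a non-zero first index or a
  // non-integer element type does not.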
5995 ArrayType *AT = dyn_cast<ArrayType>(Val: GEP->getSourceElementType());
5996 if (!AT || !AT->getElementType()->isIntegerTy(Bitwidth: CharSize))
5997 return false;
5998
5999 // Check to make sure that the first operand of the GEP is an integer and
6000 // has value 0 so that we are sure we're indexing into the initializer.
6001 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(Val: GEP->getOperand(i_nocapture: 1));
6002 if (!FirstIdx || !FirstIdx->isZero())
6003 return false;
6004
6005 return true;
6006}
6007
// If V refers to an initialized global constant, set Slice either to
// its initializer if the size of its elements equals ElementSize, or,
// for ElementSize == 8, to its representation as an array of unsigned
// char. Return true on success.
// Offset is in units of ElementSize-sized elements.
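// For example (hypothetical global): given
//   @g = constant [5 x i8] c"abcd\00"
// a query with ElementSize == 8 and Offset == 1 sets Slice.Array to the
// initializer, Slice.Offset to 1 and Slice.Length to 4.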
6013bool llvm::getConstantDataArrayInfo(const Value *V,
6014 ConstantDataArraySlice &Slice,
6015 unsigned ElementSize, uint64_t Offset) {
6016 assert(V && "V should not be null.");
6017 assert((ElementSize % 8) == 0 &&
6018 "ElementSize expected to be a multiple of the size of a byte.");
6019 unsigned ElementSizeInBytes = ElementSize / 8;
6020
6021 // Drill down into the pointer expression V, ignoring any intervening
6022 // casts, and determine the identity of the object it references along
6023 // with the cumulative byte offset into it.
6024 const GlobalVariable *GV =
6025 dyn_cast<GlobalVariable>(Val: getUnderlyingObject(V));
6026 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    // Fail if V is not based on a constant global object.
6028 return false;
6029
6030 const DataLayout &DL = GV->getParent()->getDataLayout();
6031 APInt Off(DL.getIndexTypeSizeInBits(Ty: V->getType()), 0);
6032
6033 if (GV != V->stripAndAccumulateConstantOffsets(DL, Offset&: Off,
6034 /*AllowNonInbounds*/ true))
6035 // Fail if a constant offset could not be determined.
6036 return false;
6037
6038 uint64_t StartIdx = Off.getLimitedValue();
6039 if (StartIdx == UINT64_MAX)
6040 // Fail if the constant offset is excessive.
6041 return false;
6042
  // Off/StartIdx is in units of bytes, so we need to convert it to a number
  // of elements. Simply bail out if that isn't possible.
6045 if ((StartIdx % ElementSizeInBytes) != 0)
6046 return false;
6047
6048 Offset += StartIdx / ElementSizeInBytes;
6049 ConstantDataArray *Array = nullptr;
6050 ArrayType *ArrayTy = nullptr;
6051
6052 if (GV->getInitializer()->isNullValue()) {
6053 Type *GVTy = GV->getValueType();
6054 uint64_t SizeInBytes = DL.getTypeStoreSize(Ty: GVTy).getFixedValue();
6055 uint64_t Length = SizeInBytes / ElementSizeInBytes;
6056
6057 Slice.Array = nullptr;
6058 Slice.Offset = 0;
6059 // Return an empty Slice for undersized constants to let callers
6060 // transform even undefined library calls into simpler, well-defined
6061 // expressions. This is preferable to making the calls although it
6062 // prevents sanitizers from detecting such calls.
6063 Slice.Length = Length < Offset ? 0 : Length - Offset;
6064 return true;
6065 }
6066
6067 auto *Init = const_cast<Constant *>(GV->getInitializer());
6068 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Val: Init)) {
6069 Type *InitElTy = ArrayInit->getElementType();
6070 if (InitElTy->isIntegerTy(Bitwidth: ElementSize)) {
6071 // If Init is an initializer for an array of the expected type
6072 // and size, use it as is.
6073 Array = ArrayInit;
6074 ArrayTy = ArrayInit->getType();
6075 }
6076 }
6077
6078 if (!Array) {
6079 if (ElementSize != 8)
6080 // TODO: Handle conversions to larger integral types.
6081 return false;
6082
6083 // Otherwise extract the portion of the initializer starting
6084 // at Offset as an array of bytes, and reset Offset.
6085 Init = ReadByteArrayFromGlobal(GV, Offset);
6086 if (!Init)
6087 return false;
6088
6089 Offset = 0;
6090 Array = dyn_cast<ConstantDataArray>(Val: Init);
6091 ArrayTy = dyn_cast<ArrayType>(Val: Init->getType());
6092 }
6093
6094 uint64_t NumElts = ArrayTy->getArrayNumElements();
6095 if (Offset > NumElts)
6096 return false;
6097
6098 Slice.Array = Array;
6099 Slice.Offset = Offset;
6100 Slice.Length = NumElts - Offset;
6101 return true;
6102}
6103
6104/// Extract bytes from the initializer of the constant array V, which need
6105/// not be a nul-terminated string. On success, store the bytes in Str and
6106/// return true. When TrimAtNul is set, Str will contain only the bytes up
6107/// to but not including the first nul. Return false on failure.
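///
/// For example (hypothetical global), given
///   @s = constant [6 x i8] c"hello\00"
/// a pointer to @s yields Str == "hello" when TrimAtNul is true, and all six
/// bytes including the nul when it is false.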
6108bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
6109 bool TrimAtNul) {
6110 ConstantDataArraySlice Slice;
6111 if (!getConstantDataArrayInfo(V, Slice, ElementSize: 8))
6112 return false;
6113
6114 if (Slice.Array == nullptr) {
6115 if (TrimAtNul) {
6116 // Return a nul-terminated string even for an empty Slice. This is
6117 // safe because all existing SimplifyLibcalls callers require string
6118 // arguments and the behavior of the functions they fold is undefined
6119 // otherwise. Folding the calls this way is preferable to making
6120 // the undefined library calls, even though it prevents sanitizers
6121 // from reporting such calls.
6122 Str = StringRef();
6123 return true;
6124 }
6125 if (Slice.Length == 1) {
6126 Str = StringRef("", 1);
6127 return true;
6128 }
6129 // We cannot instantiate a StringRef as we do not have an appropriate string
6130 // of 0s at hand.
6131 return false;
6132 }
6133
6134 // Start out with the entire array in the StringRef.
6135 Str = Slice.Array->getAsString();
6136 // Skip over 'offset' bytes.
6137 Str = Str.substr(Start: Slice.Offset);
6138
6139 if (TrimAtNul) {
6140 // Trim off the \0 and anything after it. If the array is not nul
6141 // terminated, we just return the whole end of string. The client may know
6142 // some other way that the string is length-bound.
6143 Str = Str.substr(Start: 0, N: Str.find(C: '\0'));
6144 }
6145 return true;
6146}
6147
6148// These next two are very similar to the above, but also look through PHI
6149// nodes.
6150// TODO: See if we can integrate these two together.
6151
6152/// If we can compute the length of the string pointed to by
6153/// the specified pointer, return 'len+1'. If we can't, return 0.
6154static uint64_t GetStringLengthH(const Value *V,
6155 SmallPtrSetImpl<const PHINode*> &PHIs,
6156 unsigned CharSize) {
6157 // Look through noop bitcast instructions.
6158 V = V->stripPointerCasts();
6159
6160 // If this is a PHI node, there are two cases: either we have already seen it
6161 // or we haven't.
6162 if (const PHINode *PN = dyn_cast<PHINode>(Val: V)) {
6163 if (!PHIs.insert(Ptr: PN).second)
6164 return ~0ULL; // already in the set.
6165
6166 // If it was new, see if all the input strings are the same length.
6167 uint64_t LenSoFar = ~0ULL;
6168 for (Value *IncValue : PN->incoming_values()) {
6169 uint64_t Len = GetStringLengthH(V: IncValue, PHIs, CharSize);
6170 if (Len == 0) return 0; // Unknown length -> unknown.
6171
6172 if (Len == ~0ULL) continue;
6173
6174 if (Len != LenSoFar && LenSoFar != ~0ULL)
6175 return 0; // Disagree -> unknown.
6176 LenSoFar = Len;
6177 }
6178
6179 // Success, all agree.
6180 return LenSoFar;
6181 }
6182
  // strlen(select(c,x,y)) is only known when strlen(x) and strlen(y) agree.
6184 if (const SelectInst *SI = dyn_cast<SelectInst>(Val: V)) {
6185 uint64_t Len1 = GetStringLengthH(V: SI->getTrueValue(), PHIs, CharSize);
6186 if (Len1 == 0) return 0;
6187 uint64_t Len2 = GetStringLengthH(V: SI->getFalseValue(), PHIs, CharSize);
6188 if (Len2 == 0) return 0;
6189 if (Len1 == ~0ULL) return Len2;
6190 if (Len2 == ~0ULL) return Len1;
6191 if (Len1 != Len2) return 0;
6192 return Len1;
6193 }
6194
6195 // Otherwise, see if we can read the string.
6196 ConstantDataArraySlice Slice;
6197 if (!getConstantDataArrayInfo(V, Slice, ElementSize: CharSize))
6198 return 0;
6199
6200 if (Slice.Array == nullptr)
6201 // Zeroinitializer (including an empty one).
6202 return 1;
6203
  // Search for the first nul character. Return a conservative result even
  // when there is no nul. This is safe since otherwise the string function
  // being folded, such as strlen, has undefined behavior, and folding it is
  // preferable to making the undefined library call.
6208 unsigned NullIndex = 0;
6209 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
6210 if (Slice.Array->getElementAsInteger(i: Slice.Offset + NullIndex) == 0)
6211 break;
6212 }
6213
6214 return NullIndex + 1;
6215}
6216
6217/// If we can compute the length of the string pointed to by
6218/// the specified pointer, return 'len+1'. If we can't, return 0.
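///
/// For example (hypothetical global), a pointer to the start of
///   @s = constant [4 x i8] c"abc\00"
/// yields 4, i.e. strlen("abc") + 1, while a pointer whose contents cannot be
/// determined yields 0.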
6219uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
6220 if (!V->getType()->isPointerTy())
6221 return 0;
6222
6223 SmallPtrSet<const PHINode*, 32> PHIs;
6224 uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so
  // return 1, the "len+1" of an empty string.
6227 return Len == ~0ULL ? 1 : Len;
6228}
6229
6230const Value *
6231llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
6232 bool MustPreserveNullness) {
6233 assert(Call &&
6234 "getArgumentAliasingToReturnedPointer only works on nonnull calls");
6235 if (const Value *RV = Call->getReturnedArgOperand())
6236 return RV;
  // This can be used only as an aliasing property.
6238 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6239 Call, MustPreserveNullness))
6240 return Call->getArgOperand(i: 0);
6241 return nullptr;
6242}
6243
6244bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6245 const CallBase *Call, bool MustPreserveNullness) {
6246 switch (Call->getIntrinsicID()) {
6247 case Intrinsic::launder_invariant_group:
6248 case Intrinsic::strip_invariant_group:
6249 case Intrinsic::aarch64_irg:
6250 case Intrinsic::aarch64_tagp:
  // The amdgcn_make_buffer_rsrc function does not alter the address of the
  // input pointer (and thus preserves null-ness for the purposes of escape
  // analysis, which is where the MustPreserveNullness flag comes into play).
6254 // However, it will not necessarily map ptr addrspace(N) null to ptr
6255 // addrspace(8) null, aka the "null descriptor", which has "all loads return
6256 // 0, all stores are dropped" semantics. Given the context of this intrinsic
6257 // list, no one should be relying on such a strict interpretation of
6258 // MustPreserveNullness (and, at time of writing, they are not), but we
6259 // document this fact out of an abundance of caution.
6260 case Intrinsic::amdgcn_make_buffer_rsrc:
6261 return true;
6262 case Intrinsic::ptrmask:
6263 return !MustPreserveNullness;
6264 case Intrinsic::threadlocal_address:
    // The underlying variable changes with the thread ID, and the thread ID
    // may change at coroutine suspend points.
6267 return !Call->getParent()->getParent()->isPresplitCoroutine();
6268 default:
6269 return false;
6270 }
6271}
6272
6273/// \p PN defines a loop-variant pointer to an object. Check if the
6274/// previous iteration of the loop was referring to the same object as \p PN.
6275static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
6276 const LoopInfo *LI) {
6277 // Find the loop-defined value.
6278 Loop *L = LI->getLoopFor(BB: PN->getParent());
6279 if (PN->getNumIncomingValues() != 2)
6280 return true;
6281
6282 // Find the value from previous iteration.
6283 auto *PrevValue = dyn_cast<Instruction>(Val: PN->getIncomingValue(i: 0));
6284 if (!PrevValue || LI->getLoopFor(BB: PrevValue->getParent()) != L)
6285 PrevValue = dyn_cast<Instruction>(Val: PN->getIncomingValue(i: 1));
6286 if (!PrevValue || LI->getLoopFor(BB: PrevValue->getParent()) != L)
6287 return true;
6288
6289 // If a new pointer is loaded in the loop, the pointer references a different
6290 // object in every iteration. E.g.:
6291 // for (i)
6292 // int *p = a[i];
6293 // ...
6294 if (auto *Load = dyn_cast<LoadInst>(Val: PrevValue))
6295 if (!L->isLoopInvariant(V: Load->getPointerOperand()))
6296 return false;
6297 return true;
6298}
6299
6300const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
6301 if (!V->getType()->isPointerTy())
6302 return V;
6303 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
6304 if (auto *GEP = dyn_cast<GEPOperator>(Val: V)) {
6305 V = GEP->getPointerOperand();
6306 } else if (Operator::getOpcode(V) == Instruction::BitCast ||
6307 Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
6308 V = cast<Operator>(Val: V)->getOperand(i: 0);
6309 if (!V->getType()->isPointerTy())
6310 return V;
6311 } else if (auto *GA = dyn_cast<GlobalAlias>(Val: V)) {
6312 if (GA->isInterposable())
6313 return V;
6314 V = GA->getAliasee();
6315 } else {
6316 if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
6317 // Look through single-arg phi nodes created by LCSSA.
6318 if (PHI->getNumIncomingValues() == 1) {
6319 V = PHI->getIncomingValue(i: 0);
6320 continue;
6321 }
6322 } else if (auto *Call = dyn_cast<CallBase>(Val: V)) {
        // CaptureTracking knows about special capturing properties of some
        // intrinsics like launder.invariant.group that can't be expressed
        // with attributes but that nevertheless return an aliasing pointer.
        // Because some analyses may assume that a nocapture pointer is not
        // returned from such an intrinsic (since the function would otherwise
        // have to be marked with the 'returned' attribute), it is crucial to
        // use this helper so that we stay in sync with CaptureTracking. Not
        // using it may cause miscompiles where two aliasing pointers are
        // assumed not to alias.
6332 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, MustPreserveNullness: false)) {
6333 V = RP;
6334 continue;
6335 }
6336 }
6337
6338 return V;
6339 }
6340 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
6341 }
6342 return V;
6343}
6344
6345void llvm::getUnderlyingObjects(const Value *V,
6346 SmallVectorImpl<const Value *> &Objects,
6347 LoopInfo *LI, unsigned MaxLookup) {
6348 SmallPtrSet<const Value *, 4> Visited;
6349 SmallVector<const Value *, 4> Worklist;
6350 Worklist.push_back(Elt: V);
6351 do {
6352 const Value *P = Worklist.pop_back_val();
6353 P = getUnderlyingObject(V: P, MaxLookup);
6354
6355 if (!Visited.insert(Ptr: P).second)
6356 continue;
6357
6358 if (auto *SI = dyn_cast<SelectInst>(Val: P)) {
6359 Worklist.push_back(Elt: SI->getTrueValue());
6360 Worklist.push_back(Elt: SI->getFalseValue());
6361 continue;
6362 }
6363
6364 if (auto *PN = dyn_cast<PHINode>(Val: P)) {
6365 // If this PHI changes the underlying object in every iteration of the
6366 // loop, don't look through it. Consider:
6367 // int **A;
6368 // for (i) {
6369 // Prev = Curr; // Prev = PHI (Prev_0, Curr)
6370 // Curr = A[i];
6371 // *Prev, *Curr;
6372 //
6373 // Prev is tracking Curr one iteration behind so they refer to different
6374 // underlying objects.
6375 if (!LI || !LI->isLoopHeader(BB: PN->getParent()) ||
6376 isSameUnderlyingObjectInLoop(PN, LI))
6377 append_range(C&: Worklist, R: PN->incoming_values());
6378 else
6379 Objects.push_back(Elt: P);
6380 continue;
6381 }
6382
6383 Objects.push_back(Elt: P);
6384 } while (!Worklist.empty());
6385}
6386
6387/// This is the function that does the work of looking through basic
6388/// ptrtoint+arithmetic+inttoptr sequences.
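///
/// An illustrative (hypothetical) sequence:
///   %i = ptrtoint ptr %obj to i64
///   %j = add i64 %i, 16
/// Starting from %j, the walk steps through the add and returns %obj once the
/// ptrtoint is reached.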
6389static const Value *getUnderlyingObjectFromInt(const Value *V) {
6390 do {
6391 if (const Operator *U = dyn_cast<Operator>(Val: V)) {
      // If we find a ptrtoint, hand the pointer back so the caller can
      // continue with the regular getUnderlyingObjects.
6394 if (U->getOpcode() == Instruction::PtrToInt)
6395 return U->getOperand(i: 0);
6396 // If we find an add of a constant, a multiplied value, or a phi, it's
6397 // likely that the other operand will lead us to the base
6398 // object. We don't have to worry about the case where the
6399 // object address is somehow being computed by the multiply,
6400 // because our callers only care when the result is an
6401 // identifiable object.
6402 if (U->getOpcode() != Instruction::Add ||
6403 (!isa<ConstantInt>(Val: U->getOperand(i: 1)) &&
6404 Operator::getOpcode(V: U->getOperand(i: 1)) != Instruction::Mul &&
6405 !isa<PHINode>(Val: U->getOperand(i: 1))))
6406 return V;
6407 V = U->getOperand(i: 0);
6408 } else {
6409 return V;
6410 }
6411 assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
6412 } while (true);
6413}
6414
/// This is a wrapper around getUnderlyingObjects that adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
/// It returns false if an unidentified object is found by getUnderlyingObjects.
6418bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
6419 SmallVectorImpl<Value *> &Objects) {
6420 SmallPtrSet<const Value *, 16> Visited;
6421 SmallVector<const Value *, 4> Working(1, V);
6422 do {
6423 V = Working.pop_back_val();
6424
6425 SmallVector<const Value *, 4> Objs;
6426 getUnderlyingObjects(V, Objects&: Objs);
6427
6428 for (const Value *V : Objs) {
6429 if (!Visited.insert(Ptr: V).second)
6430 continue;
6431 if (Operator::getOpcode(V) == Instruction::IntToPtr) {
6432 const Value *O =
6433 getUnderlyingObjectFromInt(V: cast<User>(Val: V)->getOperand(i: 0));
6434 if (O->getType()->isPointerTy()) {
6435 Working.push_back(Elt: O);
6436 continue;
6437 }
6438 }
6439 // If getUnderlyingObjects fails to find an identifiable object,
6440 // getUnderlyingObjectsForCodeGen also fails for safety.
6441 if (!isIdentifiedObject(V)) {
6442 Objects.clear();
6443 return false;
6444 }
6445 Objects.push_back(Elt: const_cast<Value *>(V));
6446 }
6447 } while (!Working.empty());
6448 return true;
6449}
6450
6451AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
6452 AllocaInst *Result = nullptr;
6453 SmallPtrSet<Value *, 4> Visited;
6454 SmallVector<Value *, 4> Worklist;
6455
6456 auto AddWork = [&](Value *V) {
6457 if (Visited.insert(Ptr: V).second)
6458 Worklist.push_back(Elt: V);
6459 };
6460
6461 AddWork(V);
6462 do {
6463 V = Worklist.pop_back_val();
6464 assert(Visited.count(V));
6465
6466 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) {
6467 if (Result && Result != AI)
6468 return nullptr;
6469 Result = AI;
6470 } else if (CastInst *CI = dyn_cast<CastInst>(Val: V)) {
6471 AddWork(CI->getOperand(i_nocapture: 0));
6472 } else if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
6473 for (Value *IncValue : PN->incoming_values())
6474 AddWork(IncValue);
6475 } else if (auto *SI = dyn_cast<SelectInst>(Val: V)) {
6476 AddWork(SI->getTrueValue());
6477 AddWork(SI->getFalseValue());
6478 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: V)) {
6479 if (OffsetZero && !GEP->hasAllZeroIndices())
6480 return nullptr;
6481 AddWork(GEP->getPointerOperand());
6482 } else if (CallBase *CB = dyn_cast<CallBase>(Val: V)) {
6483 Value *Returned = CB->getReturnedArgOperand();
6484 if (Returned)
6485 AddWork(Returned);
6486 else
6487 return nullptr;
6488 } else {
6489 return nullptr;
6490 }
6491 } while (!Worklist.empty());
6492
6493 return Result;
6494}
6495
6496static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6497 const Value *V, bool AllowLifetime, bool AllowDroppable) {
6498 for (const User *U : V->users()) {
6499 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
6500 if (!II)
6501 return false;
6502
6503 if (AllowLifetime && II->isLifetimeStartOrEnd())
6504 continue;
6505
6506 if (AllowDroppable && II->isDroppable())
6507 continue;
6508
6509 return false;
6510 }
6511 return true;
6512}
6513
6514bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
6515 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6516 V, /* AllowLifetime */ true, /* AllowDroppable */ false);
6517}
6518bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
6519 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6520 V, /* AllowLifetime */ true, /* AllowDroppable */ true);
6521}
6522
6523bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
6524 if (!LI.isUnordered())
6525 return true;
6526 const Function &F = *LI.getFunction();
6527 // Speculative load may create a race that did not exist in the source.
6528 return F.hasFnAttribute(Attribute::SanitizeThread) ||
6529 // Speculative load may load data from dirty regions.
6530 F.hasFnAttribute(Attribute::SanitizeAddress) ||
6531 F.hasFnAttribute(Attribute::SanitizeHWAddress);
6532}
6533
6534bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
6535 const Instruction *CtxI,
6536 AssumptionCache *AC,
6537 const DominatorTree *DT,
6538 const TargetLibraryInfo *TLI) {
6539 return isSafeToSpeculativelyExecuteWithOpcode(Opcode: Inst->getOpcode(), Inst, CtxI,
6540 AC, DT, TLI);
6541}
6542
6543bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
6544 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
6545 AssumptionCache *AC, const DominatorTree *DT,
6546 const TargetLibraryInfo *TLI) {
6547#ifndef NDEBUG
6548 if (Inst->getOpcode() != Opcode) {
6549 // Check that the operands are actually compatible with the Opcode override.
6550 auto hasEqualReturnAndLeadingOperandTypes =
6551 [](const Instruction *Inst, unsigned NumLeadingOperands) {
6552 if (Inst->getNumOperands() < NumLeadingOperands)
6553 return false;
6554 const Type *ExpectedType = Inst->getType();
6555 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
6556 if (Inst->getOperand(i: ItOp)->getType() != ExpectedType)
6557 return false;
6558 return true;
6559 };
6560 assert(!Instruction::isBinaryOp(Opcode) ||
6561 hasEqualReturnAndLeadingOperandTypes(Inst, 2));
6562 assert(!Instruction::isUnaryOp(Opcode) ||
6563 hasEqualReturnAndLeadingOperandTypes(Inst, 1));
6564 }
6565#endif
6566
6567 switch (Opcode) {
6568 default:
6569 return true;
6570 case Instruction::UDiv:
6571 case Instruction::URem: {
6572 // x / y is undefined if y == 0.
6573 const APInt *V;
6574 if (match(V: Inst->getOperand(i: 1), P: m_APInt(Res&: V)))
6575 return *V != 0;
6576 return false;
6577 }
6578 case Instruction::SDiv:
6579 case Instruction::SRem: {
    // x / y is undefined if y == 0, or if x == INT_MIN and y == -1.
6581 const APInt *Numerator, *Denominator;
6582 if (!match(V: Inst->getOperand(i: 1), P: m_APInt(Res&: Denominator)))
6583 return false;
6584 // We cannot hoist this division if the denominator is 0.
6585 if (*Denominator == 0)
6586 return false;
6587 // It's safe to hoist if the denominator is not 0 or -1.
6588 if (!Denominator->isAllOnes())
6589 return true;
    // At this point we know that the denominator is -1. It is safe to hoist
    // as long as we know that the numerator is not INT_MIN.
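    // For example, "sdiv i8 %x, -1" is only safe to hoist when %x is known
    // not to be -128, since -128 / -1 overflows.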
6592 if (match(V: Inst->getOperand(i: 0), P: m_APInt(Res&: Numerator)))
6593 return !Numerator->isMinSignedValue();
6594 // The numerator *might* be MinSignedValue.
6595 return false;
6596 }
6597 case Instruction::Load: {
6598 const LoadInst *LI = dyn_cast<LoadInst>(Val: Inst);
6599 if (!LI)
6600 return false;
6601 if (mustSuppressSpeculation(LI: *LI))
6602 return false;
6603 const DataLayout &DL = LI->getModule()->getDataLayout();
6604 return isDereferenceableAndAlignedPointer(V: LI->getPointerOperand(),
6605 Ty: LI->getType(), Alignment: LI->getAlign(), DL,
6606 CtxI, AC, DT, TLI);
6607 }
6608 case Instruction::Call: {
6609 auto *CI = dyn_cast<const CallInst>(Val: Inst);
6610 if (!CI)
6611 return false;
6612 const Function *Callee = CI->getCalledFunction();
6613
6614 // The called function could have undefined behavior or side-effects, even
6615 // if marked readnone nounwind.
6616 return Callee && Callee->isSpeculatable();
6617 }
6618 case Instruction::VAArg:
6619 case Instruction::Alloca:
6620 case Instruction::Invoke:
6621 case Instruction::CallBr:
6622 case Instruction::PHI:
6623 case Instruction::Store:
6624 case Instruction::Ret:
6625 case Instruction::Br:
6626 case Instruction::IndirectBr:
6627 case Instruction::Switch:
6628 case Instruction::Unreachable:
6629 case Instruction::Fence:
6630 case Instruction::AtomicRMW:
6631 case Instruction::AtomicCmpXchg:
6632 case Instruction::LandingPad:
6633 case Instruction::Resume:
6634 case Instruction::CatchSwitch:
6635 case Instruction::CatchPad:
6636 case Instruction::CatchRet:
6637 case Instruction::CleanupPad:
6638 case Instruction::CleanupRet:
6639 return false; // Misc instructions which have effects
6640 }
6641}
6642
6643bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
6644 if (I.mayReadOrWriteMemory())
6645 // Memory dependency possible
6646 return true;
6647 if (!isSafeToSpeculativelyExecute(Inst: &I))
    // Can't move above a may-throw call or an infinite loop, or, for an
    // inalloca alloca, above a stacksave call.
6650 return true;
6651 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
    // 1) Can't reorder two inf-loop calls, even if readonly.
    // 2) Also can't reorder an inf-loop call below an instruction which isn't
    //    safe to speculatively execute. (Inverse of above.)
6655 return true;
6656 return false;
6657}
6658
6659/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
6660static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
6661 switch (OR) {
6662 case ConstantRange::OverflowResult::MayOverflow:
6663 return OverflowResult::MayOverflow;
6664 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
6665 return OverflowResult::AlwaysOverflowsLow;
6666 case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
6667 return OverflowResult::AlwaysOverflowsHigh;
6668 case ConstantRange::OverflowResult::NeverOverflows:
6669 return OverflowResult::NeverOverflows;
6670 }
6671 llvm_unreachable("Unknown OverflowResult");
6672}
6673
6674/// Combine constant ranges from computeConstantRange() and computeKnownBits().
6675ConstantRange
6676llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
6677 bool ForSigned,
6678 const SimplifyQuery &SQ) {
6679 ConstantRange CR1 =
6680 ConstantRange::fromKnownBits(Known: V.getKnownBits(Q: SQ), IsSigned: ForSigned);
6681 ConstantRange CR2 = computeConstantRange(V, ForSigned, UseInstrInfo: SQ.IIQ.UseInstrInfo);
6682 ConstantRange::PreferredRangeType RangeType =
6683 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
6684 return CR1.intersectWith(CR: CR2, Type: RangeType);
6685}
6686
6687OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
6688 const Value *RHS,
6689 const SimplifyQuery &SQ) {
6690 KnownBits LHSKnown = computeKnownBits(V: LHS, /*Depth=*/0, Q: SQ);
6691 KnownBits RHSKnown = computeKnownBits(V: RHS, /*Depth=*/0, Q: SQ);
6692 ConstantRange LHSRange = ConstantRange::fromKnownBits(Known: LHSKnown, IsSigned: false);
6693 ConstantRange RHSRange = ConstantRange::fromKnownBits(Known: RHSKnown, IsSigned: false);
6694 return mapOverflowResult(OR: LHSRange.unsignedMulMayOverflow(Other: RHSRange));
6695}
6696
6697OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
6698 const Value *RHS,
6699 const SimplifyQuery &SQ) {
6700 // Multiplying n * m significant bits yields a result of n + m significant
6701 // bits. If the total number of significant bits does not exceed the
6702 // result bit width (minus 1), there is no overflow.
6703 // This means if we have enough leading sign bits in the operands
6704 // we can guarantee that the result does not overflow.
6705 // Ref: "Hacker's Delight" by Henry Warren
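  // For example, two i16 operands each known to fit in 8 bits have at least
  // 9 sign bits apiece, so SignBits >= 18 > 16 + 1 and the product cannot
  // overflow.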
6706 unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
6707
6708 // Note that underestimating the number of sign bits gives a more
6709 // conservative answer.
6710 unsigned SignBits =
6711 ::ComputeNumSignBits(V: LHS, Depth: 0, Q: SQ) + ::ComputeNumSignBits(V: RHS, Depth: 0, Q: SQ);
6712
6713 // First handle the easy case: if we have enough sign bits there's
6714 // definitely no overflow.
6715 if (SignBits > BitWidth + 1)
6716 return OverflowResult::NeverOverflows;
6717
6718 // There are two ambiguous cases where there can be no overflow:
6719 // SignBits == BitWidth + 1 and
6720 // SignBits == BitWidth
6721 // The second case is difficult to check, therefore we only handle the
6722 // first case.
6723 if (SignBits == BitWidth + 1) {
6724 // It overflows only when both arguments are negative and the true
6725 // product is exactly the minimum negative number.
6726 // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
6727 // For simplicity we just check if at least one side is not negative.
6728 KnownBits LHSKnown = computeKnownBits(V: LHS, /*Depth=*/0, Q: SQ);
6729 KnownBits RHSKnown = computeKnownBits(V: RHS, /*Depth=*/0, Q: SQ);
6730 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
6731 return OverflowResult::NeverOverflows;
6732 }
6733 return OverflowResult::MayOverflow;
6734}
6735
6736OverflowResult
6737llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
6738 const WithCache<const Value *> &RHS,
6739 const SimplifyQuery &SQ) {
6740 ConstantRange LHSRange =
6741 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/false, SQ);
6742 ConstantRange RHSRange =
6743 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/false, SQ);
6744 return mapOverflowResult(OR: LHSRange.unsignedAddMayOverflow(Other: RHSRange));
6745}
6746
6747static OverflowResult
6748computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
6749 const WithCache<const Value *> &RHS,
6750 const AddOperator *Add, const SimplifyQuery &SQ) {
6751 if (Add && Add->hasNoSignedWrap()) {
6752 return OverflowResult::NeverOverflows;
6753 }
6754
6755 // If LHS and RHS each have at least two sign bits, the addition will look
6756 // like
6757 //
6758 // XX..... +
6759 // YY.....
6760 //
6761 // If the carry into the most significant position is 0, X and Y can't both
6762 // be 1 and therefore the carry out of the addition is also 0.
6763 //
6764 // If the carry into the most significant position is 1, X and Y can't both
6765 // be 0 and therefore the carry out of the addition is also 1.
6766 //
6767 // Since the carry into the most significant position is always equal to
6768 // the carry out of the addition, there is no signed overflow.
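  // For example, two i8 values with two sign bits each lie in [-64, 63], so
  // their sum lies in [-128, 126] and cannot wrap.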
6769 if (::ComputeNumSignBits(V: LHS, Depth: 0, Q: SQ) > 1 &&
6770 ::ComputeNumSignBits(V: RHS, Depth: 0, Q: SQ) > 1)
6771 return OverflowResult::NeverOverflows;
6772
6773 ConstantRange LHSRange =
6774 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/true, SQ);
6775 ConstantRange RHSRange =
6776 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/true, SQ);
6777 OverflowResult OR =
6778 mapOverflowResult(OR: LHSRange.signedAddMayOverflow(Other: RHSRange));
6779 if (OR != OverflowResult::MayOverflow)
6780 return OR;
6781
  // The remaining code needs Add to be available. Return early if it is not.
6783 if (!Add)
6784 return OverflowResult::MayOverflow;
6785
6786 // If the sign of Add is the same as at least one of the operands, this add
6787 // CANNOT overflow. If this can be determined from the known bits of the
6788 // operands the above signedAddMayOverflow() check will have already done so.
6789 // The only other way to improve on the known bits is from an assumption, so
6790 // call computeKnownBitsFromContext() directly.
6791 bool LHSOrRHSKnownNonNegative =
6792 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
6793 bool LHSOrRHSKnownNegative =
6794 (LHSRange.isAllNegative() || RHSRange.isAllNegative());
6795 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
6796 KnownBits AddKnown(LHSRange.getBitWidth());
6797 computeKnownBitsFromContext(V: Add, Known&: AddKnown, /*Depth=*/0, Q: SQ);
6798 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
6799 (AddKnown.isNegative() && LHSOrRHSKnownNegative))
6800 return OverflowResult::NeverOverflows;
6801 }
6802
6803 return OverflowResult::MayOverflow;
6804}
6805
6806OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
6807 const Value *RHS,
6808 const SimplifyQuery &SQ) {
6809 // X - (X % ?)
6810 // The remainder of a value can't have greater magnitude than itself,
6811 // so the subtraction can't overflow.
6812
6813 // X - (X -nuw ?)
6814 // In the minimal case, this would simplify to "?", so there's no subtract
6815 // at all. But if this analysis is used to peek through casts, for example,
6816 // then determining no-overflow may allow other transforms.
6817
6818 // TODO: There are other patterns like this.
6819 // See simplifyICmpWithBinOpOnLHS() for candidates.
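  // An illustrative (hypothetical) instance of the first pattern:
  //   %r = urem i32 %x, %y
  //   %s = sub i32 %x, %r   ; cannot wrap unsigned, since %r <= %x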
6820 if (match(V: RHS, P: m_URem(L: m_Specific(V: LHS), R: m_Value())) ||
6821 match(V: RHS, P: m_NUWSub(L: m_Specific(V: LHS), R: m_Value())))
6822 if (isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
6823 return OverflowResult::NeverOverflows;
6824
6825 // Checking for conditions implied by dominating conditions may be expensive.
6826 // Limit it to usub_with_overflow calls for now.
6827 if (match(SQ.CxtI,
6828 m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value())))
6829 if (auto C = isImpliedByDomCondition(Pred: CmpInst::ICMP_UGE, LHS, RHS, ContextI: SQ.CxtI,
6830 DL: SQ.DL)) {
6831 if (*C)
6832 return OverflowResult::NeverOverflows;
6833 return OverflowResult::AlwaysOverflowsLow;
6834 }
6835 ConstantRange LHSRange =
6836 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/false, SQ);
6837 ConstantRange RHSRange =
6838 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/false, SQ);
6839 return mapOverflowResult(OR: LHSRange.unsignedSubMayOverflow(Other: RHSRange));
6840}
6841
6842OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
6843 const Value *RHS,
6844 const SimplifyQuery &SQ) {
6845 // X - (X % ?)
6846 // The remainder of a value can't have greater magnitude than itself,
6847 // so the subtraction can't overflow.
6848
6849 // X - (X -nsw ?)
6850 // In the minimal case, this would simplify to "?", so there's no subtract
6851 // at all. But if this analysis is used to peek through casts, for example,
6852 // then determining no-overflow may allow other transforms.
6853 if (match(V: RHS, P: m_SRem(L: m_Specific(V: LHS), R: m_Value())) ||
6854 match(V: RHS, P: m_NSWSub(L: m_Specific(V: LHS), R: m_Value())))
6855 if (isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
6856 return OverflowResult::NeverOverflows;
6857
6858 // If LHS and RHS each have at least two sign bits, the subtraction
6859 // cannot overflow.
6860 if (::ComputeNumSignBits(V: LHS, Depth: 0, Q: SQ) > 1 &&
6861 ::ComputeNumSignBits(V: RHS, Depth: 0, Q: SQ) > 1)
6862 return OverflowResult::NeverOverflows;
6863
6864 ConstantRange LHSRange =
6865 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/true, SQ);
6866 ConstantRange RHSRange =
6867 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/true, SQ);
6868 return mapOverflowResult(OR: LHSRange.signedSubMayOverflow(Other: RHSRange));
6869}
6870
6871bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
6872 const DominatorTree &DT) {
6873 SmallVector<const BranchInst *, 2> GuardingBranches;
6874 SmallVector<const ExtractValueInst *, 2> Results;
6875
6876 for (const User *U : WO->users()) {
6877 if (const auto *EVI = dyn_cast<ExtractValueInst>(Val: U)) {
6878 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
6879
6880 if (EVI->getIndices()[0] == 0)
6881 Results.push_back(Elt: EVI);
6882 else {
6883 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
6884
6885 for (const auto *U : EVI->users())
6886 if (const auto *B = dyn_cast<BranchInst>(Val: U)) {
6887 assert(B->isConditional() && "How else is it using an i1?");
6888 GuardingBranches.push_back(Elt: B);
6889 }
6890 }
6891 } else {
6892 // We are using the aggregate directly in a way we don't want to analyze
6893 // here (storing it to a global, say).
6894 return false;
6895 }
6896 }
6897
6898 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
6899 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(i: 1));
6900 if (!NoWrapEdge.isSingleEdge())
6901 return false;
6902
6903 // Check if all users of the add are provably no-wrap.
6904 for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, then we don't
      // need to check each use separately, since domination is transitive.
6907 if (DT.dominates(BBE: NoWrapEdge, BB: Result->getParent()))
6908 continue;
6909
6910 for (const auto &RU : Result->uses())
6911 if (!DT.dominates(BBE: NoWrapEdge, U: RU))
6912 return false;
6913 }
6914
6915 return true;
6916 };
6917
6918 return llvm::any_of(Range&: GuardingBranches, P: AllUsesGuardedByBranch);
6919}
6920
/// Shifts return poison if the shift amount is not smaller than the bitwidth.
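///
/// For example, "shl i32 %x, 33" yields poison, and a vector shift amount of
/// <i8 3, i8 9> is not known to be in range because 9 >= 8.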
6922static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
6923 auto *C = dyn_cast<Constant>(Val: ShiftAmount);
6924 if (!C)
6925 return false;
6926
  // Shifts return poison if the shift amount is not smaller than the bitwidth.
6928 SmallVector<const Constant *, 4> ShiftAmounts;
6929 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: C->getType())) {
6930 unsigned NumElts = FVTy->getNumElements();
6931 for (unsigned i = 0; i < NumElts; ++i)
6932 ShiftAmounts.push_back(Elt: C->getAggregateElement(Elt: i));
6933 } else if (isa<ScalableVectorType>(Val: C->getType()))
6934 return false; // Can't tell, just return false to be safe
6935 else
6936 ShiftAmounts.push_back(Elt: C);
6937
6938 bool Safe = llvm::all_of(Range&: ShiftAmounts, P: [](const Constant *C) {
6939 auto *CI = dyn_cast_or_null<ConstantInt>(Val: C);
6940 return CI && CI->getValue().ult(RHS: C->getType()->getIntegerBitWidth());
6941 });
6942
6943 return Safe;
6944}
6945
6946enum class UndefPoisonKind {
6947 PoisonOnly = (1 << 0),
6948 UndefOnly = (1 << 1),
6949 UndefOrPoison = PoisonOnly | UndefOnly,
6950};
6951
6952static bool includesPoison(UndefPoisonKind Kind) {
6953 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
6954}
6955
6956static bool includesUndef(UndefPoisonKind Kind) {
6957 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
6958}
6959
6960static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
6961 bool ConsiderFlagsAndMetadata) {
6962
6963 if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
6964 Op->hasPoisonGeneratingAnnotations())
6965 return true;
6966
6967 unsigned Opcode = Op->getOpcode();
6968
6969 // Check whether opcode is a poison/undef-generating operation
6970 switch (Opcode) {
6971 case Instruction::Shl:
6972 case Instruction::AShr:
6973 case Instruction::LShr:
6974 return includesPoison(Kind) && !shiftAmountKnownInRange(ShiftAmount: Op->getOperand(i: 1));
6975 case Instruction::FPToSI:
6976 case Instruction::FPToUI:
6977 // fptosi/ui yields poison if the resulting value does not fit in the
6978 // destination type.
6979 return true;
6980 case Instruction::Call:
6981 if (auto *II = dyn_cast<IntrinsicInst>(Val: Op)) {
6982 switch (II->getIntrinsicID()) {
6983 // TODO: Add more intrinsics.
6984 case Intrinsic::ctlz:
6985 case Intrinsic::cttz:
6986 case Intrinsic::abs:
6987 if (cast<ConstantInt>(Val: II->getArgOperand(i: 1))->isNullValue())
6988 return false;
6989 break;
6990 case Intrinsic::ctpop:
6991 case Intrinsic::bswap:
6992 case Intrinsic::bitreverse:
6993 case Intrinsic::fshl:
6994 case Intrinsic::fshr:
6995 case Intrinsic::smax:
6996 case Intrinsic::smin:
6997 case Intrinsic::umax:
6998 case Intrinsic::umin:
6999 case Intrinsic::ptrmask:
7000 case Intrinsic::fptoui_sat:
7001 case Intrinsic::fptosi_sat:
7002 case Intrinsic::sadd_with_overflow:
7003 case Intrinsic::ssub_with_overflow:
7004 case Intrinsic::smul_with_overflow:
7005 case Intrinsic::uadd_with_overflow:
7006 case Intrinsic::usub_with_overflow:
7007 case Intrinsic::umul_with_overflow:
7008 case Intrinsic::sadd_sat:
7009 case Intrinsic::uadd_sat:
7010 case Intrinsic::ssub_sat:
7011 case Intrinsic::usub_sat:
7012 return false;
7013 case Intrinsic::sshl_sat:
7014 case Intrinsic::ushl_sat:
7015 return includesPoison(Kind) &&
7016 !shiftAmountKnownInRange(ShiftAmount: II->getArgOperand(i: 1));
7017 case Intrinsic::fma:
7018 case Intrinsic::fmuladd:
7019 case Intrinsic::sqrt:
7020 case Intrinsic::powi:
7021 case Intrinsic::sin:
7022 case Intrinsic::cos:
7023 case Intrinsic::pow:
7024 case Intrinsic::log:
7025 case Intrinsic::log10:
7026 case Intrinsic::log2:
7027 case Intrinsic::exp:
7028 case Intrinsic::exp2:
7029 case Intrinsic::exp10:
7030 case Intrinsic::fabs:
7031 case Intrinsic::copysign:
7032 case Intrinsic::floor:
7033 case Intrinsic::ceil:
7034 case Intrinsic::trunc:
7035 case Intrinsic::rint:
7036 case Intrinsic::nearbyint:
7037 case Intrinsic::round:
7038 case Intrinsic::roundeven:
7039 case Intrinsic::fptrunc_round:
7040 case Intrinsic::canonicalize:
7041 case Intrinsic::arithmetic_fence:
7042 case Intrinsic::minnum:
7043 case Intrinsic::maxnum:
7044 case Intrinsic::minimum:
7045 case Intrinsic::maximum:
7046 case Intrinsic::is_fpclass:
7047 case Intrinsic::ldexp:
7048 case Intrinsic::frexp:
7049 return false;
7050 case Intrinsic::lround:
7051 case Intrinsic::llround:
7052 case Intrinsic::lrint:
7053 case Intrinsic::llrint:
        // If the value doesn't fit, an unspecified value is returned (but
        // this is not poison).
7056 return false;
7057 }
7058 }
7059 [[fallthrough]];
7060 case Instruction::CallBr:
7061 case Instruction::Invoke: {
7062 const auto *CB = cast<CallBase>(Val: Op);
7063 return !CB->hasRetAttr(Attribute::NoUndef);
7064 }
7065 case Instruction::InsertElement:
7066 case Instruction::ExtractElement: {
    // If the index exceeds the length of the vector, the result is poison.
7068 auto *VTy = cast<VectorType>(Val: Op->getOperand(i: 0)->getType());
7069 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
7070 auto *Idx = dyn_cast<ConstantInt>(Val: Op->getOperand(i: IdxOp));
7071 if (includesPoison(Kind))
7072 return !Idx ||
7073 Idx->getValue().uge(RHS: VTy->getElementCount().getKnownMinValue());
7074 return false;
7075 }
7076 case Instruction::ShuffleVector: {
7077 ArrayRef<int> Mask = isa<ConstantExpr>(Val: Op)
7078 ? cast<ConstantExpr>(Val: Op)->getShuffleMask()
7079 : cast<ShuffleVectorInst>(Val: Op)->getShuffleMask();
7080 return includesPoison(Kind) && is_contained(Range&: Mask, Element: PoisonMaskElem);
7081 }
7082 case Instruction::FNeg:
7083 case Instruction::PHI:
7084 case Instruction::Select:
7085 case Instruction::URem:
7086 case Instruction::SRem:
7087 case Instruction::ExtractValue:
7088 case Instruction::InsertValue:
7089 case Instruction::Freeze:
7090 case Instruction::ICmp:
7091 case Instruction::FCmp:
7092 case Instruction::FAdd:
7093 case Instruction::FSub:
7094 case Instruction::FMul:
7095 case Instruction::FDiv:
7096 case Instruction::FRem:
7097 return false;
7098 case Instruction::GetElementPtr:
7099 // inbounds is handled above
7100 // TODO: what about inrange on constexpr?
7101 return false;
7102 default: {
7103 const auto *CE = dyn_cast<ConstantExpr>(Val: Op);
7104 if (isa<CastInst>(Val: Op) || (CE && CE->isCast()))
7105 return false;
7106 else if (Instruction::isBinaryOp(Opcode))
7107 return false;
7108 // Be conservative and return true.
7109 return true;
7110 }
7111 }
7112}
7113
7114bool llvm::canCreateUndefOrPoison(const Operator *Op,
7115 bool ConsiderFlagsAndMetadata) {
7116 return ::canCreateUndefOrPoison(Op, Kind: UndefPoisonKind::UndefOrPoison,
7117 ConsiderFlagsAndMetadata);
7118}
7119
7120bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) {
7121 return ::canCreateUndefOrPoison(Op, Kind: UndefPoisonKind::PoisonOnly,
7122 ConsiderFlagsAndMetadata);
7123}
7124
7125static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V,
7126 unsigned Depth) {
7127 if (ValAssumedPoison == V)
7128 return true;
7129
7130 const unsigned MaxDepth = 2;
7131 if (Depth >= MaxDepth)
7132 return false;
7133
7134 if (const auto *I = dyn_cast<Instruction>(Val: V)) {
7135 if (any_of(Range: I->operands(), P: [=](const Use &Op) {
7136 return propagatesPoison(PoisonOp: Op) &&
7137 directlyImpliesPoison(ValAssumedPoison, V: Op, Depth: Depth + 1);
7138 }))
7139 return true;
7140
7141 // V = extractvalue V0, idx
7142 // V2 = extractvalue V0, idx2
7143 // V0's elements are all poison or not. (e.g., add_with_overflow)
7144 const WithOverflowInst *II;
7145 if (match(V: I, P: m_ExtractValue(V: m_WithOverflowInst(I&: II))) &&
7146 (match(V: ValAssumedPoison, P: m_ExtractValue(V: m_Specific(V: II))) ||
7147 llvm::is_contained(Range: II->args(), Element: ValAssumedPoison)))
7148 return true;
7149 }
7150 return false;
7151}
7152
7153static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
7154 unsigned Depth) {
7155 if (isGuaranteedNotToBePoison(V: ValAssumedPoison))
7156 return true;
7157
7158 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
7159 return true;
7160
7161 const unsigned MaxDepth = 2;
7162 if (Depth >= MaxDepth)
7163 return false;
7164
7165 const auto *I = dyn_cast<Instruction>(Val: ValAssumedPoison);
7166 if (I && !canCreatePoison(Op: cast<Operator>(Val: I))) {
7167 return all_of(Range: I->operands(), P: [=](const Value *Op) {
7168 return impliesPoison(ValAssumedPoison: Op, V, Depth: Depth + 1);
7169 });
7170 }
7171 return false;
7172}
7173
7174bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
7175 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
7176}
7177
7178static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
7179
7180static bool isGuaranteedNotToBeUndefOrPoison(
7181 const Value *V, AssumptionCache *AC, const Instruction *CtxI,
7182 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
7183 if (Depth >= MaxAnalysisRecursionDepth)
7184 return false;
7185
7186 if (isa<MetadataAsValue>(Val: V))
7187 return false;
7188
7189 if (const auto *A = dyn_cast<Argument>(Val: V)) {
7190 if (A->hasAttribute(Attribute::NoUndef) ||
7191 A->hasAttribute(Attribute::Dereferenceable) ||
7192 A->hasAttribute(Attribute::DereferenceableOrNull))
7193 return true;
7194 }
7195
7196 if (auto *C = dyn_cast<Constant>(Val: V)) {
7197 if (isa<PoisonValue>(Val: C))
7198 return !includesPoison(Kind);
7199
7200 if (isa<UndefValue>(Val: C))
7201 return !includesUndef(Kind);
7202
7203 if (isa<ConstantInt>(Val: C) || isa<GlobalVariable>(Val: C) || isa<ConstantFP>(Val: V) ||
7204 isa<ConstantPointerNull>(Val: C) || isa<Function>(Val: C))
7205 return true;
7206
7207 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(Val: C))
7208 return (!includesUndef(Kind) ? !C->containsPoisonElement()
7209 : !C->containsUndefOrPoisonElement()) &&
7210 !C->containsConstantExpression();
7211 }
7212
  // Strip cast operations from a pointer value.
  // Note that stripPointerCastsSameRepresentation can strip off getelementptr
  // inbounds with zero offset. To guarantee that the result isn't poison, the
  // stripped pointer is checked: it has to point into an allocated object or
  // be null to ensure that `inbounds` getelementptrs with a zero offset could
  // not produce poison.
  // It can strip off addrspacecasts that do not change the bit representation
  // as well. We believe that such an addrspacecast is equivalent to a no-op.
7221 auto *StrippedV = V->stripPointerCastsSameRepresentation();
7222 if (isa<AllocaInst>(Val: StrippedV) || isa<GlobalVariable>(Val: StrippedV) ||
7223 isa<Function>(Val: StrippedV) || isa<ConstantPointerNull>(Val: StrippedV))
7224 return true;
7225
7226 auto OpCheck = [&](const Value *V) {
7227 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth: Depth + 1, Kind);
7228 };
7229
7230 if (auto *Opr = dyn_cast<Operator>(Val: V)) {
7231 // If the value is a freeze instruction, then it can never
7232 // be undef or poison.
7233 if (isa<FreezeInst>(Val: V))
7234 return true;
7235
7236 if (const auto *CB = dyn_cast<CallBase>(Val: V)) {
7237 if (CB->hasRetAttr(Attribute::NoUndef) ||
7238 CB->hasRetAttr(Attribute::Dereferenceable) ||
7239 CB->hasRetAttr(Attribute::DereferenceableOrNull))
7240 return true;
7241 }
7242
7243 if (const auto *PN = dyn_cast<PHINode>(Val: V)) {
7244 unsigned Num = PN->getNumIncomingValues();
7245 bool IsWellDefined = true;
7246 for (unsigned i = 0; i < Num; ++i) {
7247 auto *TI = PN->getIncomingBlock(i)->getTerminator();
7248 if (!isGuaranteedNotToBeUndefOrPoison(V: PN->getIncomingValue(i), AC, CtxI: TI,
7249 DT, Depth: Depth + 1, Kind)) {
7250 IsWellDefined = false;
7251 break;
7252 }
7253 }
7254 if (IsWellDefined)
7255 return true;
7256 } else if (!::canCreateUndefOrPoison(Op: Opr, Kind,
7257 /*ConsiderFlagsAndMetadata*/ true) &&
7258 all_of(Range: Opr->operands(), P: OpCheck))
7259 return true;
7260 }
7261
7262 if (auto *I = dyn_cast<LoadInst>(Val: V))
7263 if (I->hasMetadata(KindID: LLVMContext::MD_noundef) ||
7264 I->hasMetadata(KindID: LLVMContext::MD_dereferenceable) ||
7265 I->hasMetadata(KindID: LLVMContext::MD_dereferenceable_or_null))
7266 return true;
7267
7268 if (programUndefinedIfUndefOrPoison(V, PoisonOnly: !includesUndef(Kind)))
7269 return true;
7270
7271 // CxtI may be null or a cloned instruction.
7272 if (!CtxI || !CtxI->getParent() || !DT)
7273 return false;
7274
7275 auto *DNode = DT->getNode(BB: CtxI->getParent());
7276 if (!DNode)
7277 // Unreachable block
7278 return false;
7279
7280 // If V is used as a branch condition before reaching CtxI, V cannot be
7281 // undef or poison.
7282 // br V, BB1, BB2
7283 // BB1:
7284 // CtxI ; V cannot be undef or poison here
7285 auto *Dominator = DNode->getIDom();
7286 while (Dominator) {
7287 auto *TI = Dominator->getBlock()->getTerminator();
7288
7289 Value *Cond = nullptr;
7290 if (auto BI = dyn_cast_or_null<BranchInst>(Val: TI)) {
7291 if (BI->isConditional())
7292 Cond = BI->getCondition();
7293 } else if (auto SI = dyn_cast_or_null<SwitchInst>(Val: TI)) {
7294 Cond = SI->getCondition();
7295 }
7296
7297 if (Cond) {
7298 if (Cond == V)
7299 return true;
7300 else if (!includesUndef(Kind) && isa<Operator>(Val: Cond)) {
7301 // For poison, we can analyze further
7302 auto *Opr = cast<Operator>(Val: Cond);
7303 if (any_of(Range: Opr->operands(),
7304 P: [V](const Use &U) { return V == U && propagatesPoison(PoisonOp: U); }))
7305 return true;
7306 }
7307 }
7308
7309 Dominator = Dominator->getIDom();
7310 }
7311
7312 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC))
7313 return true;
7314
7315 return false;
7316}
7317
7318bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
7319 const Instruction *CtxI,
7320 const DominatorTree *DT,
7321 unsigned Depth) {
7322 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7323 Kind: UndefPoisonKind::UndefOrPoison);
7324}
7325
7326bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
7327 const Instruction *CtxI,
7328 const DominatorTree *DT, unsigned Depth) {
7329 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7330 Kind: UndefPoisonKind::PoisonOnly);
7331}
7332
7333bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
7334 const Instruction *CtxI,
7335 const DominatorTree *DT, unsigned Depth) {
7336 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7337 Kind: UndefPoisonKind::UndefOnly);
7338}
7339
7340/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
7342/// anything about whether OnPathTo is actually executed or whether Root is
7343/// actually poison. This can be used to assess whether a new use of Root can
7344/// be added at a location which is control equivalent with OnPathTo (such as
7345/// immediately before it) without introducing UB which didn't previously
7346/// exist. Note that a false result conveys no information.
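/// For example, if a udiv whose divisor is derived from Root through
/// poison-propagating operations dominates OnPathTo, then poison in Root
/// would make that division immediate UB, so this function can return true.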
7347bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
7348 Instruction *OnPathTo,
7349 DominatorTree *DT) {
7350 // Basic approach is to assume Root is poison, propagate poison forward
7351 // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
7353 // exit.
7354
7355 // The set of all recursive users we've visited (which are assumed to all be
7356 // poison because of said visit)
7357 SmallSet<const Value *, 16> KnownPoison;
7358 SmallVector<const Instruction*, 16> Worklist;
7359 Worklist.push_back(Elt: Root);
7360 while (!Worklist.empty()) {
7361 const Instruction *I = Worklist.pop_back_val();
7362
    // If we know this must trigger UB on a path leading to our target.
7364 if (mustTriggerUB(I, KnownPoison) && DT->dominates(Def: I, User: OnPathTo))
7365 return true;
7366
    // If we can't analyze propagation through this instruction, just skip it
    // and its transitive users. Safe as false is a conservative result.
7369 if (I != Root && !any_of(Range: I->operands(), P: [&KnownPoison](const Use &U) {
7370 return KnownPoison.contains(Ptr: U) && propagatesPoison(PoisonOp: U);
7371 }))
7372 continue;
7373
7374 if (KnownPoison.insert(Ptr: I).second)
7375 for (const User *User : I->users())
7376 Worklist.push_back(Elt: cast<Instruction>(Val: User));
7377 }
7378
  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
7381 return false;
7382}
7383
7384OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
7385 const SimplifyQuery &SQ) {
7386 return ::computeOverflowForSignedAdd(LHS: Add->getOperand(i_nocapture: 0), RHS: Add->getOperand(i_nocapture: 1),
7387 Add, SQ);
7388}
7389
7390OverflowResult
7391llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7392 const WithCache<const Value *> &RHS,
7393 const SimplifyQuery &SQ) {
7394 return ::computeOverflowForSignedAdd(LHS, RHS, Add: nullptr, SQ);
7395}
7396
7397bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
7398 // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7399 // of time because it's possible for another thread to interfere with it for an
7400 // arbitrary length of time, but programs aren't allowed to rely on that.
7401
7402 // If there is no successor, then execution can't transfer to it.
7403 if (isa<ReturnInst>(Val: I))
7404 return false;
7405 if (isa<UnreachableInst>(Val: I))
7406 return false;
7407
7408 // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7409 // Instruction::willReturn.
7410 //
7411 // FIXME: Move this check into Instruction::willReturn.
7412 if (isa<CatchPadInst>(Val: I)) {
7413 switch (classifyEHPersonality(Pers: I->getFunction()->getPersonalityFn())) {
7414 default:
7415 // A catchpad may invoke exception object constructors and such, which
7416 // in some languages can be arbitrary code, so be conservative by default.
7417 return false;
7418 case EHPersonality::CoreCLR:
7419 // For CoreCLR, it just involves a type test.
7420 return true;
7421 }
7422 }
7423
7424 // An instruction that returns without throwing must transfer control flow
7425 // to a successor.
7426 return !I->mayThrow() && I->willReturn();
7427}
7428
7429bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
  // TODO: This is slightly conservative for invoke instructions since exiting
7431 // via an exception *is* normal control for them.
7432 for (const Instruction &I : *BB)
7433 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
7434 return false;
7435 return true;
7436}
7437
7438bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7439 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
7440 unsigned ScanLimit) {
7441 return isGuaranteedToTransferExecutionToSuccessor(Range: make_range(x: Begin, y: End),
7442 ScanLimit);
7443}
7444
7445bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7446 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
7447 assert(ScanLimit && "scan limit must be non-zero");
7448 for (const Instruction &I : Range) {
7449 if (isa<DbgInfoIntrinsic>(Val: I))
7450 continue;
7451 if (--ScanLimit == 0)
7452 return false;
7453 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
7454 return false;
7455 }
7456 return true;
7457}
7458
7459bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
7460 const Loop *L) {
7461 // The loop header is guaranteed to be executed for every iteration.
7462 //
7463 // FIXME: Relax this constraint to cover all basic blocks that are
7464 // guaranteed to be executed at every iteration.
7465 if (I->getParent() != L->getHeader()) return false;
7466
7467 for (const Instruction &LI : *L->getHeader()) {
7468 if (&LI == I) return true;
7469 if (!isGuaranteedToTransferExecutionToSuccessor(I: &LI)) return false;
7470 }
7471 llvm_unreachable("Instruction not contained in its own parent basic block.");
7472}
7473
7474bool llvm::propagatesPoison(const Use &PoisonOp) {
7475 const Operator *I = cast<Operator>(Val: PoisonOp.getUser());
7476 switch (I->getOpcode()) {
7477 case Instruction::Freeze:
7478 case Instruction::PHI:
7479 case Instruction::Invoke:
7480 return false;
7481 case Instruction::Select:
7482 return PoisonOp.getOperandNo() == 0;
7483 case Instruction::Call:
7484 if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
7485 switch (II->getIntrinsicID()) {
7486 // TODO: Add more intrinsics.
7487 case Intrinsic::sadd_with_overflow:
7488 case Intrinsic::ssub_with_overflow:
7489 case Intrinsic::smul_with_overflow:
7490 case Intrinsic::uadd_with_overflow:
7491 case Intrinsic::usub_with_overflow:
7492 case Intrinsic::umul_with_overflow:
      // If an input is a vector containing a poison element, the
      // corresponding lanes of both output vectors (the calculated
      // results and the overflow bits) are poison.
7496 return true;
7497 case Intrinsic::ctpop:
7498 case Intrinsic::ctlz:
7499 case Intrinsic::cttz:
7500 case Intrinsic::abs:
7501 case Intrinsic::smax:
7502 case Intrinsic::smin:
7503 case Intrinsic::umax:
7504 case Intrinsic::umin:
7505 case Intrinsic::bitreverse:
7506 case Intrinsic::bswap:
7507 case Intrinsic::sadd_sat:
7508 case Intrinsic::ssub_sat:
7509 case Intrinsic::sshl_sat:
7510 case Intrinsic::uadd_sat:
7511 case Intrinsic::usub_sat:
7512 case Intrinsic::ushl_sat:
7513 return true;
7514 }
7515 }
7516 return false;
7517 case Instruction::ICmp:
7518 case Instruction::FCmp:
7519 case Instruction::GetElementPtr:
7520 return true;
7521 default:
7522 if (isa<BinaryOperator>(Val: I) || isa<UnaryOperator>(Val: I) || isa<CastInst>(Val: I))
7523 return true;
7524
7525 // Be conservative and return false.
7526 return false;
7527 }
7528}
7529
7530/// Enumerates all operands of \p I that are guaranteed to not be undef or
7531/// poison. If the callback \p Handle returns true, stop processing and return
7532/// true. Otherwise, return false.
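/// For example, given
///   store i32 %v, ptr %p
/// only %p is enumerated: storing through an undef/poison pointer is immediate
/// UB, while storing a poison value is not.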
7533template <typename CallableT>
7534static bool handleGuaranteedWellDefinedOps(const Instruction *I,
7535 const CallableT &Handle) {
7536 switch (I->getOpcode()) {
7537 case Instruction::Store:
7538 if (Handle(cast<StoreInst>(Val: I)->getPointerOperand()))
7539 return true;
7540 break;
7541
7542 case Instruction::Load:
7543 if (Handle(cast<LoadInst>(Val: I)->getPointerOperand()))
7544 return true;
7545 break;
7546
  // Since the dereferenceable attribute implies noundef, atomic operations
  // also implicitly have noundef pointers.
7549 case Instruction::AtomicCmpXchg:
7550 if (Handle(cast<AtomicCmpXchgInst>(Val: I)->getPointerOperand()))
7551 return true;
7552 break;
7553
7554 case Instruction::AtomicRMW:
7555 if (Handle(cast<AtomicRMWInst>(Val: I)->getPointerOperand()))
7556 return true;
7557 break;
7558
7559 case Instruction::Call:
7560 case Instruction::Invoke: {
7561 const CallBase *CB = cast<CallBase>(Val: I);
7562 if (CB->isIndirectCall() && Handle(CB->getCalledOperand()))
7563 return true;
7564 for (unsigned i = 0; i < CB->arg_size(); ++i)
7565 if ((CB->paramHasAttr(i, Attribute::NoUndef) ||
7566 CB->paramHasAttr(i, Attribute::Dereferenceable) ||
7567 CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) &&
7568 Handle(CB->getArgOperand(i)))
7569 return true;
7570 break;
7571 }
7572 case Instruction::Ret:
7573 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) &&
7574 Handle(I->getOperand(0)))
7575 return true;
7576 break;
7577 case Instruction::Switch:
7578 if (Handle(cast<SwitchInst>(Val: I)->getCondition()))
7579 return true;
7580 break;
7581 case Instruction::Br: {
7582 auto *BR = cast<BranchInst>(Val: I);
7583 if (BR->isConditional() && Handle(BR->getCondition()))
7584 return true;
7585 break;
7586 }
7587 default:
7588 break;
7589 }
7590
7591 return false;
7592}
7593
7594void llvm::getGuaranteedWellDefinedOps(
7595 const Instruction *I, SmallVectorImpl<const Value *> &Operands) {
7596 handleGuaranteedWellDefinedOps(I, Handle: [&](const Value *V) {
7597 Operands.push_back(Elt: V);
7598 return false;
7599 });
7600}
7601
7602/// Enumerates all operands of \p I that are guaranteed to not be poison.
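/// For example, for
///   %q = udiv i32 %a, %b
/// this additionally enumerates the divisor %b, since division by poison is
/// immediate UB (even though %b is still allowed to be partially undef).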
7603template <typename CallableT>
7604static bool handleGuaranteedNonPoisonOps(const Instruction *I,
7605 const CallableT &Handle) {
7606 if (handleGuaranteedWellDefinedOps(I, Handle))
7607 return true;
7608 switch (I->getOpcode()) {
7609 // Divisors of these operations are allowed to be partially undef.
7610 case Instruction::UDiv:
7611 case Instruction::SDiv:
7612 case Instruction::URem:
7613 case Instruction::SRem:
7614 return Handle(I->getOperand(i: 1));
7615 default:
7616 return false;
7617 }
7618}
7619
7620void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
7621 SmallVectorImpl<const Value *> &Operands) {
7622 handleGuaranteedNonPoisonOps(I, Handle: [&](const Value *V) {
7623 Operands.push_back(Elt: V);
7624 return false;
7625 });
7626}
7627
7628bool llvm::mustTriggerUB(const Instruction *I,
7629 const SmallPtrSetImpl<const Value *> &KnownPoison) {
7630 return handleGuaranteedNonPoisonOps(
7631 I, Handle: [&](const Value *V) { return KnownPoison.count(Ptr: V); });
7632}
7633
7634static bool programUndefinedIfUndefOrPoison(const Value *V,
7635 bool PoisonOnly) {
7636 // We currently only look for uses of values within the same basic
7637 // block, as that makes it easier to guarantee that the uses will be
7638 // executed given that Inst is executed.
7639 //
7640 // FIXME: Expand this to consider uses beyond the same basic block. To do
7641 // this, look out for the distinction between post-dominance and strong
7642 // post-dominance.
7643 const BasicBlock *BB = nullptr;
7644 BasicBlock::const_iterator Begin;
7645 if (const auto *Inst = dyn_cast<Instruction>(Val: V)) {
7646 BB = Inst->getParent();
7647 Begin = Inst->getIterator();
7648 Begin++;
7649 } else if (const auto *Arg = dyn_cast<Argument>(Val: V)) {
7650 if (Arg->getParent()->isDeclaration())
7651 return false;
7652 BB = &Arg->getParent()->getEntryBlock();
7653 Begin = BB->begin();
7654 } else {
7655 return false;
7656 }
7657
7658 // Limit number of instructions we look at, to avoid scanning through large
7659 // blocks. The current limit is chosen arbitrarily.
7660 unsigned ScanLimit = 32;
7661 BasicBlock::const_iterator End = BB->end();
7662
7663 if (!PoisonOnly) {
7664 // Since undef does not propagate eagerly, be conservative & just check
7665 // whether a value is directly passed to an instruction that must take
7666 // well-defined operands.
7667
7668 for (const auto &I : make_range(x: Begin, y: End)) {
7669 if (isa<DbgInfoIntrinsic>(Val: I))
7670 continue;
7671 if (--ScanLimit == 0)
7672 break;
7673
7674 if (handleGuaranteedWellDefinedOps(I: &I, Handle: [V](const Value *WellDefinedOp) {
7675 return WellDefinedOp == V;
7676 }))
7677 return true;
7678
7679 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
7680 break;
7681 }
7682 return false;
7683 }
7684
7685 // Set of instructions that we have proved will yield poison if Inst
7686 // does.
7687 SmallSet<const Value *, 16> YieldsPoison;
7688 SmallSet<const BasicBlock *, 4> Visited;
7689
7690 YieldsPoison.insert(Ptr: V);
7691 Visited.insert(Ptr: BB);
7692
7693 while (true) {
7694 for (const auto &I : make_range(x: Begin, y: End)) {
7695 if (isa<DbgInfoIntrinsic>(Val: I))
7696 continue;
7697 if (--ScanLimit == 0)
7698 return false;
7699 if (mustTriggerUB(I: &I, KnownPoison: YieldsPoison))
7700 return true;
7701 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
7702 return false;
7703
7704 // If an operand is poison and propagates it, mark I as yielding poison.
7705 for (const Use &Op : I.operands()) {
7706 if (YieldsPoison.count(Ptr: Op) && propagatesPoison(PoisonOp: Op)) {
7707 YieldsPoison.insert(Ptr: &I);
7708 break;
7709 }
7710 }
7711
7712 // Special handling for select, which returns poison if its operand 0 is
7713 // poison (handled in the loop above) *or* if both its true/false operands
7714 // are poison (handled here).
7715 if (I.getOpcode() == Instruction::Select &&
7716 YieldsPoison.count(Ptr: I.getOperand(i: 1)) &&
7717 YieldsPoison.count(Ptr: I.getOperand(i: 2))) {
7718 YieldsPoison.insert(Ptr: &I);
7719 }
7720 }
7721
7722 BB = BB->getSingleSuccessor();
7723 if (!BB || !Visited.insert(Ptr: BB).second)
7724 break;
7725
7726 Begin = BB->getFirstNonPHI()->getIterator();
7727 End = BB->end();
7728 }
7729 return false;
7730}
7731
7732bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
7733 return ::programUndefinedIfUndefOrPoison(V: Inst, PoisonOnly: false);
7734}
7735
7736bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
7737 return ::programUndefinedIfUndefOrPoison(V: Inst, PoisonOnly: true);
7738}
7739
7740static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
7741 if (FMF.noNaNs())
7742 return true;
7743
7744 if (auto *C = dyn_cast<ConstantFP>(Val: V))
7745 return !C->isNaN();
7746
7747 if (auto *C = dyn_cast<ConstantDataVector>(Val: V)) {
7748 if (!C->getElementType()->isFloatingPointTy())
7749 return false;
7750 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
7751 if (C->getElementAsAPFloat(i: I).isNaN())
7752 return false;
7753 }
7754 return true;
7755 }
7756
7757 if (isa<ConstantAggregateZero>(Val: V))
7758 return true;
7759
7760 return false;
7761}
7762
7763static bool isKnownNonZero(const Value *V) {
7764 if (auto *C = dyn_cast<ConstantFP>(Val: V))
7765 return !C->isZero();
7766
7767 if (auto *C = dyn_cast<ConstantDataVector>(Val: V)) {
7768 if (!C->getElementType()->isFloatingPointTy())
7769 return false;
7770 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
7771 if (C->getElementAsAPFloat(i: I).isZero())
7772 return false;
7773 }
7774 return true;
7775 }
7776
7777 return false;
7778}
7779
/// Match clamp pattern for float types without caring about NaNs or signed
/// zeros. Given a non-min/max outer cmp/select from the clamp pattern, this
/// function recognizes whether it can be substituted by a "canonical" min/max
/// pattern.
7784static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
7785 Value *CmpLHS, Value *CmpRHS,
7786 Value *TrueVal, Value *FalseVal,
7787 Value *&LHS, Value *&RHS) {
7788 // Try to match
7789 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
7790 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
7791 // and return description of the outer Max/Min.
7792
7793 // First, check if select has inverse order:
7794 if (CmpRHS == FalseVal) {
7795 std::swap(a&: TrueVal, b&: FalseVal);
7796 Pred = CmpInst::getInversePredicate(pred: Pred);
7797 }
7798
7799 // Assume success now. If there's no match, callers should not use these anyway.
7800 LHS = TrueVal;
7801 RHS = FalseVal;
7802
7803 const APFloat *FC1;
7804 if (CmpRHS != TrueVal || !match(V: CmpRHS, P: m_APFloat(Res&: FC1)) || !FC1->isFinite())
7805 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7806
7807 const APFloat *FC2;
7808 switch (Pred) {
7809 case CmpInst::FCMP_OLT:
7810 case CmpInst::FCMP_OLE:
7811 case CmpInst::FCMP_ULT:
7812 case CmpInst::FCMP_ULE:
7813 if (match(V: FalseVal,
7814 P: m_CombineOr(L: m_OrdFMin(L: m_Specific(V: CmpLHS), R: m_APFloat(Res&: FC2)),
7815 R: m_UnordFMin(L: m_Specific(V: CmpLHS), R: m_APFloat(Res&: FC2)))) &&
7816 *FC1 < *FC2)
7817 return {.Flavor: SPF_FMAXNUM, .NaNBehavior: SPNB_RETURNS_ANY, .Ordered: false};
7818 break;
7819 case CmpInst::FCMP_OGT:
7820 case CmpInst::FCMP_OGE:
7821 case CmpInst::FCMP_UGT:
7822 case CmpInst::FCMP_UGE:
7823 if (match(V: FalseVal,
7824 P: m_CombineOr(L: m_OrdFMax(L: m_Specific(V: CmpLHS), R: m_APFloat(Res&: FC2)),
7825 R: m_UnordFMax(L: m_Specific(V: CmpLHS), R: m_APFloat(Res&: FC2)))) &&
7826 *FC1 > *FC2)
7827 return {.Flavor: SPF_FMINNUM, .NaNBehavior: SPNB_RETURNS_ANY, .Ordered: false};
7828 break;
7829 default:
7830 break;
7831 }
7832
7833 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7834}
7835
7836/// Recognize variations of:
7837/// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
7838static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
7839 Value *CmpLHS, Value *CmpRHS,
7840 Value *TrueVal, Value *FalseVal) {
7841 // Swap the select operands and predicate to match the patterns below.
7842 if (CmpRHS != TrueVal) {
7843 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
7844 std::swap(a&: TrueVal, b&: FalseVal);
7845 }
7846 const APInt *C1;
7847 if (CmpRHS == TrueVal && match(V: CmpRHS, P: m_APInt(Res&: C1))) {
7848 const APInt *C2;
7849 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
7850 if (match(V: FalseVal, P: m_SMin(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
7851 C1->slt(RHS: *C2) && Pred == CmpInst::ICMP_SLT)
7852 return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
7853
7854 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
7855 if (match(V: FalseVal, P: m_SMax(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
7856 C1->sgt(RHS: *C2) && Pred == CmpInst::ICMP_SGT)
7857 return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
7858
7859 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
7860 if (match(V: FalseVal, P: m_UMin(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
7861 C1->ult(RHS: *C2) && Pred == CmpInst::ICMP_ULT)
7862 return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
7863
7864 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
7865 if (match(V: FalseVal, P: m_UMax(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
7866 C1->ugt(RHS: *C2) && Pred == CmpInst::ICMP_UGT)
7867 return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
7868 }
7869 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7870}
7871
7872/// Recognize variations of:
7873/// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
7874static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
7875 Value *CmpLHS, Value *CmpRHS,
7876 Value *TVal, Value *FVal,
7877 unsigned Depth) {
7878 // TODO: Allow FP min/max with nnan/nsz.
7879 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
7880
7881 Value *A = nullptr, *B = nullptr;
7882 SelectPatternResult L = matchSelectPattern(V: TVal, LHS&: A, RHS&: B, CastOp: nullptr, Depth: Depth + 1);
7883 if (!SelectPatternResult::isMinOrMax(SPF: L.Flavor))
7884 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7885
7886 Value *C = nullptr, *D = nullptr;
7887 SelectPatternResult R = matchSelectPattern(V: FVal, LHS&: C, RHS&: D, CastOp: nullptr, Depth: Depth + 1);
7888 if (L.Flavor != R.Flavor)
7889 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7890
7891 // We have something like: x Pred y ? min(a, b) : min(c, d).
7892 // Try to match the compare to the min/max operations of the select operands.
7893 // First, make sure we have the right compare predicate.
7894 switch (L.Flavor) {
7895 case SPF_SMIN:
7896 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
7897 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
7898 std::swap(a&: CmpLHS, b&: CmpRHS);
7899 }
7900 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
7901 break;
7902 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7903 case SPF_SMAX:
7904 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
7905 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
7906 std::swap(a&: CmpLHS, b&: CmpRHS);
7907 }
7908 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
7909 break;
7910 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7911 case SPF_UMIN:
7912 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
7913 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
7914 std::swap(a&: CmpLHS, b&: CmpRHS);
7915 }
7916 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
7917 break;
7918 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7919 case SPF_UMAX:
7920 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
7921 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
7922 std::swap(a&: CmpLHS, b&: CmpRHS);
7923 }
7924 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
7925 break;
7926 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7927 default:
7928 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7929 }
7930
7931 // If there is a common operand in the already matched min/max and the other
7932 // min/max operands match the compare operands (either directly or inverted),
7933 // then this is min/max of the same flavor.
7934
7935 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
7936 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
7937 if (D == B) {
7938 if ((CmpLHS == A && CmpRHS == C) || (match(V: C, P: m_Not(V: m_Specific(V: CmpLHS))) &&
7939 match(V: A, P: m_Not(V: m_Specific(V: CmpRHS)))))
7940 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
7941 }
7942 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
7943 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
7944 if (C == B) {
7945 if ((CmpLHS == A && CmpRHS == D) || (match(V: D, P: m_Not(V: m_Specific(V: CmpLHS))) &&
7946 match(V: A, P: m_Not(V: m_Specific(V: CmpRHS)))))
7947 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
7948 }
7949 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
7950 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
7951 if (D == A) {
7952 if ((CmpLHS == B && CmpRHS == C) || (match(V: C, P: m_Not(V: m_Specific(V: CmpLHS))) &&
7953 match(V: B, P: m_Not(V: m_Specific(V: CmpRHS)))))
7954 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
7955 }
7956 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
7957 // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
7958 if (C == A) {
7959 if ((CmpLHS == B && CmpRHS == D) || (match(V: D, P: m_Not(V: m_Specific(V: CmpLHS))) &&
7960 match(V: B, P: m_Not(V: m_Specific(V: CmpRHS)))))
7961 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
7962 }
7963
7964 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
7965}
7966
7967/// If the input value is the result of a 'not' op, constant integer, or vector
7968/// splat of a constant integer, return the bitwise-not source value.
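/// For example, given (xor X, -1) this returns X, and given the constant i8 7
/// it returns i8 -8 (i.e. ~7).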
7969/// TODO: This could be extended to handle non-splat vector integer constants.
7970static Value *getNotValue(Value *V) {
7971 Value *NotV;
7972 if (match(V, P: m_Not(V: m_Value(V&: NotV))))
7973 return NotV;
7974
7975 const APInt *C;
7976 if (match(V, P: m_APInt(Res&: C)))
7977 return ConstantInt::get(Ty: V->getType(), V: ~(*C));
7978
7979 return nullptr;
7980}
7981
7982/// Match non-obvious integer minimum and maximum sequences.
7983static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
7984 Value *CmpLHS, Value *CmpRHS,
7985 Value *TrueVal, Value *FalseVal,
7986 Value *&LHS, Value *&RHS,
7987 unsigned Depth) {
7988 // Assume success. If there's no match, callers should not use these anyway.
7989 LHS = TrueVal;
7990 RHS = FalseVal;
7991
7992 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
7993 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
7994 return SPR;
7995
7996 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TVal: TrueVal, FVal: FalseVal, Depth);
7997 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
7998 return SPR;
7999
8000 // Look through 'not' ops to find disguised min/max.
8001 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y)
8002 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y)
8003 if (CmpLHS == getNotValue(V: TrueVal) && CmpRHS == getNotValue(V: FalseVal)) {
8004 switch (Pred) {
8005 case CmpInst::ICMP_SGT: return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8006 case CmpInst::ICMP_SLT: return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8007 case CmpInst::ICMP_UGT: return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8008 case CmpInst::ICMP_ULT: return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8009 default: break;
8010 }
8011 }
8012
8013 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X)
8014 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X)
8015 if (CmpLHS == getNotValue(V: FalseVal) && CmpRHS == getNotValue(V: TrueVal)) {
8016 switch (Pred) {
8017 case CmpInst::ICMP_SGT: return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8018 case CmpInst::ICMP_SLT: return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8019 case CmpInst::ICMP_UGT: return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8020 case CmpInst::ICMP_ULT: return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8021 default: break;
8022 }
8023 }
8024
8025 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
8026 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8027
8028 const APInt *C1;
8029 if (!match(V: CmpRHS, P: m_APInt(Res&: C1)))
8030 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8031
8032 // An unsigned min/max can be written with a signed compare.
8033 const APInt *C2;
8034 if ((CmpLHS == TrueVal && match(V: FalseVal, P: m_APInt(Res&: C2))) ||
8035 (CmpLHS == FalseVal && match(V: TrueVal, P: m_APInt(Res&: C2)))) {
8036 // Is the sign bit set?
8037 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
8038 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
8039 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
8040 return {.Flavor: CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8041
8042 // Is the sign bit clear?
8043 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
8044 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
8045 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
8046 return {.Flavor: CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8047 }
8048
8049 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8050}
8051
8052bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW,
8053 bool AllowPoison) {
8054 assert(X && Y && "Invalid operand");
8055
8056 auto IsNegationOf = [&](const Value *X, const Value *Y) {
8057 if (!match(V: X, P: m_Neg(V: m_Specific(V: Y))))
8058 return false;
8059
8060 auto *BO = cast<BinaryOperator>(Val: X);
8061 if (NeedNSW && !BO->hasNoSignedWrap())
8062 return false;
8063
8064 auto *Zero = cast<Constant>(Val: BO->getOperand(i_nocapture: 0));
8065 if (!AllowPoison && !Zero->isNullValue())
8066 return false;
8067
8068 return true;
8069 };
8070
8071 // X = -Y or Y = -X
8072 if (IsNegationOf(X, Y) || IsNegationOf(Y, X))
8073 return true;
8074
8075 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
8076 Value *A, *B;
8077 return (!NeedNSW && (match(V: X, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B))) &&
8078 match(V: Y, P: m_Sub(L: m_Specific(V: B), R: m_Specific(V: A))))) ||
8079 (NeedNSW && (match(V: X, P: m_NSWSub(L: m_Value(V&: A), R: m_Value(V&: B))) &&
8080 match(V: Y, P: m_NSWSub(L: m_Specific(V: B), R: m_Specific(V: A)))));
8081}
8082
8083static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
8084 FastMathFlags FMF,
8085 Value *CmpLHS, Value *CmpRHS,
8086 Value *TrueVal, Value *FalseVal,
8087 Value *&LHS, Value *&RHS,
8088 unsigned Depth) {
8089 bool HasMismatchedZeros = false;
8090 if (CmpInst::isFPPredicate(P: Pred)) {
8091 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
8092 // 0.0 operand, set the compare's 0.0 operands to that same value for the
    // purpose of identifying min/max. Disregard vector constants with undefined
    // elements because those cannot be back-propagated for analysis.
8095 Value *OutputZeroVal = nullptr;
8096 if (match(V: TrueVal, P: m_AnyZeroFP()) && !match(V: FalseVal, P: m_AnyZeroFP()) &&
8097 !cast<Constant>(Val: TrueVal)->containsUndefOrPoisonElement())
8098 OutputZeroVal = TrueVal;
8099 else if (match(V: FalseVal, P: m_AnyZeroFP()) && !match(V: TrueVal, P: m_AnyZeroFP()) &&
8100 !cast<Constant>(Val: FalseVal)->containsUndefOrPoisonElement())
8101 OutputZeroVal = FalseVal;
8102
8103 if (OutputZeroVal) {
8104 if (match(V: CmpLHS, P: m_AnyZeroFP()) && CmpLHS != OutputZeroVal) {
8105 HasMismatchedZeros = true;
8106 CmpLHS = OutputZeroVal;
8107 }
8108 if (match(V: CmpRHS, P: m_AnyZeroFP()) && CmpRHS != OutputZeroVal) {
8109 HasMismatchedZeros = true;
8110 CmpRHS = OutputZeroVal;
8111 }
8112 }
8113 }
8114
8115 LHS = CmpLHS;
8116 RHS = CmpRHS;
8117
8118 // Signed zero may return inconsistent results between implementations.
8119 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
8120 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
8121 // Therefore, we behave conservatively and only proceed if at least one of the
8122 // operands is known to not be zero or if we don't care about signed zero.
8123 switch (Pred) {
8124 default: break;
8125 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT:
8126 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT:
8127 if (!HasMismatchedZeros)
8128 break;
8129 [[fallthrough]];
8130 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
8131 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
8132 if (!FMF.noSignedZeros() && !isKnownNonZero(V: CmpLHS) &&
8133 !isKnownNonZero(V: CmpRHS))
8134 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8135 }
8136
8137 SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
8138 bool Ordered = false;
8139
8140 // When given one NaN and one non-NaN input:
8141 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
8142 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
8143 // ordered comparison fails), which could be NaN or non-NaN.
  // So here we discover exactly what NaN behavior is required/accepted.
8145 if (CmpInst::isFPPredicate(P: Pred)) {
8146 bool LHSSafe = isKnownNonNaN(V: CmpLHS, FMF);
8147 bool RHSSafe = isKnownNonNaN(V: CmpRHS, FMF);
8148
8149 if (LHSSafe && RHSSafe) {
8150 // Both operands are known non-NaN.
8151 NaNBehavior = SPNB_RETURNS_ANY;
8152 } else if (CmpInst::isOrdered(predicate: Pred)) {
8153 // An ordered comparison will return false when given a NaN, so it
8154 // returns the RHS.
8155 Ordered = true;
8156 if (LHSSafe)
8157 // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
8158 NaNBehavior = SPNB_RETURNS_NAN;
8159 else if (RHSSafe)
8160 NaNBehavior = SPNB_RETURNS_OTHER;
8161 else
8162 // Completely unsafe.
8163 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8164 } else {
8165 Ordered = false;
8166 // An unordered comparison will return true when given a NaN, so it
8167 // returns the LHS.
8168 if (LHSSafe)
8169 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
8170 NaNBehavior = SPNB_RETURNS_OTHER;
8171 else if (RHSSafe)
8172 NaNBehavior = SPNB_RETURNS_NAN;
8173 else
8174 // Completely unsafe.
8175 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8176 }
8177 }
8178
8179 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
8180 std::swap(a&: CmpLHS, b&: CmpRHS);
8181 Pred = CmpInst::getSwappedPredicate(pred: Pred);
8182 if (NaNBehavior == SPNB_RETURNS_NAN)
8183 NaNBehavior = SPNB_RETURNS_OTHER;
8184 else if (NaNBehavior == SPNB_RETURNS_OTHER)
8185 NaNBehavior = SPNB_RETURNS_NAN;
8186 Ordered = !Ordered;
8187 }
8188
8189 // ([if]cmp X, Y) ? X : Y
8190 if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
8191 switch (Pred) {
8192 default: return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false}; // Equality.
8193 case ICmpInst::ICMP_UGT:
8194 case ICmpInst::ICMP_UGE: return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8195 case ICmpInst::ICMP_SGT:
8196 case ICmpInst::ICMP_SGE: return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8197 case ICmpInst::ICMP_ULT:
8198 case ICmpInst::ICMP_ULE: return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8199 case ICmpInst::ICMP_SLT:
8200 case ICmpInst::ICMP_SLE: return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8201 case FCmpInst::FCMP_UGT:
8202 case FCmpInst::FCMP_UGE:
8203 case FCmpInst::FCMP_OGT:
8204 case FCmpInst::FCMP_OGE: return {.Flavor: SPF_FMAXNUM, .NaNBehavior: NaNBehavior, .Ordered: Ordered};
8205 case FCmpInst::FCMP_ULT:
8206 case FCmpInst::FCMP_ULE:
8207 case FCmpInst::FCMP_OLT:
8208 case FCmpInst::FCMP_OLE: return {.Flavor: SPF_FMINNUM, .NaNBehavior: NaNBehavior, .Ordered: Ordered};
8209 }
8210 }
8211
8212 if (isKnownNegation(X: TrueVal, Y: FalseVal)) {
8213 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
8214 // match against either LHS or sext(LHS).
8215 auto MaybeSExtCmpLHS =
8216 m_CombineOr(L: m_Specific(V: CmpLHS), R: m_SExt(Op: m_Specific(V: CmpLHS)));
8217 auto ZeroOrAllOnes = m_CombineOr(L: m_ZeroInt(), R: m_AllOnes());
8218 auto ZeroOrOne = m_CombineOr(L: m_ZeroInt(), R: m_One());
8219 if (match(V: TrueVal, P: MaybeSExtCmpLHS)) {
8220 // Set the return values. If the compare uses the negated value (-X >s 0),
8221 // swap the return values because the negated value is always 'RHS'.
8222 LHS = TrueVal;
8223 RHS = FalseVal;
8224 if (match(V: CmpLHS, P: m_Neg(V: m_Specific(V: FalseVal))))
8225 std::swap(a&: LHS, b&: RHS);
8226
8227 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
8228 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
8229 if (Pred == ICmpInst::ICMP_SGT && match(V: CmpRHS, P: ZeroOrAllOnes))
8230 return {.Flavor: SPF_ABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8231
8232 // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
8233 if (Pred == ICmpInst::ICMP_SGE && match(V: CmpRHS, P: ZeroOrOne))
8234 return {.Flavor: SPF_ABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8235
8236 // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
8237 // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
8238 if (Pred == ICmpInst::ICMP_SLT && match(V: CmpRHS, P: ZeroOrOne))
8239 return {.Flavor: SPF_NABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8240 }
8241 else if (match(V: FalseVal, P: MaybeSExtCmpLHS)) {
8242 // Set the return values. If the compare uses the negated value (-X >s 0),
8243 // swap the return values because the negated value is always 'RHS'.
8244 LHS = FalseVal;
8245 RHS = TrueVal;
8246 if (match(V: CmpLHS, P: m_Neg(V: m_Specific(V: TrueVal))))
8247 std::swap(a&: LHS, b&: RHS);
8248
8249 // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
8250 // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
8251 if (Pred == ICmpInst::ICMP_SGT && match(V: CmpRHS, P: ZeroOrAllOnes))
8252 return {.Flavor: SPF_NABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8253
8254 // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
8255 // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
8256 if (Pred == ICmpInst::ICMP_SLT && match(V: CmpRHS, P: ZeroOrOne))
8257 return {.Flavor: SPF_ABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8258 }
8259 }
8260
8261 if (CmpInst::isIntPredicate(P: Pred))
8262 return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);
8263
8264 // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
  // may return either -0.0 or 0.0, so the fcmp/select pair has stricter
  // semantics than minNum. Be conservative in such cases.
8267 if (NaNBehavior != SPNB_RETURNS_ANY ||
8268 (!FMF.noSignedZeros() && !isKnownNonZero(V: CmpLHS) &&
8269 !isKnownNonZero(V: CmpRHS)))
8270 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8271
8272 return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
8273}
8274
8275/// Helps to match a select pattern in case of a type mismatch.
8276///
/// The function handles the case when the types of the true and false values
/// of a select instruction differ from the type of the cmp instruction's
/// operands because of a cast instruction. The function checks if it is legal
/// to move the cast operation after the "select". If yes, it returns the new
/// second value of "select" (with the assumption that the cast is moved):
/// 1. As the operand of the cast instruction when both values of "select" are
///    the same cast instruction.
/// 2. As the restored constant (by applying the reverse cast operation) when
///    the first value of the "select" is a cast operation and the second value
///    is a constant.
/// NOTE: We return only the new second value because the first value can be
/// accessed as the operand of the cast instruction.
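/// For example (illustrating case 2), given
///   %cmp = icmp ult i32 %x, %y
///   %ext = zext i32 %y to i64
///   %sel = select i1 %cmp, i64 %ext, i64 255
/// the i64 constant 255 can be truncated to i32 and zero-extended back without
/// losing information, so i32 255 is returned as the new second value.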
8289static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
8290 Instruction::CastOps *CastOp) {
8291 auto *Cast1 = dyn_cast<CastInst>(Val: V1);
8292 if (!Cast1)
8293 return nullptr;
8294
8295 *CastOp = Cast1->getOpcode();
8296 Type *SrcTy = Cast1->getSrcTy();
8297 if (auto *Cast2 = dyn_cast<CastInst>(Val: V2)) {
8298 // If V1 and V2 are both the same cast from the same type, look through V1.
8299 if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
8300 return Cast2->getOperand(i_nocapture: 0);
8301 return nullptr;
8302 }
8303
8304 auto *C = dyn_cast<Constant>(Val: V2);
8305 if (!C)
8306 return nullptr;
8307
8308 const DataLayout &DL = CmpI->getModule()->getDataLayout();
8309 Constant *CastedTo = nullptr;
8310 switch (*CastOp) {
8311 case Instruction::ZExt:
8312 if (CmpI->isUnsigned())
8313 CastedTo = ConstantExpr::getTrunc(C, Ty: SrcTy);
8314 break;
8315 case Instruction::SExt:
8316 if (CmpI->isSigned())
8317 CastedTo = ConstantExpr::getTrunc(C, Ty: SrcTy, OnlyIfReduced: true);
8318 break;
8319 case Instruction::Trunc:
8320 Constant *CmpConst;
8321 if (match(V: CmpI->getOperand(i_nocapture: 1), P: m_Constant(C&: CmpConst)) &&
8322 CmpConst->getType() == SrcTy) {
8323 // Here we have the following case:
8324 //
8325 // %cond = cmp iN %x, CmpConst
8326 // %tr = trunc iN %x to iK
8327 // %narrowsel = select i1 %cond, iK %t, iK C
8328 //
8329 // We can always move trunc after select operation:
8330 //
8331 // %cond = cmp iN %x, CmpConst
8332 // %widesel = select i1 %cond, iN %x, iN CmpConst
8333 // %tr = trunc iN %widesel to iK
8334 //
      // Note that C could be extended in any way because we don't care about
      // the upper bits after truncation. It can't be an abs pattern, because
      // that would look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only a min/max pattern can be matched. Such a match requires the
      // widened C == CmpConst, so we set the widened C = CmpConst; the
      // condition trunc CmpConst == C is checked below.
8344 CastedTo = CmpConst;
8345 } else {
8346 unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
8347 CastedTo = ConstantFoldCastOperand(Opcode: ExtOp, C, DestTy: SrcTy, DL);
8348 }
8349 break;
8350 case Instruction::FPTrunc:
8351 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPExt, C, DestTy: SrcTy, DL);
8352 break;
8353 case Instruction::FPExt:
8354 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPTrunc, C, DestTy: SrcTy, DL);
8355 break;
8356 case Instruction::FPToUI:
8357 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::UIToFP, C, DestTy: SrcTy, DL);
8358 break;
8359 case Instruction::FPToSI:
8360 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::SIToFP, C, DestTy: SrcTy, DL);
8361 break;
8362 case Instruction::UIToFP:
8363 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPToUI, C, DestTy: SrcTy, DL);
8364 break;
8365 case Instruction::SIToFP:
8366 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPToSI, C, DestTy: SrcTy, DL);
8367 break;
8368 default:
8369 break;
8370 }
8371
8372 if (!CastedTo)
8373 return nullptr;
8374
8375 // Make sure the cast doesn't lose any information.
8376 Constant *CastedBack =
8377 ConstantFoldCastOperand(Opcode: *CastOp, C: CastedTo, DestTy: C->getType(), DL);
8378 if (CastedBack && CastedBack != C)
8379 return nullptr;
8380
8381 return CastedTo;
8382}
8383
8384SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
8385 Instruction::CastOps *CastOp,
8386 unsigned Depth) {
8387 if (Depth >= MaxAnalysisRecursionDepth)
8388 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8389
8390 SelectInst *SI = dyn_cast<SelectInst>(Val: V);
8391 if (!SI) return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8392
8393 CmpInst *CmpI = dyn_cast<CmpInst>(Val: SI->getCondition());
8394 if (!CmpI) return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8395
8396 Value *TrueVal = SI->getTrueValue();
8397 Value *FalseVal = SI->getFalseValue();
8398
8399 return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
8400 CastOp, Depth);
8401}
8402
8403SelectPatternResult llvm::matchDecomposedSelectPattern(
8404 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
8405 Instruction::CastOps *CastOp, unsigned Depth) {
8406 CmpInst::Predicate Pred = CmpI->getPredicate();
8407 Value *CmpLHS = CmpI->getOperand(i_nocapture: 0);
8408 Value *CmpRHS = CmpI->getOperand(i_nocapture: 1);
8409 FastMathFlags FMF;
8410 if (isa<FPMathOperator>(Val: CmpI))
8411 FMF = CmpI->getFastMathFlags();
8412
8413 // Bail out early.
8414 if (CmpI->isEquality())
8415 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8416
8417 // Deal with type mismatches.
8418 if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
8419 if (Value *C = lookThroughCast(CmpI, V1: TrueVal, V2: FalseVal, CastOp)) {
8420 // If this is a potential fmin/fmax with a cast to integer, then ignore
8421 // -0.0 because there is no corresponding integer value.
8422 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
8423 FMF.setNoSignedZeros();
8424 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
8425 TrueVal: cast<CastInst>(Val: TrueVal)->getOperand(i_nocapture: 0), FalseVal: C,
8426 LHS, RHS, Depth);
8427 }
8428 if (Value *C = lookThroughCast(CmpI, V1: FalseVal, V2: TrueVal, CastOp)) {
8429 // If this is a potential fmin/fmax with a cast to integer, then ignore
8430 // -0.0 because there is no corresponding integer value.
8431 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
8432 FMF.setNoSignedZeros();
8433 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
8434 TrueVal: C, FalseVal: cast<CastInst>(Val: FalseVal)->getOperand(i_nocapture: 0),
8435 LHS, RHS, Depth);
8436 }
8437 }
8438 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
8439 LHS, RHS, Depth);
8440}
8441
8442CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
8443 if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
8444 if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
8445 if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
8446 if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
8447 if (SPF == SPF_FMINNUM)
8448 return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
8449 if (SPF == SPF_FMAXNUM)
8450 return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
8451 llvm_unreachable("unhandled!");
8452}
8453
8454SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
8455 if (SPF == SPF_SMIN) return SPF_SMAX;
8456 if (SPF == SPF_UMIN) return SPF_UMAX;
8457 if (SPF == SPF_SMAX) return SPF_SMIN;
8458 if (SPF == SPF_UMAX) return SPF_UMIN;
8459 llvm_unreachable("unhandled!");
8460}
8461
8462Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
8463 switch (MinMaxID) {
8464 case Intrinsic::smax: return Intrinsic::smin;
8465 case Intrinsic::smin: return Intrinsic::smax;
8466 case Intrinsic::umax: return Intrinsic::umin;
8467 case Intrinsic::umin: return Intrinsic::umax;
  // Note that the next four intrinsics may produce the same result for the
  // original and inverted cases even if X != Y, because NaN is handled
  // specially.
8470 case Intrinsic::maximum: return Intrinsic::minimum;
8471 case Intrinsic::minimum: return Intrinsic::maximum;
8472 case Intrinsic::maxnum: return Intrinsic::minnum;
8473 case Intrinsic::minnum: return Intrinsic::maxnum;
8474 default: llvm_unreachable("Unexpected intrinsic");
8475 }
8476}
8477
8478APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
8479 switch (SPF) {
8480 case SPF_SMAX: return APInt::getSignedMaxValue(numBits: BitWidth);
8481 case SPF_SMIN: return APInt::getSignedMinValue(numBits: BitWidth);
8482 case SPF_UMAX: return APInt::getMaxValue(numBits: BitWidth);
8483 case SPF_UMIN: return APInt::getMinValue(numBits: BitWidth);
8484 default: llvm_unreachable("Unexpected flavor");
8485 }
8486}
8487
8488std::pair<Intrinsic::ID, bool>
8489llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
8490 // Check if VL contains select instructions that can be folded into a min/max
8491 // vector intrinsic and return the intrinsic if it is possible.
8492 // TODO: Support floating point min/max.
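  // For example, if every value in VL is a select matching the pattern
  //   %s = select i1 (icmp slt %a, %b), %a, %b
  // this returns {Intrinsic::smin, AllCmpSingleUse}.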
8493 bool AllCmpSingleUse = true;
8494 SelectPatternResult SelectPattern;
8495 SelectPattern.Flavor = SPF_UNKNOWN;
8496 if (all_of(Range&: VL, P: [&SelectPattern, &AllCmpSingleUse](Value *I) {
8497 Value *LHS, *RHS;
8498 auto CurrentPattern = matchSelectPattern(V: I, LHS, RHS);
8499 if (!SelectPatternResult::isMinOrMax(SPF: CurrentPattern.Flavor) ||
8500 CurrentPattern.Flavor == SPF_FMINNUM ||
8501 CurrentPattern.Flavor == SPF_FMAXNUM ||
8502 !I->getType()->isIntOrIntVectorTy())
8503 return false;
8504 if (SelectPattern.Flavor != SPF_UNKNOWN &&
8505 SelectPattern.Flavor != CurrentPattern.Flavor)
8506 return false;
8507 SelectPattern = CurrentPattern;
8508 AllCmpSingleUse &=
8509 match(V: I, P: m_Select(C: m_OneUse(SubPattern: m_Value()), L: m_Value(), R: m_Value()));
8510 return true;
8511 })) {
8512 switch (SelectPattern.Flavor) {
8513 case SPF_SMIN:
8514 return {Intrinsic::smin, AllCmpSingleUse};
8515 case SPF_UMIN:
8516 return {Intrinsic::umin, AllCmpSingleUse};
8517 case SPF_SMAX:
8518 return {Intrinsic::smax, AllCmpSingleUse};
8519 case SPF_UMAX:
8520 return {Intrinsic::umax, AllCmpSingleUse};
8521 default:
8522 llvm_unreachable("unexpected select pattern flavor");
8523 }
8524 }
8525 return {Intrinsic::not_intrinsic, false};
8526}
8527
8528bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
8529 Value *&Start, Value *&Step) {
8530 // Handle the case of a simple two-predecessor recurrence PHI.
8531 // There's a lot more that could theoretically be done here, but
8532 // this is sufficient to catch some interesting cases.
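  // For example, for
  //   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add i32 %iv, 1
  // this sets BO to %iv.next, Start to 0 and Step to 1.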
8533 if (P->getNumIncomingValues() != 2)
8534 return false;
8535
8536 for (unsigned i = 0; i != 2; ++i) {
8537 Value *L = P->getIncomingValue(i);
8538 Value *R = P->getIncomingValue(i: !i);
8539 auto *LU = dyn_cast<BinaryOperator>(Val: L);
8540 if (!LU)
8541 continue;
8542 unsigned Opcode = LU->getOpcode();
8543
8544 switch (Opcode) {
8545 default:
8546 continue;
    // TODO: Expand list -- xor, div, gep, uaddo, etc.
8548 case Instruction::LShr:
8549 case Instruction::AShr:
8550 case Instruction::Shl:
8551 case Instruction::Add:
8552 case Instruction::Sub:
8553 case Instruction::And:
8554 case Instruction::Or:
8555 case Instruction::Mul:
8556 case Instruction::FMul: {
8557 Value *LL = LU->getOperand(i_nocapture: 0);
8558 Value *LR = LU->getOperand(i_nocapture: 1);
8559 // Find a recurrence.
8560 if (LL == P)
8561 L = LR;
8562 else if (LR == P)
8563 L = LL;
8564 else
8565 continue; // Check for recurrence with L and R flipped.
8566
8567 break; // Match!
8568 }
8569 };
8570
8571 // We have matched a recurrence of the form:
8572 // %iv = [R, %entry], [%iv.next, %backedge]
8573 // %iv.next = binop %iv, L
8574 // OR
8575 // %iv = [R, %entry], [%iv.next, %backedge]
8576 // %iv.next = binop L, %iv
8577 BO = LU;
8578 Start = R;
8579 Step = L;
8580 return true;
8581 }
8582 return false;
8583}
8584
8585bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
8586 Value *&Start, Value *&Step) {
8587 BinaryOperator *BO = nullptr;
8588 P = dyn_cast<PHINode>(Val: I->getOperand(i_nocapture: 0));
8589 if (!P)
8590 P = dyn_cast<PHINode>(Val: I->getOperand(i_nocapture: 1));
8591 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I;
8592}
8593
8594/// Return true if "icmp Pred LHS RHS" is always true.
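/// For example, "icmp ule %x, (or %x, %y)" and "icmp ule (lshr %x, %y), %x"
/// are always true.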
8595static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
8596 const Value *RHS) {
8597 if (ICmpInst::isTrueWhenEqual(predicate: Pred) && LHS == RHS)
8598 return true;
8599
8600 switch (Pred) {
8601 default:
8602 return false;
8603
8604 case CmpInst::ICMP_SLE: {
8605 const APInt *C;
8606
8607 // LHS s<= LHS +_{nsw} C if C >= 0
8608 // LHS s<= LHS | C if C >= 0
8609 if (match(V: RHS, P: m_NSWAdd(L: m_Specific(V: LHS), R: m_APInt(Res&: C))) ||
8610 match(V: RHS, P: m_Or(L: m_Specific(V: LHS), R: m_APInt(Res&: C))))
8611 return !C->isNegative();
8612
8613 // LHS s<= smax(LHS, V) for any V
8614 if (match(V: RHS, P: m_c_SMax(L: m_Specific(V: LHS), R: m_Value())))
8615 return true;
8616
8617 // smin(RHS, V) s<= RHS for any V
8618 if (match(V: LHS, P: m_c_SMin(L: m_Specific(V: RHS), R: m_Value())))
8619 return true;
8620
8621 // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB)
8622 const Value *X;
8623 const APInt *CLHS, *CRHS;
8624 if (match(V: LHS, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: CLHS))) &&
8625 match(V: RHS, P: m_NSWAddLike(L: m_Specific(V: X), R: m_APInt(Res&: CRHS))))
8626 return CLHS->sle(RHS: *CRHS);
8627
8628 return false;
8629 }
8630
8631 case CmpInst::ICMP_ULE: {
8632 // LHS u<= LHS +_{nuw} V for any V
8633 if (match(V: RHS, P: m_c_Add(L: m_Specific(V: LHS), R: m_Value())) &&
8634 cast<OverflowingBinaryOperator>(Val: RHS)->hasNoUnsignedWrap())
8635 return true;
8636
8637 // LHS u<= LHS | V for any V
8638 if (match(V: RHS, P: m_c_Or(L: m_Specific(V: LHS), R: m_Value())))
8639 return true;
8640
8641 // LHS u<= umax(LHS, V) for any V
8642 if (match(V: RHS, P: m_c_UMax(L: m_Specific(V: LHS), R: m_Value())))
8643 return true;
8644
8645 // RHS >> V u<= RHS for any V
8646 if (match(V: LHS, P: m_LShr(L: m_Specific(V: RHS), R: m_Value())))
8647 return true;
8648
8649 // RHS u/ C_ugt_1 u<= RHS
8650 const APInt *C;
8651 if (match(V: LHS, P: m_UDiv(L: m_Specific(V: RHS), R: m_APInt(Res&: C))) && C->ugt(RHS: 1))
8652 return true;
8653
8654 // RHS & V u<= RHS for any V
8655 if (match(V: LHS, P: m_c_And(L: m_Specific(V: RHS), R: m_Value())))
8656 return true;
8657
8658 // umin(RHS, V) u<= RHS for any V
8659 if (match(V: LHS, P: m_c_UMin(L: m_Specific(V: RHS), R: m_Value())))
8660 return true;
8661
8662 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
8663 const Value *X;
8664 const APInt *CLHS, *CRHS;
8665 if (match(V: LHS, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: CLHS))) &&
8666 match(V: RHS, P: m_NUWAddLike(L: m_Specific(V: X), R: m_APInt(Res&: CRHS))))
8667 return CLHS->ule(RHS: *CRHS);
8668
8669 return false;
8670 }
8671 }
8672}
8673
8674/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
8675/// ALHS ARHS" is true. Otherwise, return std::nullopt.
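/// For example, for Pred == ICMP_SLT: if "BLHS s<= ALHS" and "ARHS s<= BRHS"
/// always hold, then "ALHS s< ARHS" implies "BLHS s< BRHS".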
8676static std::optional<bool>
8677isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
8678 const Value *ARHS, const Value *BLHS, const Value *BRHS) {
8679 switch (Pred) {
8680 default:
8681 return std::nullopt;
8682
8683 case CmpInst::ICMP_SLT:
8684 case CmpInst::ICMP_SLE:
8685 if (isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: BLHS, RHS: ALHS) &&
8686 isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: ARHS, RHS: BRHS))
8687 return true;
8688 return std::nullopt;
8689
8690 case CmpInst::ICMP_SGT:
8691 case CmpInst::ICMP_SGE:
8692 if (isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: ALHS, RHS: BLHS) &&
8693 isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: BRHS, RHS: ARHS))
8694 return true;
8695 return std::nullopt;
8696
8697 case CmpInst::ICMP_ULT:
8698 case CmpInst::ICMP_ULE:
8699 if (isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: BLHS, RHS: ALHS) &&
8700 isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: ARHS, RHS: BRHS))
8701 return true;
8702 return std::nullopt;
8703
8704 case CmpInst::ICMP_UGT:
8705 case CmpInst::ICMP_UGE:
8706 if (isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: ALHS, RHS: BLHS) &&
8707 isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: BRHS, RHS: ARHS))
8708 return true;
8709 return std::nullopt;
8710 }
8711}
8712
8713/// Return true if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is true.
8714/// Return false if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is false.
8715/// Otherwise, return std::nullopt if we can't infer anything.
8716static std::optional<bool>
8717isImpliedCondMatchingOperands(CmpInst::Predicate LPred,
8718 CmpInst::Predicate RPred) {
8719 if (CmpInst::isImpliedTrueByMatchingCmp(Pred1: LPred, Pred2: RPred))
8720 return true;
8721 if (CmpInst::isImpliedFalseByMatchingCmp(Pred1: LPred, Pred2: RPred))
8722 return false;
8723
8724 return std::nullopt;
8725}
8726
8727/// Return true if "icmp LPred X, LC" implies "icmp RPred X, RC" is true.
8728/// Return false if "icmp LPred X, LC" implies "icmp RPred X, RC" is false.
8729/// Otherwise, return std::nullopt if we can't infer anything.
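/// For example, "icmp ult %x, 10" implies "icmp ult %x, 20" is true, and
/// implies "icmp ugt %x, 30" is false.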
8730static std::optional<bool> isImpliedCondCommonOperandWithConstants(
8731 CmpInst::Predicate LPred, const APInt &LC, CmpInst::Predicate RPred,
8732 const APInt &RC) {
8733 ConstantRange DomCR = ConstantRange::makeExactICmpRegion(Pred: LPred, Other: LC);
8734 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred: RPred, Other: RC);
8735 ConstantRange Intersection = DomCR.intersectWith(CR);
8736 ConstantRange Difference = DomCR.difference(CR);
8737 if (Intersection.isEmptySet())
8738 return false;
8739 if (Difference.isEmptySet())
8740 return true;
8741 return std::nullopt;
8742}
8743
8744/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
8745/// is true. Return false if LHS implies RHS is false. Otherwise, return
8746/// std::nullopt if we can't infer anything.
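/// For example, if LHS is "icmp ugt %x, %y" and it is known to be true, then
/// "icmp uge %x, %y" on the same operands is implied to be true.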
8747static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
8748 CmpInst::Predicate RPred,
8749 const Value *R0, const Value *R1,
8750 const DataLayout &DL,
8751 bool LHSIsTrue) {
8752 Value *L0 = LHS->getOperand(i_nocapture: 0);
8753 Value *L1 = LHS->getOperand(i_nocapture: 1);
8754
8755 // The rest of the logic assumes the LHS condition is true. If that's not the
8756 // case, invert the predicate to make it so.
8757 CmpInst::Predicate LPred =
8758 LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();
8759
8760 // We can have non-canonical operands, so try to normalize any common operand
8761 // to L0/R0.
8762 if (L0 == R1) {
8763 std::swap(a&: R0, b&: R1);
8764 RPred = ICmpInst::getSwappedPredicate(pred: RPred);
8765 }
8766 if (R0 == L1) {
8767 std::swap(a&: L0, b&: L1);
8768 LPred = ICmpInst::getSwappedPredicate(pred: LPred);
8769 }
8770 if (L1 == R1) {
8771 // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
8772 if (L0 != R0 || match(V: L0, P: m_ImmConstant())) {
8773 std::swap(a&: L0, b&: L1);
8774 LPred = ICmpInst::getSwappedPredicate(pred: LPred);
8775 std::swap(a&: R0, b&: R1);
8776 RPred = ICmpInst::getSwappedPredicate(pred: RPred);
8777 }
8778 }
8779
8780 // Can we infer anything when the 0-operands match and the 1-operands are
8781 // constants (not necessarily matching)?
8782 const APInt *LC, *RC;
8783 if (L0 == R0 && match(V: L1, P: m_APInt(Res&: LC)) && match(V: R1, P: m_APInt(Res&: RC)))
8784 return isImpliedCondCommonOperandWithConstants(LPred, LC: *LC, RPred, RC: *RC);
8785
8786 // Can we infer anything when the two compares have matching operands?
8787 if (L0 == R0 && L1 == R1)
8788 return isImpliedCondMatchingOperands(LPred, RPred);
8789
8790 // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
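  // (Both compares then test whether the same unsigned add overflowed, once
  //  against each addend, so they must agree.)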
8791 if (L0 == R0 &&
8792 (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
8793 (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
8794 match(V: L0, P: m_c_Add(L: m_Specific(V: L1), R: m_Specific(V: R1))))
8795 return LPred == RPred;
8796
8797 if (LPred == RPred)
8798 return isImpliedCondOperands(Pred: LPred, ALHS: L0, ARHS: L1, BLHS: R0, BRHS: R1);
8799
8800 return std::nullopt;
8801}
8802
8803/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
8804/// false. Otherwise, return std::nullopt if we can't infer anything. We
8805/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
8806/// instruction.
8807static std::optional<bool>
8808isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred,
8809 const Value *RHSOp0, const Value *RHSOp1,
8810 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
8811 // The LHS must be an 'or', 'and', or a 'select' instruction.
8812 assert((LHS->getOpcode() == Instruction::And ||
8813 LHS->getOpcode() == Instruction::Or ||
8814 LHS->getOpcode() == Instruction::Select) &&
8815 "Expected LHS to be 'and', 'or', or 'select'.");
8816
8817 assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");
8818
8819 // If the result of an 'or' is false, then we know both legs of the 'or' are
8820 // false. Similarly, if the result of an 'and' is true, then we know both
8821 // legs of the 'and' are true.
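  // E.g. if "(A && B)" is known true, it is enough that either A on its own
  // or B on its own implies the RHS.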
8822 const Value *ALHS, *ARHS;
8823 if ((!LHSIsTrue && match(V: LHS, P: m_LogicalOr(L: m_Value(V&: ALHS), R: m_Value(V&: ARHS)))) ||
8824 (LHSIsTrue && match(V: LHS, P: m_LogicalAnd(L: m_Value(V&: ALHS), R: m_Value(V&: ARHS))))) {
8825    // FIXME: Make this non-recursive.
8826 if (std::optional<bool> Implication = isImpliedCondition(
8827 LHS: ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth: Depth + 1))
8828 return Implication;
8829 if (std::optional<bool> Implication = isImpliedCondition(
8830 LHS: ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth: Depth + 1))
8831 return Implication;
8832 return std::nullopt;
8833 }
8834 return std::nullopt;
8835}
8836
8837std::optional<bool>
8838llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred,
8839 const Value *RHSOp0, const Value *RHSOp1,
8840 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
8841 // Bail out when we hit the limit.
8842 if (Depth == MaxAnalysisRecursionDepth)
8843 return std::nullopt;
8844
8845 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
8846 // example.
8847 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
8848 return std::nullopt;
8849
8850 assert(LHS->getType()->isIntOrIntVectorTy(1) &&
8851 "Expected integer type only!");
8852
8853 // Match not
8854 if (match(V: LHS, P: m_Not(V: m_Value(V&: LHS))))
8855 LHSIsTrue = !LHSIsTrue;
8856
8857 // Both LHS and RHS are icmps.
8858 const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(Val: LHS);
8859 if (LHSCmp)
8860 return isImpliedCondICmps(LHS: LHSCmp, RPred: RHSPred, R0: RHSOp0, R1: RHSOp1, DL, LHSIsTrue);
8861
8862 /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect
8863 /// the RHS to be an icmp.
8864 /// FIXME: Add support for and/or/select on the RHS.
8865 if (const Instruction *LHSI = dyn_cast<Instruction>(Val: LHS)) {
8866 if ((LHSI->getOpcode() == Instruction::And ||
8867 LHSI->getOpcode() == Instruction::Or ||
8868 LHSI->getOpcode() == Instruction::Select))
8869 return isImpliedCondAndOr(LHS: LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
8870 Depth);
8871 }
8872 return std::nullopt;
8873}
8874
8875std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
8876 const DataLayout &DL,
8877 bool LHSIsTrue, unsigned Depth) {
8878 // LHS ==> RHS by definition
8879 if (LHS == RHS)
8880 return LHSIsTrue;
8881
8882 // Match not
8883 bool InvertRHS = false;
8884 if (match(V: RHS, P: m_Not(V: m_Value(V&: RHS)))) {
8885 if (LHS == RHS)
8886 return !LHSIsTrue;
8887 InvertRHS = true;
8888 }
8889
8890 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(Val: RHS)) {
8891 if (auto Implied = isImpliedCondition(
8892 LHS, RHSPred: RHSCmp->getPredicate(), RHSOp0: RHSCmp->getOperand(i_nocapture: 0),
8893 RHSOp1: RHSCmp->getOperand(i_nocapture: 1), DL, LHSIsTrue, Depth))
8894 return InvertRHS ? !*Implied : *Implied;
8895 return std::nullopt;
8896 }
8897
8898 if (Depth == MaxAnalysisRecursionDepth)
8899 return std::nullopt;
8900
8901 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
8902 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
8903 const Value *RHS1, *RHS2;
8904 if (match(V: RHS, P: m_LogicalOr(L: m_Value(V&: RHS1), R: m_Value(V&: RHS2)))) {
8905 if (std::optional<bool> Imp =
8906 isImpliedCondition(LHS, RHS: RHS1, DL, LHSIsTrue, Depth: Depth + 1))
8907 if (*Imp == true)
8908 return !InvertRHS;
8909 if (std::optional<bool> Imp =
8910 isImpliedCondition(LHS, RHS: RHS2, DL, LHSIsTrue, Depth: Depth + 1))
8911 if (*Imp == true)
8912 return !InvertRHS;
8913 }
8914 if (match(V: RHS, P: m_LogicalAnd(L: m_Value(V&: RHS1), R: m_Value(V&: RHS2)))) {
8915 if (std::optional<bool> Imp =
8916 isImpliedCondition(LHS, RHS: RHS1, DL, LHSIsTrue, Depth: Depth + 1))
8917 if (*Imp == false)
8918 return InvertRHS;
8919 if (std::optional<bool> Imp =
8920 isImpliedCondition(LHS, RHS: RHS2, DL, LHSIsTrue, Depth: Depth + 1))
8921 if (*Imp == false)
8922 return InvertRHS;
8923 }
8924
8925 return std::nullopt;
8926}
8927
8928// Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
8929// condition dominating ContextI or nullptr, if no condition is found.
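// For example, if ContextI's block has a single predecessor ending in
// "br i1 %c, label %ContextBB, label %Other", this returns {%c, true}; if the
// edge into ContextBB is the false edge, it returns {%c, false}.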
8930static std::pair<Value *, bool>
8931getDomPredecessorCondition(const Instruction *ContextI) {
8932 if (!ContextI || !ContextI->getParent())
8933 return {nullptr, false};
8934
8935 // TODO: This is a poor/cheap way to determine dominance. Should we use a
8936  // dominator tree (e.g., from a SimplifyQuery) instead?
8937 const BasicBlock *ContextBB = ContextI->getParent();
8938 const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
8939 if (!PredBB)
8940 return {nullptr, false};
8941
8942 // We need a conditional branch in the predecessor.
8943 Value *PredCond;
8944 BasicBlock *TrueBB, *FalseBB;
8945 if (!match(V: PredBB->getTerminator(), P: m_Br(C: m_Value(V&: PredCond), T&: TrueBB, F&: FalseBB)))
8946 return {nullptr, false};
8947
8948 // The branch should get simplified. Don't bother simplifying this condition.
8949 if (TrueBB == FalseBB)
8950 return {nullptr, false};
8951
8952 assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
8953 "Predecessor block does not point to successor?");
8954
8955 // Is this condition implied by the predecessor condition?
8956 return {PredCond, TrueBB == ContextBB};
8957}
8958
8959std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
8960 const Instruction *ContextI,
8961 const DataLayout &DL) {
8962 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
8963 auto PredCond = getDomPredecessorCondition(ContextI);
8964 if (PredCond.first)
8965 return isImpliedCondition(LHS: PredCond.first, RHS: Cond, DL, LHSIsTrue: PredCond.second);
8966 return std::nullopt;
8967}
8968
8969std::optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred,
8970 const Value *LHS,
8971 const Value *RHS,
8972 const Instruction *ContextI,
8973 const DataLayout &DL) {
8974 auto PredCond = getDomPredecessorCondition(ContextI);
8975 if (PredCond.first)
8976 return isImpliedCondition(LHS: PredCond.first, RHSPred: Pred, RHSOp0: LHS, RHSOp1: RHS, DL,
8977 LHSIsTrue: PredCond.second);
8978 return std::nullopt;
8979}
8980
8981static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
8982 APInt &Upper, const InstrInfoQuery &IIQ,
8983 bool PreferSignedRange) {
8984 unsigned Width = Lower.getBitWidth();
8985 const APInt *C;
8986 switch (BO.getOpcode()) {
8987 case Instruction::Add:
8988 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && !C->isZero()) {
8989 bool HasNSW = IIQ.hasNoSignedWrap(Op: &BO);
8990 bool HasNUW = IIQ.hasNoUnsignedWrap(Op: &BO);
8991
8992 // If the caller expects a signed compare, then try to use a signed range.
8993 // Otherwise if both no-wraps are set, use the unsigned range because it
8994 // is never larger than the signed range. Example:
8995 // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
8996 if (PreferSignedRange && HasNSW && HasNUW)
8997 HasNUW = false;
8998
8999 if (HasNUW) {
9000 // 'add nuw x, C' produces [C, UINT_MAX].
9001 Lower = *C;
9002 } else if (HasNSW) {
9003 if (C->isNegative()) {
9004 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
9005 Lower = APInt::getSignedMinValue(numBits: Width);
9006 Upper = APInt::getSignedMaxValue(numBits: Width) + *C + 1;
9007 } else {
9008 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
9009 Lower = APInt::getSignedMinValue(numBits: Width) + *C;
9010 Upper = APInt::getSignedMaxValue(numBits: Width) + 1;
9011 }
9012 }
9013 }
9014 break;
9015
9016 case Instruction::And:
9017 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9018 // 'and x, C' produces [0, C].
9019 Upper = *C + 1;
9020    // X & -X is either a power of two or zero, so we can cap the value at the
9021    // maximum power of two.
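    // (For i8, the result is at most 0x80, so the exclusive bound Upper is
    //  0x81.)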
9022 if (match(V: BO.getOperand(i_nocapture: 0), P: m_Neg(V: m_Specific(V: BO.getOperand(i_nocapture: 1)))) ||
9023 match(V: BO.getOperand(i_nocapture: 1), P: m_Neg(V: m_Specific(V: BO.getOperand(i_nocapture: 0)))))
9024 Upper = APInt::getSignedMinValue(numBits: Width) + 1;
9025 break;
9026
9027 case Instruction::Or:
9028 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9029 // 'or x, C' produces [C, UINT_MAX].
9030 Lower = *C;
9031 break;
9032
9033 case Instruction::AShr:
9034 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && C->ult(RHS: Width)) {
9035 // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
9036 Lower = APInt::getSignedMinValue(numBits: Width).ashr(ShiftAmt: *C);
9037 Upper = APInt::getSignedMaxValue(numBits: Width).ashr(ShiftAmt: *C) + 1;
9038 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9039 unsigned ShiftAmount = Width - 1;
9040 if (!C->isZero() && IIQ.isExact(Op: &BO))
9041 ShiftAmount = C->countr_zero();
9042 if (C->isNegative()) {
9043 // 'ashr C, x' produces [C, C >> (Width-1)]
9044 Lower = *C;
9045 Upper = C->ashr(ShiftAmt: ShiftAmount) + 1;
9046 } else {
9047 // 'ashr C, x' produces [C >> (Width-1), C]
9048 Lower = C->ashr(ShiftAmt: ShiftAmount);
9049 Upper = *C + 1;
9050 }
9051 }
9052 break;
9053
9054 case Instruction::LShr:
9055 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && C->ult(RHS: Width)) {
9056 // 'lshr x, C' produces [0, UINT_MAX >> C].
9057 Upper = APInt::getAllOnes(numBits: Width).lshr(ShiftAmt: *C) + 1;
9058 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9059 // 'lshr C, x' produces [C >> (Width-1), C].
9060 unsigned ShiftAmount = Width - 1;
9061 if (!C->isZero() && IIQ.isExact(Op: &BO))
9062 ShiftAmount = C->countr_zero();
9063 Lower = C->lshr(shiftAmt: ShiftAmount);
9064 Upper = *C + 1;
9065 }
9066 break;
9067
9068 case Instruction::Shl:
9069 if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9070 if (IIQ.hasNoUnsignedWrap(Op: &BO)) {
9071 // 'shl nuw C, x' produces [C, C << CLZ(C)]
9072 Lower = *C;
9073 Upper = Lower.shl(shiftAmt: Lower.countl_zero()) + 1;
9074 } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
9075 if (C->isNegative()) {
9076 // 'shl nsw C, x' produces [C << CLO(C)-1, C]
9077 unsigned ShiftAmount = C->countl_one() - 1;
9078 Lower = C->shl(shiftAmt: ShiftAmount);
9079 Upper = *C + 1;
9080 } else {
9081 // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
9082 unsigned ShiftAmount = C->countl_zero() - 1;
9083 Lower = *C;
9084 Upper = C->shl(shiftAmt: ShiftAmount) + 1;
9085 }
9086 } else {
9087 // If lowbit is set, value can never be zero.
9088 if ((*C)[0])
9089 Lower = APInt::getOneBitSet(numBits: Width, BitNo: 0);
9090        // If we are shifting a constant, the largest result occurs when the
9091        // longest run of consecutive ones is shifted into the high bits
9092        // (breaking ties in favor of the higher run). For now we take a liberal
9093        // upper bound on this by just popcounting the constant.
9094        // TODO: There may be a bitwise trick for finding the longest/highest
9095        // run of consecutive ones (the naive method is an O(Width) loop).
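        // (E.g. for i8 and C = 0b00000110, any "shl C, x" has at most two set
        //  bits, so it is at most 0b11000000 and Upper below is that plus one.)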
9096 Upper = APInt::getHighBitsSet(numBits: Width, hiBitsSet: C->popcount()) + 1;
9097 }
9098 } else if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && C->ult(RHS: Width)) {
9099 Upper = APInt::getBitsSetFrom(numBits: Width, loBit: C->getZExtValue()) + 1;
9100 }
9101 break;
9102
9103 case Instruction::SDiv:
9104 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9105 APInt IntMin = APInt::getSignedMinValue(numBits: Width);
9106 APInt IntMax = APInt::getSignedMaxValue(numBits: Width);
9107 if (C->isAllOnes()) {
9108 // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
9109        //    (x == INT_MIN is excluded because INT_MIN / -1 overflows.)
9110 Lower = IntMin + 1;
9111 Upper = IntMax + 1;
9112 } else if (C->countl_zero() < Width - 1) {
9113 // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
9114 // where C != -1 and C != 0 and C != 1
9115 Lower = IntMin.sdiv(RHS: *C);
9116 Upper = IntMax.sdiv(RHS: *C);
9117 if (Lower.sgt(RHS: Upper))
9118 std::swap(a&: Lower, b&: Upper);
9119 Upper = Upper + 1;
9120 assert(Upper != Lower && "Upper part of range has wrapped!");
9121 }
9122 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9123 if (C->isMinSignedValue()) {
9124 // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
9125 Lower = *C;
9126 Upper = Lower.lshr(shiftAmt: 1) + 1;
9127 } else {
9128 // 'sdiv C, x' produces [-|C|, |C|].
9129 Upper = C->abs() + 1;
9130 Lower = (-Upper) + 1;
9131 }
9132 }
9133 break;
9134
9135 case Instruction::UDiv:
9136 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && !C->isZero()) {
9137 // 'udiv x, C' produces [0, UINT_MAX / C].
9138 Upper = APInt::getMaxValue(numBits: Width).udiv(RHS: *C) + 1;
9139 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9140 // 'udiv C, x' produces [0, C].
9141 Upper = *C + 1;
9142 }
9143 break;
9144
9145 case Instruction::SRem:
9146 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9147 // 'srem x, C' produces (-|C|, |C|).
9148 Upper = C->abs();
9149 Lower = (-Upper) + 1;
9150 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9151 if (C->isNegative()) {
9152 // 'srem -|C|, x' produces [-|C|, 0].
9153 Upper = 1;
9154 Lower = *C;
9155 } else {
9156 // 'srem |C|, x' produces [0, |C|].
9157 Upper = *C + 1;
9158 }
9159 }
9160 break;
9161
9162 case Instruction::URem:
9163 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9164 // 'urem x, C' produces [0, C).
9165 Upper = *C;
9166 else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)))
9167 // 'urem C, x' produces [0, C].
9168 Upper = *C + 1;
9169 break;
9170
9171 default:
9172 break;
9173 }
9174}
9175
9176static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) {
9177 unsigned Width = II.getType()->getScalarSizeInBits();
9178 const APInt *C;
9179 switch (II.getIntrinsicID()) {
9180 case Intrinsic::ctpop:
9181 case Intrinsic::ctlz:
9182 case Intrinsic::cttz:
9183 // Maximum of set/clear bits is the bit width.
9184 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9185 Upper: APInt(Width, Width + 1));
9186 case Intrinsic::uadd_sat:
9187 // uadd.sat(x, C) produces [C, UINT_MAX].
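    // E.g. "uadd.sat(i8 X, 200)" saturates at 255, so it is always in
    // [200, 255].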
9188 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)) ||
9189 match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9190 return ConstantRange::getNonEmpty(Lower: *C, Upper: APInt::getZero(numBits: Width));
9191 break;
9192 case Intrinsic::sadd_sat:
9193 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)) ||
9194 match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9195 if (C->isNegative())
9196 // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
9197 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9198 Upper: APInt::getSignedMaxValue(numBits: Width) + *C +
9199 1);
9200
9201 // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
9202 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width) + *C,
9203 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9204 }
9205 break;
9206 case Intrinsic::usub_sat:
9207 // usub.sat(C, x) produces [0, C].
9208 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)))
9209 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width), Upper: *C + 1);
9210
9211 // usub.sat(x, C) produces [0, UINT_MAX - C].
9212 if (match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9213 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9214 Upper: APInt::getMaxValue(numBits: Width) - *C + 1);
9215 break;
9216 case Intrinsic::ssub_sat:
9217 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9218 if (C->isNegative())
9219 // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
9220 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9221 Upper: *C - APInt::getSignedMinValue(numBits: Width) +
9222 1);
9223
9224 // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
9225 return ConstantRange::getNonEmpty(Lower: *C - APInt::getSignedMaxValue(numBits: Width),
9226 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9227 } else if (match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9228 if (C->isNegative())
9229 // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
9230 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width) - *C,
9231 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9232
9233 // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
9234 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9235 Upper: APInt::getSignedMaxValue(numBits: Width) - *C +
9236 1);
9237 }
9238 break;
9239 case Intrinsic::umin:
9240 case Intrinsic::umax:
9241 case Intrinsic::smin:
9242 case Intrinsic::smax:
9243 if (!match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)) &&
9244 !match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9245 break;
9246
9247 switch (II.getIntrinsicID()) {
9248 case Intrinsic::umin:
9249 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width), Upper: *C + 1);
9250 case Intrinsic::umax:
9251 return ConstantRange::getNonEmpty(Lower: *C, Upper: APInt::getZero(numBits: Width));
9252 case Intrinsic::smin:
9253 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9254 Upper: *C + 1);
9255 case Intrinsic::smax:
9256 return ConstantRange::getNonEmpty(Lower: *C,
9257 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9258 default:
9259 llvm_unreachable("Must be min/max intrinsic");
9260 }
9261 break;
9262 case Intrinsic::abs:
9263 // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
9264 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
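    // (The second operand of llvm.abs is the "INT_MIN is poison" flag, which
    //  is what the m_One() match below checks.)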
9265 if (match(V: II.getOperand(i_nocapture: 1), P: m_One()))
9266 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9267 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9268
9269 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9270 Upper: APInt::getSignedMinValue(numBits: Width) + 1);
9271 case Intrinsic::vscale:
9272 if (!II.getParent() || !II.getFunction())
9273 break;
9274 return getVScaleRange(F: II.getFunction(), BitWidth: Width);
9275 default:
9276 break;
9277 }
9278
9279 return ConstantRange::getFull(BitWidth: Width);
9280}
9281
9282static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
9283 const InstrInfoQuery &IIQ) {
9284 unsigned BitWidth = SI.getType()->getScalarSizeInBits();
9285 const Value *LHS = nullptr, *RHS = nullptr;
9286 SelectPatternResult R = matchSelectPattern(V: &SI, LHS, RHS);
9287 if (R.Flavor == SPF_UNKNOWN)
9288 return ConstantRange::getFull(BitWidth);
9289
9290 if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
9291 // If the negation part of the abs (in RHS) has the NSW flag,
9292 // then the result of abs(X) is [0..SIGNED_MAX],
9293 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
9294 if (match(V: RHS, P: m_Neg(V: m_Specific(V: LHS))) &&
9295 IIQ.hasNoSignedWrap(Op: cast<Instruction>(Val: RHS)))
9296 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: BitWidth),
9297 Upper: APInt::getSignedMaxValue(numBits: BitWidth) + 1);
9298
9299 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: BitWidth),
9300 Upper: APInt::getSignedMinValue(numBits: BitWidth) + 1);
9301 }
9302
9303 if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
9304 // The result of -abs(X) is <= 0.
9305 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: BitWidth),
9306 Upper: APInt(BitWidth, 1));
9307 }
9308
9309 const APInt *C;
9310 if (!match(V: LHS, P: m_APInt(Res&: C)) && !match(V: RHS, P: m_APInt(Res&: C)))
9311 return ConstantRange::getFull(BitWidth);
9312
9313 switch (R.Flavor) {
9314 case SPF_UMIN:
9315 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: BitWidth), Upper: *C + 1);
9316 case SPF_UMAX:
9317 return ConstantRange::getNonEmpty(Lower: *C, Upper: APInt::getZero(numBits: BitWidth));
9318 case SPF_SMIN:
9319 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: BitWidth),
9320 Upper: *C + 1);
9321 case SPF_SMAX:
9322 return ConstantRange::getNonEmpty(Lower: *C,
9323 Upper: APInt::getSignedMaxValue(numBits: BitWidth) + 1);
9324 default:
9325 return ConstantRange::getFull(BitWidth);
9326 }
9327}
9328
9329static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
9330 // The maximum representable value of a half is 65504. For floats the maximum
9331 // value is 3.4e38 which requires roughly 129 bits.
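  // So for a half source, an fptosi result of at least 17 bits is clamped to
  // [-65504, 65504] and an fptoui result of at least 16 bits to [0, 65504];
  // other source types and narrower integer results are left untouched.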
9332 unsigned BitWidth = I->getType()->getScalarSizeInBits();
9333 if (!I->getOperand(i: 0)->getType()->getScalarType()->isHalfTy())
9334 return;
9335 if (isa<FPToSIInst>(Val: I) && BitWidth >= 17) {
9336 Lower = APInt(BitWidth, -65504);
9337 Upper = APInt(BitWidth, 65505);
9338 }
9339
9340 if (isa<FPToUIInst>(Val: I) && BitWidth >= 16) {
9341 // For a fptoui the lower limit is left as 0.
9342 Upper = APInt(BitWidth, 65505);
9343 }
9344}
9345
9346ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
9347 bool UseInstrInfo, AssumptionCache *AC,
9348 const Instruction *CtxI,
9349 const DominatorTree *DT,
9350 unsigned Depth) {
9351 assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
9352
9353 if (Depth == MaxAnalysisRecursionDepth)
9354 return ConstantRange::getFull(BitWidth: V->getType()->getScalarSizeInBits());
9355
9356 const APInt *C;
9357 if (match(V, P: m_APInt(Res&: C)))
9358 return ConstantRange(*C);
9359 unsigned BitWidth = V->getType()->getScalarSizeInBits();
9360
9361 if (auto *VC = dyn_cast<ConstantDataVector>(Val: V)) {
9362 ConstantRange CR = ConstantRange::getEmpty(BitWidth);
9363 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
9364 ++ElemIdx)
9365 CR = CR.unionWith(CR: VC->getElementAsAPInt(i: ElemIdx));
9366 return CR;
9367 }
9368
9369 InstrInfoQuery IIQ(UseInstrInfo);
9370 ConstantRange CR = ConstantRange::getFull(BitWidth);
9371 if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) {
9372 APInt Lower = APInt(BitWidth, 0);
9373 APInt Upper = APInt(BitWidth, 0);
9374 // TODO: Return ConstantRange.
9375 setLimitsForBinOp(BO: *BO, Lower, Upper, IIQ, PreferSignedRange: ForSigned);
9376 CR = ConstantRange::getNonEmpty(Lower, Upper);
9377 } else if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
9378 CR = getRangeForIntrinsic(II: *II);
9379 else if (auto *SI = dyn_cast<SelectInst>(Val: V)) {
9380 ConstantRange CRTrue = computeConstantRange(
9381 V: SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth: Depth + 1);
9382 ConstantRange CRFalse = computeConstantRange(
9383 V: SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth: Depth + 1);
9384 CR = CRTrue.unionWith(CR: CRFalse);
9385 CR = CR.intersectWith(CR: getRangeForSelectPattern(SI: *SI, IIQ));
9386 } else if (isa<FPToUIInst>(Val: V) || isa<FPToSIInst>(Val: V)) {
9387 APInt Lower = APInt(BitWidth, 0);
9388 APInt Upper = APInt(BitWidth, 0);
9389 // TODO: Return ConstantRange.
9390 setLimitForFPToI(I: cast<Instruction>(Val: V), Lower, Upper);
9391 CR = ConstantRange::getNonEmpty(Lower, Upper);
9392 } else if (const auto *A = dyn_cast<Argument>(Val: V))
9393 if (std::optional<ConstantRange> Range = A->getRange())
9394 CR = *Range;
9395
9396 if (auto *I = dyn_cast<Instruction>(Val: V)) {
9397 if (auto *Range = IIQ.getMetadata(I, KindID: LLVMContext::MD_range))
9398 CR = CR.intersectWith(CR: getConstantRangeFromMetadata(RangeMD: *Range));
9399
9400 if (const auto *CB = dyn_cast<CallBase>(Val: V))
9401 if (std::optional<ConstantRange> Range = CB->getRange())
9402 CR = CR.intersectWith(CR: *Range);
9403 }
9404
9405 if (CtxI && AC) {
9406 // Try to restrict the range based on information from assumptions.
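    // E.g. "assume(icmp ult %x, 42)" intersects the computed range of %x with
    // [0, 42).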
9407 for (auto &AssumeVH : AC->assumptionsFor(V)) {
9408 if (!AssumeVH)
9409 continue;
9410 CallInst *I = cast<CallInst>(Val&: AssumeVH);
9411 assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
9412 "Got assumption for the wrong function!");
9413 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
9414 "must be an assume intrinsic");
9415
9416 if (!isValidAssumeForContext(Inv: I, CxtI: CtxI, DT))
9417 continue;
9418 Value *Arg = I->getArgOperand(i: 0);
9419 ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: Arg);
9420 // Currently we just use information from comparisons.
9421 if (!Cmp || Cmp->getOperand(i_nocapture: 0) != V)
9422 continue;
9423 // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
9424 ConstantRange RHS =
9425 computeConstantRange(V: Cmp->getOperand(i_nocapture: 1), /* ForSigned */ false,
9426 UseInstrInfo, AC, CtxI: I, DT, Depth: Depth + 1);
9427 CR = CR.intersectWith(
9428 CR: ConstantRange::makeAllowedICmpRegion(Pred: Cmp->getPredicate(), Other: RHS));
9429 }
9430 }
9431
9432 return CR;
9433}
9434
9435static void
9436addValueAffectedByCondition(Value *V,
9437 function_ref<void(Value *)> InsertAffected) {
9438 assert(V != nullptr);
9439 if (isa<Argument>(Val: V) || isa<GlobalValue>(Val: V)) {
9440 InsertAffected(V);
9441 } else if (auto *I = dyn_cast<Instruction>(Val: V)) {
9442 InsertAffected(V);
9443
9444 // Peek through unary operators to find the source of the condition.
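    // (E.g. if V is "trunc i64 %x to i8", the source %x is also reported as
    //  affected.)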
9445 Value *Op;
9446 if (match(V: I, P: m_CombineOr(L: m_PtrToInt(Op: m_Value(V&: Op)), R: m_Trunc(Op: m_Value(V&: Op))))) {
9447 if (isa<Instruction>(Val: Op) || isa<Argument>(Val: Op))
9448 InsertAffected(Op);
9449 }
9450 }
9451}
9452
9453void llvm::findValuesAffectedByCondition(
9454 Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
9455 auto AddAffected = [&InsertAffected](Value *V) {
9456 addValueAffectedByCondition(V, InsertAffected);
9457 };
9458
9459 auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
9460 if (IsAssume) {
9461 AddAffected(LHS);
9462 AddAffected(RHS);
9463 } else if (match(V: RHS, P: m_Constant()))
9464 AddAffected(LHS);
9465 };
9466
9467 SmallVector<Value *, 8> Worklist;
9468 SmallPtrSet<Value *, 8> Visited;
9469 Worklist.push_back(Elt: Cond);
9470 while (!Worklist.empty()) {
9471 Value *V = Worklist.pop_back_val();
9472 if (!Visited.insert(Ptr: V).second)
9473 continue;
9474
9475 CmpInst::Predicate Pred;
9476 Value *A, *B, *X;
9477
9478 if (IsAssume) {
9479 AddAffected(V);
9480 if (match(V, P: m_Not(V: m_Value(V&: X))))
9481 AddAffected(X);
9482 }
9483
9484 if (match(V, P: m_LogicalOp(L: m_Value(V&: A), R: m_Value(V&: B)))) {
9485 // assume(A && B) is split to -> assume(A); assume(B);
9486 // assume(!(A || B)) is split to -> assume(!A); assume(!B);
9487 // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
9488 // enough information to be worth handling (intersection of information as
9489 // opposed to union).
9490 if (!IsAssume) {
9491 Worklist.push_back(Elt: A);
9492 Worklist.push_back(Elt: B);
9493 }
9494 } else if (match(V, P: m_ICmp(Pred, L: m_Value(V&: A), R: m_Value(V&: B)))) {
9495 AddCmpOperands(A, B);
9496
9497 if (ICmpInst::isEquality(P: Pred)) {
9498 if (match(V: B, P: m_ConstantInt())) {
9499 Value *Y;
9500 // (X & C) or (X | C) or (X ^ C).
9501 // (X << C) or (X >>_s C) or (X >>_u C).
9502 if (match(V: A, P: m_BitwiseLogic(L: m_Value(V&: X), R: m_ConstantInt())) ||
9503 match(V: A, P: m_Shift(L: m_Value(V&: X), R: m_ConstantInt())))
9504 AddAffected(X);
9505 else if (match(V: A, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) ||
9506 match(V: A, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y)))) {
9507 AddAffected(X);
9508 AddAffected(Y);
9509 }
9510 }
9511 } else {
9512 if (match(V: B, P: m_ConstantInt())) {
9513 // Handle (A + C1) u< C2, which is the canonical form of
9514 // A > C3 && A < C4.
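        // E.g. "A >s 5 && A <s 10" becomes "(A + -6) u< 4", so A itself is
        // the value the implied range information is really about.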
9515 if (match(V: A, P: m_AddLike(L: m_Value(V&: X), R: m_ConstantInt())))
9516 AddAffected(X);
9517
9518 Value *Y;
9519 // X & Y u> C -> X >u C && Y >u C
9520 // X | Y u< C -> X u< C && Y u< C
9521 if (ICmpInst::isUnsigned(predicate: Pred) &&
9522 (match(V: A, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) ||
9523 match(V: A, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
9524 AddAffected(X);
9525 AddAffected(Y);
9526 }
9527 }
9528
9529 // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
9530 // by computeKnownFPClass().
9531 if (match(V: A, P: m_ElementWiseBitCast(Op: m_Value(V&: X)))) {
9532 if (Pred == ICmpInst::ICMP_SLT && match(V: B, P: m_Zero()))
9533 InsertAffected(X);
9534 else if (Pred == ICmpInst::ICMP_SGT && match(V: B, P: m_AllOnes()))
9535 InsertAffected(X);
9536 }
9537 }
9538 } else if (match(V: Cond, P: m_FCmp(Pred, L: m_Value(V&: A), R: m_Value(V&: B)))) {
9539 AddCmpOperands(A, B);
9540
9541 // fcmp fneg(x), y
9542 // fcmp fabs(x), y
9543 // fcmp fneg(fabs(x)), y
9544 if (match(V: A, P: m_FNeg(X: m_Value(V&: A))))
9545 AddAffected(A);
9546 if (match(V: A, P: m_FAbs(Op0: m_Value(V&: A))))
9547 AddAffected(A);
9548
9549 } else if (match(V, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
9550 m_Value()))) {
9551      // Handle patterns that computeKnownFPClass() supports.
9552 AddAffected(A);
9553 }
9554 }
9555}
9556
