1 | //===- InstCombineSimplifyDemanded.cpp ------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains logic for simplifying instructions based on information |
10 | // about how they are used. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "InstCombineInternal.h" |
15 | #include "llvm/Analysis/ValueTracking.h" |
16 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
17 | #include "llvm/IR/IntrinsicInst.h" |
18 | #include "llvm/IR/PatternMatch.h" |
19 | #include "llvm/Support/KnownBits.h" |
20 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
21 | |
22 | using namespace llvm; |
23 | using namespace llvm::PatternMatch; |
24 | |
25 | #define DEBUG_TYPE "instcombine" |
26 | |
// Hidden debugging flag: when set, cross-checks the bits computed by
// SimplifyDemandedBits() against a fresh computeKnownBits() query so that
// disagreements between the two analyses are caught during development.
static cl::opt<bool>
    VerifyKnownBits("instcombine-verify-known-bits",
                    cl::desc("Verify that computeKnownBits() and "
                             "SimplifyDemandedBits() are consistent"),
                    cl::Hidden, cl::init(false));
32 | |
33 | /// Check to see if the specified operand of the specified instruction is a |
34 | /// constant integer. If so, check to see if there are any bits set in the |
35 | /// constant that are not demanded. If so, shrink the constant and return true. |
36 | static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, |
37 | const APInt &Demanded) { |
38 | assert(I && "No instruction?" ); |
39 | assert(OpNo < I->getNumOperands() && "Operand index too large" ); |
40 | |
41 | // The operand must be a constant integer or splat integer. |
42 | Value *Op = I->getOperand(i: OpNo); |
43 | const APInt *C; |
44 | if (!match(V: Op, P: m_APInt(Res&: C))) |
45 | return false; |
46 | |
47 | // If there are no bits set that aren't demanded, nothing to do. |
48 | if (C->isSubsetOf(RHS: Demanded)) |
49 | return false; |
50 | |
51 | // This instruction is producing bits that are not demanded. Shrink the RHS. |
52 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: Op->getType(), V: *C & Demanded)); |
53 | |
54 | return true; |
55 | } |
56 | |
57 | /// Returns the bitwidth of the given scalar or pointer type. For vector types, |
58 | /// returns the element type's bitwidth. |
59 | static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { |
60 | if (unsigned BitWidth = Ty->getScalarSizeInBits()) |
61 | return BitWidth; |
62 | |
63 | return DL.getPointerTypeSizeInBits(Ty); |
64 | } |
65 | |
66 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if |
67 | /// the instruction has any properties that allow us to simplify its operands. |
68 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, |
69 | KnownBits &Known) { |
70 | APInt DemandedMask(APInt::getAllOnes(numBits: Known.getBitWidth())); |
71 | Value *V = SimplifyDemandedUseBits(V: &Inst, DemandedMask, Known, |
72 | Depth: 0, CxtI: &Inst); |
73 | if (!V) return false; |
74 | if (V == &Inst) return true; |
75 | replaceInstUsesWith(I&: Inst, V); |
76 | return true; |
77 | } |
78 | |
79 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if |
80 | /// the instruction has any properties that allow us to simplify its operands. |
81 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { |
82 | KnownBits Known(getBitWidth(Ty: Inst.getType(), DL)); |
83 | return SimplifyDemandedInstructionBits(Inst, Known); |
84 | } |
85 | |
86 | /// This form of SimplifyDemandedBits simplifies the specified instruction |
87 | /// operand if possible, updating it in place. It returns true if it made any |
88 | /// change and false otherwise. |
89 | bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, |
90 | const APInt &DemandedMask, |
91 | KnownBits &Known, unsigned Depth) { |
92 | Use &U = I->getOperandUse(i: OpNo); |
93 | Value *NewVal = SimplifyDemandedUseBits(V: U.get(), DemandedMask, Known, |
94 | Depth, CxtI: I); |
95 | if (!NewVal) return false; |
96 | if (Instruction* OpInst = dyn_cast<Instruction>(Val&: U)) |
97 | salvageDebugInfo(I&: *OpInst); |
98 | |
99 | replaceUse(U, NewValue: NewVal); |
100 | return true; |
101 | } |
102 | |
103 | /// This function attempts to replace V with a simpler value based on the |
104 | /// demanded bits. When this function is called, it is known that only the bits |
105 | /// set in DemandedMask of the result of V are ever used downstream. |
106 | /// Consequently, depending on the mask and V, it may be possible to replace V |
107 | /// with a constant or one of its operands. In such cases, this function does |
108 | /// the replacement and returns true. In all other cases, it returns false after |
109 | /// analyzing the expression and setting Known.One to the bits known to be one in the |
110 | /// expression. Known.Zero contains all the bits that are known to be zero in |
111 | /// the expression. These are provided to potentially allow the caller (which |
112 | /// might recursively be SimplifyDemandedBits itself) to simplify the |
113 | /// expression. |
114 | /// Known.One and Known.Zero always follow the invariant that: |
115 | /// Known.One & Known.Zero == 0. |
116 | /// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero |
117 | /// are accurate even for bits not in DemandedMask. Note |
118 | /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all |
119 | /// be the same. |
120 | /// |
121 | /// This returns null if it did not change anything and it permits no |
122 | /// simplification. This returns V itself if it did some simplification of V's |
123 | /// operands based on the information about what bits are demanded. This returns |
124 | /// some other non-null value if it found out that V is equal to another value |
125 | /// in the context where the specified bits are demanded, but not for all users. |
126 | Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, |
127 | KnownBits &Known, |
128 | unsigned Depth, |
129 | Instruction *CxtI) { |
130 | assert(V != nullptr && "Null pointer of Value???" ); |
131 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth" ); |
132 | uint32_t BitWidth = DemandedMask.getBitWidth(); |
133 | Type *VTy = V->getType(); |
134 | assert( |
135 | (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && |
136 | Known.getBitWidth() == BitWidth && |
137 | "Value *V, DemandedMask and Known must have same BitWidth" ); |
138 | |
139 | if (isa<Constant>(Val: V)) { |
140 | computeKnownBits(V, Known, Depth, CxtI); |
141 | return nullptr; |
142 | } |
143 | |
144 | Known.resetAll(); |
145 | if (DemandedMask.isZero()) // Not demanding any bits from V. |
146 | return UndefValue::get(T: VTy); |
147 | |
148 | if (Depth == MaxAnalysisRecursionDepth) |
149 | return nullptr; |
150 | |
151 | Instruction *I = dyn_cast<Instruction>(Val: V); |
152 | if (!I) { |
153 | computeKnownBits(V, Known, Depth, CxtI); |
154 | return nullptr; // Only analyze instructions. |
155 | } |
156 | |
157 | // If there are multiple uses of this value and we aren't at the root, then |
158 | // we can't do any simplifications of the operands, because DemandedMask |
159 | // only reflects the bits demanded by *one* of the users. |
160 | if (Depth != 0 && !I->hasOneUse()) |
161 | return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); |
162 | |
163 | KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); |
164 | // If this is the root being simplified, allow it to have multiple uses, |
165 | // just set the DemandedMask to all bits so that we can try to simplify the |
166 | // operands. This allows visitTruncInst (for example) to simplify the |
167 | // operand of a trunc without duplicating all the logic below. |
168 | if (Depth == 0 && !V->hasOneUse()) |
169 | DemandedMask.setAllBits(); |
170 | |
171 | // Update flags after simplifying an operand based on the fact that some high |
172 | // order bits are not demanded. |
173 | auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I, |
174 | unsigned NLZ) { |
175 | if (NLZ > 0) { |
176 | // Disable the nsw and nuw flags here: We can no longer guarantee that |
177 | // we won't wrap after simplification. Removing the nsw/nuw flags is |
178 | // legal here because the top bit is not demanded. |
179 | I->setHasNoSignedWrap(false); |
180 | I->setHasNoUnsignedWrap(false); |
181 | } |
182 | return I; |
183 | }; |
184 | |
185 | // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care |
186 | // about the high bits of the operands. |
187 | auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) { |
188 | unsigned NLZ = DemandedMask.countl_zero(); |
189 | // Right fill the mask of bits for the operands to demand the most |
190 | // significant bit and all those below it. |
191 | DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
192 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromOps) || |
193 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOps, Known&: LHSKnown, Depth: Depth + 1) || |
194 | ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
195 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Depth: Depth + 1)) { |
196 | disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
197 | return true; |
198 | } |
199 | return false; |
200 | }; |
201 | |
202 | switch (I->getOpcode()) { |
203 | default: |
204 | computeKnownBits(V: I, Known, Depth, CxtI); |
205 | break; |
206 | case Instruction::And: { |
207 | // If either the LHS or the RHS are Zero, the result is zero. |
208 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
209 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.Zero, Known&: LHSKnown, |
210 | Depth: Depth + 1)) |
211 | return I; |
212 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
213 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
214 | |
215 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
216 | Depth, SQ: SQ.getWithInstruction(I: CxtI)); |
217 | |
218 | // If the client is only demanding bits that we know, return the known |
219 | // constant. |
220 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
221 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
222 | |
223 | // If all of the demanded bits are known 1 on one side, return the other. |
224 | // These bits cannot contribute to the result of the 'and'. |
225 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One)) |
226 | return I->getOperand(i: 0); |
227 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One)) |
228 | return I->getOperand(i: 1); |
229 | |
230 | // If the RHS is a constant, see if we can simplify it. |
231 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask & ~LHSKnown.Zero)) |
232 | return I; |
233 | |
234 | break; |
235 | } |
236 | case Instruction::Or: { |
237 | // If either the LHS or the RHS are One, the result is One. |
238 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
239 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.One, Known&: LHSKnown, |
240 | Depth: Depth + 1)) { |
241 | // Disjoint flag may no longer hold. |
242 | I->dropPoisonGeneratingFlags(); |
243 | return I; |
244 | } |
245 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
246 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
247 | |
248 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
249 | Depth, SQ: SQ.getWithInstruction(I: CxtI)); |
250 | |
251 | // If the client is only demanding bits that we know, return the known |
252 | // constant. |
253 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
254 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
255 | |
256 | // If all of the demanded bits are known zero on one side, return the other. |
257 | // These bits cannot contribute to the result of the 'or'. |
258 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero)) |
259 | return I->getOperand(i: 0); |
260 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) |
261 | return I->getOperand(i: 1); |
262 | |
263 | // If the RHS is a constant, see if we can simplify it. |
264 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) |
265 | return I; |
266 | |
267 | // Infer disjoint flag if no common bits are set. |
268 | if (!cast<PossiblyDisjointInst>(Val: I)->isDisjoint()) { |
269 | WithCache<const Value *> LHSCache(I->getOperand(i: 0), LHSKnown), |
270 | RHSCache(I->getOperand(i: 1), RHSKnown); |
271 | if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ: SQ.getWithInstruction(I))) { |
272 | cast<PossiblyDisjointInst>(Val: I)->setIsDisjoint(true); |
273 | return I; |
274 | } |
275 | } |
276 | |
277 | break; |
278 | } |
279 | case Instruction::Xor: { |
280 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
281 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Depth: Depth + 1)) |
282 | return I; |
283 | Value *LHS, *RHS; |
284 | if (DemandedMask == 1 && |
285 | match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) && |
286 | match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) { |
287 | // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 |
288 | IRBuilderBase::InsertPointGuard Guard(Builder); |
289 | Builder.SetInsertPoint(I); |
290 | auto *Xor = Builder.CreateXor(LHS, RHS); |
291 | return Builder.CreateUnaryIntrinsic(Intrinsic::ID: ctpop, V: Xor); |
292 | } |
293 | |
294 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
295 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
296 | |
297 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
298 | Depth, SQ: SQ.getWithInstruction(I: CxtI)); |
299 | |
300 | // If the client is only demanding bits that we know, return the known |
301 | // constant. |
302 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
303 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
304 | |
305 | // If all of the demanded bits are known zero on one side, return the other. |
306 | // These bits cannot contribute to the result of the 'xor'. |
307 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero)) |
308 | return I->getOperand(i: 0); |
309 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero)) |
310 | return I->getOperand(i: 1); |
311 | |
312 | // If all of the demanded bits are known to be zero on one side or the |
313 | // other, turn this into an *inclusive* or. |
314 | // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 |
315 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.Zero)) { |
316 | Instruction *Or = |
317 | BinaryOperator::CreateOr(V1: I->getOperand(i: 0), V2: I->getOperand(i: 1)); |
318 | if (DemandedMask.isAllOnes()) |
319 | cast<PossiblyDisjointInst>(Val: Or)->setIsDisjoint(true); |
320 | Or->takeName(V: I); |
321 | return InsertNewInstWith(New: Or, Old: I->getIterator()); |
322 | } |
323 | |
324 | // If all of the demanded bits on one side are known, and all of the set |
325 | // bits on that side are also known to be set on the other side, turn this |
326 | // into an AND, as we know the bits will be cleared. |
327 | // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 |
328 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero|RHSKnown.One) && |
329 | RHSKnown.One.isSubsetOf(RHS: LHSKnown.One)) { |
330 | Constant *AndC = Constant::getIntegerValue(Ty: VTy, |
331 | V: ~RHSKnown.One & DemandedMask); |
332 | Instruction *And = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); |
333 | return InsertNewInstWith(New: And, Old: I->getIterator()); |
334 | } |
335 | |
336 | // If the RHS is a constant, see if we can change it. Don't alter a -1 |
337 | // constant because that's a canonical 'not' op, and that is better for |
338 | // combining, SCEV, and codegen. |
339 | const APInt *C; |
340 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && !C->isAllOnes()) { |
341 | if ((*C | ~DemandedMask).isAllOnes()) { |
342 | // Force bits to 1 to create a 'not' op. |
343 | I->setOperand(i: 1, Val: ConstantInt::getAllOnesValue(Ty: VTy)); |
344 | return I; |
345 | } |
346 | // If we can't turn this into a 'not', try to shrink the constant. |
347 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) |
348 | return I; |
349 | } |
350 | |
351 | // If our LHS is an 'and' and if it has one use, and if any of the bits we |
352 | // are flipping are known to be set, then the xor is just resetting those |
353 | // bits to zero. We can just knock out bits from the 'and' and the 'xor', |
354 | // simplifying both of them. |
355 | if (Instruction *LHSInst = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) { |
356 | ConstantInt *AndRHS, *XorRHS; |
357 | if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && |
358 | match(V: I->getOperand(i: 1), P: m_ConstantInt(CI&: XorRHS)) && |
359 | match(V: LHSInst->getOperand(i: 1), P: m_ConstantInt(CI&: AndRHS)) && |
360 | (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { |
361 | APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); |
362 | |
363 | Constant *AndC = ConstantInt::get(Ty: VTy, V: NewMask & AndRHS->getValue()); |
364 | Instruction *NewAnd = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); |
365 | InsertNewInstWith(New: NewAnd, Old: I->getIterator()); |
366 | |
367 | Constant *XorC = ConstantInt::get(Ty: VTy, V: NewMask & XorRHS->getValue()); |
368 | Instruction *NewXor = BinaryOperator::CreateXor(V1: NewAnd, V2: XorC); |
369 | return InsertNewInstWith(New: NewXor, Old: I->getIterator()); |
370 | } |
371 | } |
372 | break; |
373 | } |
374 | case Instruction::Select: { |
375 | if (SimplifyDemandedBits(I, OpNo: 2, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
376 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: LHSKnown, Depth: Depth + 1)) |
377 | return I; |
378 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
379 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
380 | |
381 | // If the operands are constants, see if we can simplify them. |
382 | // This is similar to ShrinkDemandedConstant, but for a select we want to |
383 | // try to keep the selected constants the same as icmp value constants, if |
384 | // we can. This helps not break apart (or helps put back together) |
385 | // canonical patterns like min and max. |
386 | auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo, |
387 | const APInt &DemandedMask) { |
388 | const APInt *SelC; |
389 | if (!match(V: I->getOperand(i: OpNo), P: m_APInt(Res&: SelC))) |
390 | return false; |
391 | |
392 | // Get the constant out of the ICmp, if there is one. |
393 | // Only try this when exactly 1 operand is a constant (if both operands |
394 | // are constant, the icmp should eventually simplify). Otherwise, we may |
395 | // invert the transform that reduces set bits and infinite-loop. |
396 | Value *X; |
397 | const APInt *CmpC; |
398 | ICmpInst::Predicate Pred; |
399 | if (!match(V: I->getOperand(i: 0), P: m_ICmp(Pred, L: m_Value(V&: X), R: m_APInt(Res&: CmpC))) || |
400 | isa<Constant>(Val: X) || CmpC->getBitWidth() != SelC->getBitWidth()) |
401 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); |
402 | |
403 | // If the constant is already the same as the ICmp, leave it as-is. |
404 | if (*CmpC == *SelC) |
405 | return false; |
406 | // If the constants are not already the same, but can be with the demand |
407 | // mask, use the constant value from the ICmp. |
408 | if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) { |
409 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: I->getType(), V: *CmpC)); |
410 | return true; |
411 | } |
412 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); |
413 | }; |
414 | if (CanonicalizeSelectConstant(I, 1, DemandedMask) || |
415 | CanonicalizeSelectConstant(I, 2, DemandedMask)) |
416 | return I; |
417 | |
418 | // Only known if known in both the LHS and RHS. |
419 | Known = LHSKnown.intersectWith(RHS: RHSKnown); |
420 | break; |
421 | } |
422 | case Instruction::Trunc: { |
423 | // If we do not demand the high bits of a right-shifted and truncated value, |
424 | // then we may be able to truncate it before the shift. |
425 | Value *X; |
426 | const APInt *C; |
427 | if (match(V: I->getOperand(i: 0), P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C))))) { |
428 | // The shift amount must be valid (not poison) in the narrow type, and |
429 | // it must not be greater than the high bits demanded of the result. |
430 | if (C->ult(RHS: VTy->getScalarSizeInBits()) && |
431 | C->ule(RHS: DemandedMask.countl_zero())) { |
432 | // trunc (lshr X, C) --> lshr (trunc X), C |
433 | IRBuilderBase::InsertPointGuard Guard(Builder); |
434 | Builder.SetInsertPoint(I); |
435 | Value *Trunc = Builder.CreateTrunc(V: X, DestTy: VTy); |
436 | return Builder.CreateLShr(LHS: Trunc, RHS: C->getZExtValue()); |
437 | } |
438 | } |
439 | } |
440 | [[fallthrough]]; |
441 | case Instruction::ZExt: { |
442 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); |
443 | |
444 | APInt InputDemandedMask = DemandedMask.zextOrTrunc(width: SrcBitWidth); |
445 | KnownBits InputKnown(SrcBitWidth); |
446 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedMask, Known&: InputKnown, Depth: Depth + 1)) { |
447 | // For zext nneg, we may have dropped the instruction which made the |
448 | // input non-negative. |
449 | I->dropPoisonGeneratingFlags(); |
450 | return I; |
451 | } |
452 | assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?" ); |
453 | if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() && |
454 | !InputKnown.isNegative()) |
455 | InputKnown.makeNonNegative(); |
456 | Known = InputKnown.zextOrTrunc(BitWidth); |
457 | |
458 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
459 | break; |
460 | } |
461 | case Instruction::SExt: { |
462 | // Compute the bits in the result that are not present in the input. |
463 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); |
464 | |
465 | APInt InputDemandedBits = DemandedMask.trunc(width: SrcBitWidth); |
466 | |
467 | // If any of the sign extended bits are demanded, we know that the sign |
468 | // bit is demanded. |
469 | if (DemandedMask.getActiveBits() > SrcBitWidth) |
470 | InputDemandedBits.setBit(SrcBitWidth-1); |
471 | |
472 | KnownBits InputKnown(SrcBitWidth); |
473 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedBits, Known&: InputKnown, Depth: Depth + 1)) |
474 | return I; |
475 | |
476 | // If the input sign bit is known zero, or if the NewBits are not demanded |
477 | // convert this into a zero extension. |
478 | if (InputKnown.isNonNegative() || |
479 | DemandedMask.getActiveBits() <= SrcBitWidth) { |
480 | // Convert to ZExt cast. |
481 | CastInst *NewCast = new ZExtInst(I->getOperand(i: 0), VTy); |
482 | NewCast->takeName(V: I); |
483 | return InsertNewInstWith(New: NewCast, Old: I->getIterator()); |
484 | } |
485 | |
486 | // If the sign bit of the input is known set or clear, then we know the |
487 | // top bits of the result. |
488 | Known = InputKnown.sext(BitWidth); |
489 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
490 | break; |
491 | } |
492 | case Instruction::Add: { |
493 | if ((DemandedMask & 1) == 0) { |
494 | // If we do not need the low bit, try to convert bool math to logic: |
495 | // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN |
496 | Value *X, *Y; |
497 | if (match(V: I, P: m_c_Add(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))), |
498 | R: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: Y))))) && |
499 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType()) { |
500 | // Truth table for inputs and output signbits: |
501 | // X:0 | X:1 |
502 | // ---------- |
503 | // Y:0 | 0 | 0 | |
504 | // Y:1 | -1 | 0 | |
505 | // ---------- |
506 | IRBuilderBase::InsertPointGuard Guard(Builder); |
507 | Builder.SetInsertPoint(I); |
508 | Value *AndNot = Builder.CreateAnd(LHS: Builder.CreateNot(V: X), RHS: Y); |
509 | return Builder.CreateSExt(V: AndNot, DestTy: VTy); |
510 | } |
511 | |
512 | // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN |
513 | // TODO: Relax the one-use checks because we are removing an instruction? |
514 | if (match(V: I, P: m_Add(L: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))), |
515 | R: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: Y))))) && |
516 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType()) { |
517 | // Truth table for inputs and output signbits: |
518 | // X:0 | X:1 |
519 | // ----------- |
520 | // Y:0 | -1 | -1 | |
521 | // Y:1 | -1 | 0 | |
522 | // ----------- |
523 | IRBuilderBase::InsertPointGuard Guard(Builder); |
524 | Builder.SetInsertPoint(I); |
525 | Value *Or = Builder.CreateOr(LHS: X, RHS: Y); |
526 | return Builder.CreateSExt(V: Or, DestTy: VTy); |
527 | } |
528 | } |
529 | |
530 | // Right fill the mask of bits for the operands to demand the most |
531 | // significant bit and all those below it. |
532 | unsigned NLZ = DemandedMask.countl_zero(); |
533 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
534 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
535 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Depth: Depth + 1)) |
536 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
537 | |
538 | // If low order bits are not demanded and known to be zero in one operand, |
539 | // then we don't need to demand them from the other operand, since they |
540 | // can't cause overflow into any bits that are demanded in the result. |
541 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); |
542 | APInt DemandedFromLHS = DemandedFromOps; |
543 | DemandedFromLHS.clearLowBits(loBits: NTZ); |
544 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || |
545 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Depth: Depth + 1)) |
546 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
547 | |
548 | // If we are known to be adding zeros to every bit below |
549 | // the highest demanded bit, we just return the other side. |
550 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
551 | return I->getOperand(i: 0); |
552 | if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) |
553 | return I->getOperand(i: 1); |
554 | |
555 | // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the DemandMask |
556 | { |
557 | const APInt *C; |
558 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && |
559 | C->isOneBitSet(BitNo: DemandedMask.getActiveBits() - 1)) { |
560 | IRBuilderBase::InsertPointGuard Guard(Builder); |
561 | Builder.SetInsertPoint(I); |
562 | return Builder.CreateXor(LHS: I->getOperand(i: 0), RHS: ConstantInt::get(Ty: VTy, V: *C)); |
563 | } |
564 | } |
565 | |
566 | // Otherwise just compute the known bits of the result. |
567 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
568 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
569 | Known = KnownBits::computeForAddSub(Add: true, NSW, NUW, LHS: LHSKnown, RHS: RHSKnown); |
570 | break; |
571 | } |
572 | case Instruction::Sub: { |
573 | // Right fill the mask of bits for the operands to demand the most |
574 | // significant bit and all those below it. |
575 | unsigned NLZ = DemandedMask.countl_zero(); |
576 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
577 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
578 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Depth: Depth + 1)) |
579 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
580 | |
581 | // If low order bits are not demanded and are known to be zero in RHS, |
582 | // then we don't need to demand them from LHS, since they can't cause a |
583 | // borrow from any bits that are demanded in the result. |
584 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); |
585 | APInt DemandedFromLHS = DemandedFromOps; |
586 | DemandedFromLHS.clearLowBits(loBits: NTZ); |
587 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || |
588 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Depth: Depth + 1)) |
589 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
590 | |
591 | // If we are known to be subtracting zeros from every bit below |
592 | // the highest demanded bit, we just return the other side. |
593 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
594 | return I->getOperand(i: 0); |
595 | // We can't do this with the LHS for subtraction, unless we are only |
596 | // demanding the LSB. |
597 | if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) |
598 | return I->getOperand(i: 1); |
599 | |
600 | // Otherwise just compute the known bits of the result. |
601 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
602 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
603 | Known = KnownBits::computeForAddSub(Add: false, NSW, NUW, LHS: LHSKnown, RHS: RHSKnown); |
604 | break; |
605 | } |
606 | case Instruction::Mul: { |
607 | APInt DemandedFromOps; |
608 | if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps)) |
609 | return I; |
610 | |
611 | if (DemandedMask.isPowerOf2()) { |
612 | // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. |
613 | // If we demand exactly one bit N and we have "X * (C' << N)" where C' is |
614 | // odd (has LSB set), then the left-shifted low bit of X is the answer. |
615 | unsigned CTZ = DemandedMask.countr_zero(); |
616 | const APInt *C; |
617 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && C->countr_zero() == CTZ) { |
618 | Constant *ShiftC = ConstantInt::get(Ty: VTy, V: CTZ); |
619 | Instruction *Shl = BinaryOperator::CreateShl(V1: I->getOperand(i: 0), V2: ShiftC); |
620 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); |
621 | } |
622 | } |
623 | // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: |
624 | // X * X is odd iff X is odd. |
625 | // 'Quadratic Reciprocity': X * X -> 0 for bit[1] |
626 | if (I->getOperand(i: 0) == I->getOperand(i: 1) && DemandedMask.ult(RHS: 4)) { |
627 | Constant *One = ConstantInt::get(Ty: VTy, V: 1); |
628 | Instruction *And1 = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: One); |
629 | return InsertNewInstWith(New: And1, Old: I->getIterator()); |
630 | } |
631 | |
632 | computeKnownBits(V: I, Known, Depth, CxtI); |
633 | break; |
634 | } |
635 | case Instruction::Shl: { |
636 | const APInt *SA; |
637 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
638 | const APInt *ShrAmt; |
639 | if (match(V: I->getOperand(i: 0), P: m_Shr(L: m_Value(), R: m_APInt(Res&: ShrAmt)))) |
640 | if (Instruction *Shr = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) |
641 | if (Value *R = simplifyShrShlDemandedBits(Shr, ShrOp1: *ShrAmt, Shl: I, ShlOp1: *SA, |
642 | DemandedMask, Known)) |
643 | return R; |
644 | |
645 | // Do not simplify if shl is part of funnel-shift pattern |
646 | if (I->hasOneUse()) { |
647 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); |
648 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { |
649 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { |
650 | auto [IID, FShiftArgs] = *Opt; |
651 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && |
652 | FShiftArgs[0] == FShiftArgs[1]) |
653 | return nullptr; |
654 | } |
655 | } |
656 | } |
657 | |
658 | // We only want bits that already match the signbit then we don't |
659 | // need to shift. |
660 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth - 1); |
661 | if (DemandedMask.countr_zero() >= ShiftAmt) { |
662 | if (I->hasNoSignedWrap()) { |
663 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
664 | unsigned SignBits = |
665 | ComputeNumSignBits(Op: I->getOperand(i: 0), Depth: Depth + 1, CxtI); |
666 | if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits) |
667 | return I->getOperand(i: 0); |
668 | } |
669 | |
670 | // If we can pre-shift a right-shifted constant to the left without |
671 | // losing any high bits and we don't demand the low bits, then eliminate |
672 | // the left-shift: |
673 | // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X |
674 | Value *X; |
675 | Constant *C; |
676 | if (match(V: I->getOperand(i: 0), P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) { |
677 | Constant *LeftShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); |
678 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: C, |
679 | RHS: LeftShiftAmtC, DL); |
680 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: NewC, |
681 | RHS: LeftShiftAmtC, DL) == C) { |
682 | Instruction *Lshr = BinaryOperator::CreateLShr(V1: NewC, V2: X); |
683 | return InsertNewInstWith(New: Lshr, Old: I->getIterator()); |
684 | } |
685 | } |
686 | } |
687 | |
688 | APInt DemandedMaskIn(DemandedMask.lshr(shiftAmt: ShiftAmt)); |
689 | |
690 | // If the shift is NUW/NSW, then it does demand the high bits. |
691 | ShlOperator *IOp = cast<ShlOperator>(Val: I); |
692 | if (IOp->hasNoSignedWrap()) |
693 | DemandedMaskIn.setHighBits(ShiftAmt+1); |
694 | else if (IOp->hasNoUnsignedWrap()) |
695 | DemandedMaskIn.setHighBits(ShiftAmt); |
696 | |
697 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Depth: Depth + 1)) |
698 | return I; |
699 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
700 | |
701 | Known = KnownBits::shl(LHS: Known, |
702 | RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)), |
703 | /* NUW */ IOp->hasNoUnsignedWrap(), |
704 | /* NSW */ IOp->hasNoSignedWrap()); |
705 | } else { |
706 | // This is a variable shift, so we can't shift the demand mask by a known |
707 | // amount. But if we are not demanding high bits, then we are not |
708 | // demanding those bits from the pre-shifted operand either. |
709 | if (unsigned CTLZ = DemandedMask.countl_zero()) { |
710 | APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ)); |
711 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOp, Known, Depth: Depth + 1)) { |
712 | // We can't guarantee that nsw/nuw hold after simplifying the operand. |
713 | I->dropPoisonGeneratingFlags(); |
714 | return I; |
715 | } |
716 | } |
717 | computeKnownBits(V: I, Known, Depth, CxtI); |
718 | } |
719 | break; |
720 | } |
721 | case Instruction::LShr: { |
722 | const APInt *SA; |
723 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
724 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); |
725 | |
726 | // Do not simplify if lshr is part of funnel-shift pattern |
727 | if (I->hasOneUse()) { |
728 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); |
729 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { |
730 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { |
731 | auto [IID, FShiftArgs] = *Opt; |
732 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && |
733 | FShiftArgs[0] == FShiftArgs[1]) |
734 | return nullptr; |
735 | } |
736 | } |
737 | } |
738 | |
739 | // If we are just demanding the shifted sign bit and below, then this can |
740 | // be treated as an ASHR in disguise. |
741 | if (DemandedMask.countl_zero() >= ShiftAmt) { |
742 | // If we only want bits that already match the signbit then we don't |
743 | // need to shift. |
744 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
745 | unsigned SignBits = |
746 | ComputeNumSignBits(Op: I->getOperand(i: 0), Depth: Depth + 1, CxtI); |
747 | if (SignBits >= NumHiDemandedBits) |
748 | return I->getOperand(i: 0); |
749 | |
750 | // If we can pre-shift a left-shifted constant to the right without |
751 | // losing any low bits (we already know we don't demand the high bits), |
752 | // then eliminate the right-shift: |
753 | // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X |
754 | Value *X; |
755 | Constant *C; |
756 | if (match(V: I->getOperand(i: 0), P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X)))) { |
757 | Constant *RightShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); |
758 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: C, |
759 | RHS: RightShiftAmtC, DL); |
760 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: NewC, |
761 | RHS: RightShiftAmtC, DL) == C) { |
762 | Instruction *Shl = BinaryOperator::CreateShl(V1: NewC, V2: X); |
763 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); |
764 | } |
765 | } |
766 | } |
767 | |
768 | // Unsigned shift right. |
769 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); |
770 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Depth: Depth + 1)) { |
        // The "exact" flag may no longer hold.
772 | I->dropPoisonGeneratingFlags(); |
773 | return I; |
774 | } |
775 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
776 | Known.Zero.lshrInPlace(ShiftAmt); |
777 | Known.One.lshrInPlace(ShiftAmt); |
778 | if (ShiftAmt) |
779 | Known.Zero.setHighBits(ShiftAmt); // high bits known zero. |
780 | } else { |
781 | computeKnownBits(V: I, Known, Depth, CxtI); |
782 | } |
783 | break; |
784 | } |
785 | case Instruction::AShr: { |
786 | unsigned SignBits = ComputeNumSignBits(Op: I->getOperand(i: 0), Depth: Depth + 1, CxtI); |
787 | |
788 | // If we only want bits that already match the signbit then we don't need |
789 | // to shift. |
790 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
791 | if (SignBits >= NumHiDemandedBits) |
792 | return I->getOperand(i: 0); |
793 | |
794 | // If this is an arithmetic shift right and only the low-bit is set, we can |
795 | // always convert this into a logical shr, even if the shift amount is |
796 | // variable. The low bit of the shift cannot be an input sign bit unless |
797 | // the shift amount is >= the size of the datatype, which is undefined. |
798 | if (DemandedMask.isOne()) { |
799 | // Perform the logical shift right. |
800 | Instruction *NewVal = BinaryOperator::CreateLShr( |
801 | V1: I->getOperand(i: 0), V2: I->getOperand(i: 1), Name: I->getName()); |
802 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); |
803 | } |
804 | |
805 | const APInt *SA; |
806 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
807 | uint32_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); |
808 | |
809 | // Signed shift right. |
810 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); |
811 | // If any of the high bits are demanded, we should set the sign bit as |
812 | // demanded. |
813 | if (DemandedMask.countl_zero() <= ShiftAmt) |
814 | DemandedMaskIn.setSignBit(); |
815 | |
816 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Depth: Depth + 1)) { |
        // The "exact" flag may no longer hold.
818 | I->dropPoisonGeneratingFlags(); |
819 | return I; |
820 | } |
821 | |
822 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
823 | // Compute the new bits that are at the top now plus sign bits. |
824 | APInt HighBits(APInt::getHighBitsSet( |
825 | numBits: BitWidth, hiBitsSet: std::min(a: SignBits + ShiftAmt - 1, b: BitWidth))); |
826 | Known.Zero.lshrInPlace(ShiftAmt); |
827 | Known.One.lshrInPlace(ShiftAmt); |
828 | |
829 | // If the input sign bit is known to be zero, or if none of the top bits |
830 | // are demanded, turn this into an unsigned shift right. |
831 | assert(BitWidth > ShiftAmt && "Shift amount not saturated?" ); |
832 | if (Known.Zero[BitWidth-ShiftAmt-1] || |
833 | !DemandedMask.intersects(RHS: HighBits)) { |
834 | BinaryOperator *LShr = BinaryOperator::CreateLShr(V1: I->getOperand(i: 0), |
835 | V2: I->getOperand(i: 1)); |
836 | LShr->setIsExact(cast<BinaryOperator>(Val: I)->isExact()); |
837 | LShr->takeName(V: I); |
838 | return InsertNewInstWith(New: LShr, Old: I->getIterator()); |
839 | } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one. |
840 | Known.One |= HighBits; |
841 | // SignBits may be out-of-sync with Known.countMinSignBits(). Mask out |
842 | // high bits of Known.Zero to avoid conflicts. |
843 | Known.Zero &= ~HighBits; |
844 | } |
845 | } else { |
846 | computeKnownBits(V: I, Known, Depth, CxtI); |
847 | } |
848 | break; |
849 | } |
850 | case Instruction::UDiv: { |
851 | // UDiv doesn't demand low bits that are zero in the divisor. |
852 | const APInt *SA; |
853 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
854 | // TODO: Take the demanded mask of the result into account. |
855 | unsigned RHSTrailingZeros = SA->countr_zero(); |
856 | APInt DemandedMaskIn = |
857 | APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - RHSTrailingZeros); |
858 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known&: LHSKnown, Depth: Depth + 1)) { |
859 | // We can't guarantee that "exact" is still true after changing the |
860 | // the dividend. |
861 | I->dropPoisonGeneratingFlags(); |
862 | return I; |
863 | } |
864 | |
865 | Known = KnownBits::udiv(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *SA), |
866 | Exact: cast<BinaryOperator>(Val: I)->isExact()); |
867 | } else { |
868 | computeKnownBits(V: I, Known, Depth, CxtI); |
869 | } |
870 | break; |
871 | } |
872 | case Instruction::SRem: { |
873 | const APInt *Rem; |
874 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Rem))) { |
875 | // X % -1 demands all the bits because we don't want to introduce |
876 | // INT_MIN % -1 (== undef) by accident. |
877 | if (Rem->isAllOnes()) |
878 | break; |
879 | APInt RA = Rem->abs(); |
880 | if (RA.isPowerOf2()) { |
881 | if (DemandedMask.ult(RHS: RA)) // srem won't affect demanded bits |
882 | return I->getOperand(i: 0); |
883 | |
884 | APInt LowBits = RA - 1; |
885 | APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); |
886 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: Mask2, Known&: LHSKnown, Depth: Depth + 1)) |
887 | return I; |
888 | |
889 | // The low bits of LHS are unchanged by the srem. |
890 | Known.Zero = LHSKnown.Zero & LowBits; |
891 | Known.One = LHSKnown.One & LowBits; |
892 | |
893 | // If LHS is non-negative or has all low bits zero, then the upper bits |
894 | // are all zero. |
895 | if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(RHS: LHSKnown.Zero)) |
896 | Known.Zero |= ~LowBits; |
897 | |
898 | // If LHS is negative and not all low bits are zero, then the upper bits |
899 | // are all one. |
900 | if (LHSKnown.isNegative() && LowBits.intersects(RHS: LHSKnown.One)) |
901 | Known.One |= ~LowBits; |
902 | |
903 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
904 | break; |
905 | } |
906 | } |
907 | |
908 | computeKnownBits(V: I, Known, Depth, CxtI); |
909 | break; |
910 | } |
911 | case Instruction::URem: { |
912 | APInt AllOnes = APInt::getAllOnes(numBits: BitWidth); |
913 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: AllOnes, Known&: LHSKnown, Depth: Depth + 1) || |
914 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: AllOnes, Known&: RHSKnown, Depth: Depth + 1)) |
915 | return I; |
916 | |
917 | Known = KnownBits::urem(LHS: LHSKnown, RHS: RHSKnown); |
918 | break; |
919 | } |
920 | case Instruction::Call: { |
921 | bool KnownBitsComputed = false; |
922 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
923 | switch (II->getIntrinsicID()) { |
924 | case Intrinsic::abs: { |
925 | if (DemandedMask == 1) |
926 | return II->getArgOperand(i: 0); |
927 | break; |
928 | } |
929 | case Intrinsic::ctpop: { |
930 | // Checking if the number of clear bits is odd (parity)? If the type has |
931 | // an even number of bits, that's the same as checking if the number of |
932 | // set bits is odd, so we can eliminate the 'not' op. |
933 | Value *X; |
934 | if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 && |
935 | match(V: II->getArgOperand(i: 0), P: m_Not(V: m_Value(V&: X)))) { |
936 | Function *Ctpop = Intrinsic::getDeclaration( |
937 | M: II->getModule(), Intrinsic::id: ctpop, Tys: VTy); |
938 | return InsertNewInstWith(New: CallInst::Create(Func: Ctpop, Args: {X}), Old: I->getIterator()); |
939 | } |
940 | break; |
941 | } |
942 | case Intrinsic::bswap: { |
943 | // If the only bits demanded come from one byte of the bswap result, |
944 | // just shift the input byte into position to eliminate the bswap. |
945 | unsigned NLZ = DemandedMask.countl_zero(); |
946 | unsigned NTZ = DemandedMask.countr_zero(); |
947 | |
948 | // Round NTZ down to the next byte. If we have 11 trailing zeros, then |
949 | // we need all the bits down to bit 8. Likewise, round NLZ. If we |
950 | // have 14 leading zeros, round to 8. |
951 | NLZ = alignDown(Value: NLZ, Align: 8); |
952 | NTZ = alignDown(Value: NTZ, Align: 8); |
953 | // If we need exactly one byte, we can do this transformation. |
954 | if (BitWidth - NLZ - NTZ == 8) { |
955 | // Replace this with either a left or right shift to get the byte into |
956 | // the right place. |
957 | Instruction *NewVal; |
958 | if (NLZ > NTZ) |
959 | NewVal = BinaryOperator::CreateLShr( |
960 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NLZ - NTZ)); |
961 | else |
962 | NewVal = BinaryOperator::CreateShl( |
963 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NTZ - NLZ)); |
964 | NewVal->takeName(V: I); |
965 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); |
966 | } |
967 | break; |
968 | } |
969 | case Intrinsic::ptrmask: { |
970 | unsigned MaskWidth = I->getOperand(i: 1)->getType()->getScalarSizeInBits(); |
971 | RHSKnown = KnownBits(MaskWidth); |
972 | // If either the LHS or the RHS are Zero, the result is zero. |
973 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Depth: Depth + 1) || |
974 | SimplifyDemandedBits( |
975 | I, OpNo: 1, DemandedMask: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth), |
976 | Known&: RHSKnown, Depth: Depth + 1)) |
977 | return I; |
978 | |
979 | // TODO: Should be 1-extend |
980 | RHSKnown = RHSKnown.anyextOrTrunc(BitWidth); |
981 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
982 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
983 | |
984 | Known = LHSKnown & RHSKnown; |
985 | KnownBitsComputed = true; |
986 | |
987 | // If the client is only demanding bits we know to be zero, return |
988 | // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer |
989 | // provenance, but making the mask zero will be easily optimizable in |
990 | // the backend. |
991 | if (DemandedMask.isSubsetOf(RHS: Known.Zero) && |
992 | !match(V: I->getOperand(i: 1), P: m_Zero())) |
993 | return replaceOperand( |
994 | I&: *I, OpNum: 1, V: Constant::getNullValue(Ty: I->getOperand(i: 1)->getType())); |
995 | |
996 | // Mask in demanded space does nothing. |
997 | // NOTE: We may have attributes associated with the return value of the |
998 | // llvm.ptrmask intrinsic that will be lost when we just return the |
999 | // operand. We should try to preserve them. |
1000 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) |
1001 | return I->getOperand(i: 0); |
1002 | |
1003 | // If the RHS is a constant, see if we can simplify it. |
1004 | if (ShrinkDemandedConstant( |
1005 | I, OpNo: 1, Demanded: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth))) |
1006 | return I; |
1007 | |
1008 | // Combine: |
1009 | // (ptrmask (getelementptr i8, ptr p, imm i), imm mask) |
1010 | // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask) |
1011 | // where only the low bits known to be zero in the pointer are changed |
1012 | Value *InnerPtr; |
1013 | uint64_t GEPIndex; |
1014 | uint64_t PtrMaskImmediate; |
1015 | if (match(I, m_Intrinsic<Intrinsic::ptrmask>( |
1016 | m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)), |
1017 | m_ConstantInt(PtrMaskImmediate)))) { |
1018 | |
1019 | LHSKnown = computeKnownBits(V: InnerPtr, Depth: Depth + 1, CxtI: I); |
1020 | if (!LHSKnown.isZero()) { |
1021 | const unsigned trailingZeros = LHSKnown.countMinTrailingZeros(); |
1022 | uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1; |
1023 | |
1024 | uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits; |
1025 | uint64_t MaskedLowBitsGEPIndex = |
1026 | GEPIndex & PointerAlignBits & PtrMaskImmediate; |
1027 | |
1028 | uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; |
1029 | |
1030 | if (MaskedGEPIndex != GEPIndex) { |
1031 | auto *GEP = cast<GetElementPtrInst>(Val: II->getArgOperand(i: 0)); |
1032 | Builder.SetInsertPoint(I); |
1033 | Type *GEPIndexType = |
1034 | DL.getIndexType(PtrTy: GEP->getPointerOperand()->getType()); |
1035 | Value *MaskedGEP = Builder.CreateGEP( |
1036 | Ty: GEP->getSourceElementType(), Ptr: InnerPtr, |
1037 | IdxList: ConstantInt::get(Ty: GEPIndexType, V: MaskedGEPIndex), |
1038 | Name: GEP->getName(), IsInBounds: GEP->isInBounds()); |
1039 | |
1040 | replaceOperand(I&: *I, OpNum: 0, V: MaskedGEP); |
1041 | return I; |
1042 | } |
1043 | } |
1044 | } |
1045 | |
1046 | break; |
1047 | } |
1048 | |
1049 | case Intrinsic::fshr: |
1050 | case Intrinsic::fshl: { |
1051 | const APInt *SA; |
1052 | if (!match(V: I->getOperand(i: 2), P: m_APInt(Res&: SA))) |
1053 | break; |
1054 | |
1055 | // Normalize to funnel shift left. APInt shifts of BitWidth are well- |
1056 | // defined, so no need to special-case zero shifts here. |
1057 | uint64_t ShiftAmt = SA->urem(RHS: BitWidth); |
1058 | if (II->getIntrinsicID() == Intrinsic::fshr) |
1059 | ShiftAmt = BitWidth - ShiftAmt; |
1060 | |
1061 | APInt DemandedMaskLHS(DemandedMask.lshr(shiftAmt: ShiftAmt)); |
1062 | APInt DemandedMaskRHS(DemandedMask.shl(shiftAmt: BitWidth - ShiftAmt)); |
1063 | if (I->getOperand(i: 0) != I->getOperand(i: 1)) { |
1064 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskLHS, Known&: LHSKnown, |
1065 | Depth: Depth + 1) || |
1066 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedMaskRHS, Known&: RHSKnown, Depth: Depth + 1)) |
1067 | return I; |
1068 | } else { // fshl is a rotate |
1069 | // Avoid converting rotate into funnel shift. |
1070 | // Only simplify if one operand is constant. |
1071 | LHSKnown = computeKnownBits(V: I->getOperand(i: 0), Depth: Depth + 1, CxtI: I); |
1072 | if (DemandedMaskLHS.isSubsetOf(RHS: LHSKnown.Zero | LHSKnown.One) && |
1073 | !match(V: I->getOperand(i: 0), P: m_SpecificInt(V: LHSKnown.One))) { |
1074 | replaceOperand(I&: *I, OpNum: 0, V: Constant::getIntegerValue(Ty: VTy, V: LHSKnown.One)); |
1075 | return I; |
1076 | } |
1077 | |
1078 | RHSKnown = computeKnownBits(V: I->getOperand(i: 1), Depth: Depth + 1, CxtI: I); |
1079 | if (DemandedMaskRHS.isSubsetOf(RHS: RHSKnown.Zero | RHSKnown.One) && |
1080 | !match(V: I->getOperand(i: 1), P: m_SpecificInt(V: RHSKnown.One))) { |
1081 | replaceOperand(I&: *I, OpNum: 1, V: Constant::getIntegerValue(Ty: VTy, V: RHSKnown.One)); |
1082 | return I; |
1083 | } |
1084 | } |
1085 | |
1086 | Known.Zero = LHSKnown.Zero.shl(shiftAmt: ShiftAmt) | |
1087 | RHSKnown.Zero.lshr(shiftAmt: BitWidth - ShiftAmt); |
1088 | Known.One = LHSKnown.One.shl(shiftAmt: ShiftAmt) | |
1089 | RHSKnown.One.lshr(shiftAmt: BitWidth - ShiftAmt); |
1090 | KnownBitsComputed = true; |
1091 | break; |
1092 | } |
1093 | case Intrinsic::umax: { |
1094 | // UMax(A, C) == A if ... |
1095 | // The lowest non-zero bit of DemandMask is higher than the highest |
1096 | // non-zero bit of C. |
1097 | const APInt *C; |
1098 | unsigned CTZ = DemandedMask.countr_zero(); |
1099 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && |
1100 | CTZ >= C->getActiveBits()) |
1101 | return II->getArgOperand(i: 0); |
1102 | break; |
1103 | } |
1104 | case Intrinsic::umin: { |
1105 | // UMin(A, C) == A if ... |
1106 | // The lowest non-zero bit of DemandMask is higher than the highest |
1107 | // non-one bit of C. |
1108 | // This comes from using DeMorgans on the above umax example. |
1109 | const APInt *C; |
1110 | unsigned CTZ = DemandedMask.countr_zero(); |
1111 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && |
1112 | CTZ >= C->getBitWidth() - C->countl_one()) |
1113 | return II->getArgOperand(i: 0); |
1114 | break; |
1115 | } |
1116 | default: { |
1117 | // Handle target specific intrinsics |
1118 | std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic( |
1119 | II&: *II, DemandedMask, Known, KnownBitsComputed); |
1120 | if (V) |
1121 | return *V; |
1122 | break; |
1123 | } |
1124 | } |
1125 | } |
1126 | |
1127 | if (!KnownBitsComputed) |
1128 | computeKnownBits(V, Known, Depth, CxtI); |
1129 | break; |
1130 | } |
1131 | } |
1132 | |
1133 | if (V->getType()->isPointerTy()) { |
1134 | Align Alignment = V->getPointerAlignment(DL); |
1135 | Known.Zero.setLowBits(Log2(A: Alignment)); |
1136 | } |
1137 | |
1138 | // If the client is only demanding bits that we know, return the known |
1139 | // constant. We can't directly simplify pointers as a constant because of |
1140 | // pointer provenance. |
1141 | // TODO: We could return `(inttoptr const)` for pointers. |
1142 | if (!V->getType()->isPointerTy() && DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
1143 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
1144 | |
1145 | if (VerifyKnownBits) { |
1146 | KnownBits ReferenceKnown = computeKnownBits(V, Depth, CxtI); |
1147 | if (Known != ReferenceKnown) { |
1148 | errs() << "Mismatched known bits for " << *V << " in " |
1149 | << I->getFunction()->getName() << "\n" ; |
1150 | errs() << "computeKnownBits(): " << ReferenceKnown << "\n" ; |
1151 | errs() << "SimplifyDemandedBits(): " << Known << "\n" ; |
1152 | std::abort(); |
1153 | } |
1154 | } |
1155 | |
1156 | return nullptr; |
1157 | } |
1158 | |
/// Helper routine of SimplifyDemandedUseBits. It computes the known bits of
/// \p I and, based on \p DemandedMask, tries simplifications that are valid
/// only in the context of the single use site \p CxtI — without modifying
/// \p I itself, since \p I has other users and must not be rewritten in place.
///
/// Returns a simpler value that can replace \p I at \p CxtI, or nullptr if no
/// context-specific simplification was found. \p Known is filled in either way.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth,
    Instruction *CxtI) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  // Per-operand known-bits scratch, reused by the cases below.
  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in all User's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
                                         Depth, SQ: SQ.getWithInstruction(I: CxtI));
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this context.
    // For each demanded bit in the first test: either the RHS bit is known 1
    // (the 'and' passes the LHS bit through) or the LHS bit is known 0 (the
    // result is 0, which equals the LHS bit) — so the result matches LHS on
    // every demanded bit. The second test is symmetric for RHS.
    if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(i: 0);
    if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(i: 1);

    break;
  }
  case Instruction::Or: {
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
                                         Depth, SQ: SQ.getWithInstruction(I: CxtI));
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or' in this context.
    // (Demanded bits known 1 on the returned side are also fine: the 'or'
    // result is 1 there, matching that side.)
    if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(i: 0);
    if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(i: 1);

    break;
  }
  case Instruction::Xor: {
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
                                         Depth, SQ: SQ.getWithInstruction(I: CxtI));
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other
    // (xor with a known-zero bit is the identity on that bit). Unlike and/or,
    // known-one bits do NOT allow this: they would flip the other side.
    if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero))
      return I->getOperand(i: 0);
    if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero))
      return I->getOperand(i: 1);

    break;
  }
  case Instruction::Add: {
    // Bits above the highest demanded bit never influence demanded result
    // bits of an add (carries only propagate upward), so only the low
    // (BitWidth - NLZ) bits of each operand matter.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);

    // If an operand adds zeros to every bit below the highest demanded bit,
    // that operand doesn't change the result. Return the other side.
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
      return I->getOperand(i: 0);

    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero))
      return I->getOperand(i: 1);

    bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
    Known =
        KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, LHS: LHSKnown, RHS: RHSKnown);
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));
    break;
  }
  case Instruction::Sub: {
    // As with Add, only the bits at or below the highest demanded bit of the
    // operands can affect the demanded result bits.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);

    // If an operand subtracts zeros from every bit below the highest demanded
    // bit, that operand doesn't change the result. Return the other side.
    // Note: subtraction is not commutative, so only the RHS-is-zero case
    // allows returning an operand unchanged.
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
      return I->getOperand(i: 0);

    bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = KnownBits::computeForAddSub(/*Add=*/false, NSW, NUW, LHS: LHSKnown,
                                        RHS: RHSKnown);
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));
    break;
  }
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(V: I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // If the right shift operand 0 is a result of a left shift by the same
    // amount, this is probably a zero/sign extension, which may be unnecessary,
    // if we do not demand any of the new sign bits. So, return the original
    // operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    // Shadows the function-level BitWidth with an identical value (both are
    // derived from DemandedMask).
    unsigned BitWidth = DemandedMask.getBitWidth();
    // NOTE(review): ShiftLC == ShiftRC compares APInt pointers; presumably it
    // relies on constant uniquing so that equal shift amounts bind to the same
    // APInt object — confirm this also holds for splat-vector constants.
    if (match(V: I,
              P: m_AShr(L: m_Shl(L: m_Value(V&: X), R: m_APInt(Res&: ShiftLC)), R: m_APInt(Res&: ShiftRC))) &&
        ShiftLC == ShiftRC && ShiftLC->ult(RHS: BitWidth) &&
        DemandedMask.isSubsetOf(RHS: APInt::getLowBitsSet(
            numBits: BitWidth, loBitsSet: BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }

    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(V: I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero|Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    break;
  }

  // No context-specific simplification found; the caller still gets Known.
  return nullptr;
}
1322 | |
1323 | /// Helper routine of SimplifyDemandedUseBits. It tries to simplify |
1324 | /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into |
1325 | /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign |
1326 | /// of "C2-C1". |
1327 | /// |
1328 | /// Suppose E1 and E2 are generally different in bits S={bm, bm+1, |
1329 | /// ..., bn}, without considering the specific value X is holding. |
/// This transformation is legal iff one of the following conditions holds:
///   1) All the bits in S are 0; in this case E1 == E2.
///   2) We don't care about those bits in S, per the input DemandedMask.
///   3) A combination of 1) and 2): some bits in S are 0, and we don't care
///      about the rest of the bits.
1335 | /// |
1336 | /// Currently we only test condition 2). |
1337 | /// |
1338 | /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was |
1339 | /// not successful. |
1340 | Value *InstCombinerImpl::simplifyShrShlDemandedBits( |
1341 | Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, |
1342 | const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) { |
1343 | if (!ShlOp1 || !ShrOp1) |
1344 | return nullptr; // No-op. |
1345 | |
1346 | Value *VarX = Shr->getOperand(i: 0); |
1347 | Type *Ty = VarX->getType(); |
1348 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
1349 | if (ShlOp1.uge(RHS: BitWidth) || ShrOp1.uge(RHS: BitWidth)) |
1350 | return nullptr; // Undef. |
1351 | |
1352 | unsigned ShlAmt = ShlOp1.getZExtValue(); |
1353 | unsigned ShrAmt = ShrOp1.getZExtValue(); |
1354 | |
1355 | Known.One.clearAllBits(); |
1356 | Known.Zero.setLowBits(ShlAmt - 1); |
1357 | Known.Zero &= DemandedMask; |
1358 | |
1359 | APInt BitMask1(APInt::getAllOnes(numBits: BitWidth)); |
1360 | APInt BitMask2(APInt::getAllOnes(numBits: BitWidth)); |
1361 | |
1362 | bool isLshr = (Shr->getOpcode() == Instruction::LShr); |
1363 | BitMask1 = isLshr ? (BitMask1.lshr(shiftAmt: ShrAmt) << ShlAmt) : |
1364 | (BitMask1.ashr(ShiftAmt: ShrAmt) << ShlAmt); |
1365 | |
1366 | if (ShrAmt <= ShlAmt) { |
1367 | BitMask2 <<= (ShlAmt - ShrAmt); |
1368 | } else { |
1369 | BitMask2 = isLshr ? BitMask2.lshr(shiftAmt: ShrAmt - ShlAmt): |
1370 | BitMask2.ashr(ShiftAmt: ShrAmt - ShlAmt); |
1371 | } |
1372 | |
1373 | // Check if condition-2 (see the comment to this function) is satified. |
1374 | if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { |
1375 | if (ShrAmt == ShlAmt) |
1376 | return VarX; |
1377 | |
1378 | if (!Shr->hasOneUse()) |
1379 | return nullptr; |
1380 | |
1381 | BinaryOperator *New; |
1382 | if (ShrAmt < ShlAmt) { |
1383 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShlAmt - ShrAmt); |
1384 | New = BinaryOperator::CreateShl(V1: VarX, V2: Amt); |
1385 | BinaryOperator *Orig = cast<BinaryOperator>(Val: Shl); |
1386 | New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); |
1387 | New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); |
1388 | } else { |
1389 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShrAmt - ShlAmt); |
1390 | New = isLshr ? BinaryOperator::CreateLShr(V1: VarX, V2: Amt) : |
1391 | BinaryOperator::CreateAShr(V1: VarX, V2: Amt); |
1392 | if (cast<BinaryOperator>(Val: Shr)->isExact()) |
1393 | New->setIsExact(true); |
1394 | } |
1395 | |
1396 | return InsertNewInstWith(New, Old: Shl->getIterator()); |
1397 | } |
1398 | |
1399 | return nullptr; |
1400 | } |
1401 | |
/// The specified value produces a vector with any number of elements.
/// This method analyzes which elements of the operand are poison and
/// returns that information in PoisonElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
/// simplified only if it has a single caller. If AllowMultipleUsers is set
/// to true, DemandedElts refers to the union of sets of elements that are
/// used by all callers.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                    APInt DemandedElts,
                                                    APInt &PoisonElts,
                                                    unsigned Depth,
                                                    bool AllowMultipleUsers) {
  // Cannot analyze scalable type. The number of vector elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Val: V->getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
  APInt EltMask(APInt::getAllOnes(numBits: VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!" );

  if (match(V, P: m_Poison())) {
    // If the entire vector is poison, just return this info.
    PoisonElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
    PoisonElts = EltMask;
    return PoisonValue::get(T: V->getType());
  }

  PoisonElts = 0;

  if (auto *C = dyn_cast<Constant>(Val: V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnes())
      return nullptr;

    // Rebuild the constant with every non-demanded lane replaced by poison.
    Type *EltTy = cast<VectorType>(Val: V->getType())->getElementType();
    Constant *Poison = PoisonValue::get(T: EltTy);
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) { // If not demanded, set to poison.
        Elts.push_back(Elt: Poison);
        PoisonElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(Elt: i);
      if (!Elt) return nullptr;

      Elts.push_back(Elt);
      if (isa<PoisonValue>(Val: Elt)) // Already poison.
        PoisonElts.setBit(i);
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(V: Elts);
    return NewCV != C ? NewCV : nullptr;
  }

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  if (!AllowMultipleUsers) {
    // If multiple users are using the root value, proceed with
    // simplification conservatively assuming that all elements
    // are needed.
    if (!V->hasOneUse()) {
      // Quit if we find multiple users of a non-root value though.
      // They'll be handled when it's their turn to be visited by
      // the main instcombine process.
      if (Depth != 0)
        // TODO: Just compute the PoisonElts information recursively.
        return nullptr;

      // Conservatively assume that all elements are needed.
      DemandedElts = EltMask;
    }
  }

  Instruction *I = dyn_cast<Instruction>(Val: V);
  if (!I) return nullptr;        // Only analyze instructions.

  bool MadeChange = false;
  // Recurse into operand OpNum with the given demanded set; if the operand
  // simplifies, splice the new value in and remember that we changed I.
  auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
                              APInt Demanded, APInt &Undef) {
    auto *II = dyn_cast<IntrinsicInst>(Val: Inst);
    Value *Op = II ? II->getArgOperand(i: OpNum) : Inst->getOperand(i: OpNum);
    if (Value *V = SimplifyDemandedVectorElts(V: Op, DemandedElts: Demanded, PoisonElts&: Undef, Depth: Depth + 1)) {
      replaceOperand(I&: *Inst, OpNum, V);
      MadeChange = true;
    }
  };

  APInt PoisonElts2(VWidth, 0);
  APInt PoisonElts3(VWidth, 0);
  switch (I->getOpcode()) {
  default: break;

  case Instruction::GetElementPtr: {
    // The LangRef requires that struct geps have all constant indices.  As
    // such, we can't convert any operand to partial undef.
    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
      for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
           I != E; I++)
        if (I.isStruct())
          return true;
      return false;
    };
    if (mayIndexStructType(cast<GetElementPtrInst>(Val&: *I)))
      break;

    // Conservatively track the demanded elements back through any vector
    // operands we may have.  We know there must be at least one, or we
    // wouldn't have a vector result to get here. Note that we intentionally
    // merge the undef bits here since gepping with either an poison base or
    // index results in poison.
    for (unsigned i = 0; i < I->getNumOperands(); i++) {
      if (i == 0 ? match(V: I->getOperand(i), P: m_Undef())
                 : match(V: I->getOperand(i), P: m_Poison())) {
        // If the entire vector is undefined, just return this info.
        PoisonElts = EltMask;
        return nullptr;
      }
      if (I->getOperand(i)->getType()->isVectorTy()) {
        APInt PoisonEltsOp(VWidth, 0);
        simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
        // gep(x, undef) is not undef, so skip considering idx ops here
        // Note that we could propagate poison, but we can't distinguish between
        // undef & poison bits ATM
        if (i == 0)
          PoisonElts |= PoisonEltsOp;
      }
    }

    break;
  }
  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites.
    // demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
      break;
    }

    // The element inserted overwrites whatever was there, so the input demanded
    // set is simpler than the output set.
    unsigned IdxNo = Idx->getZExtValue();
    APInt PreInsertDemandedElts = DemandedElts;
    if (IdxNo < VWidth)
      PreInsertDemandedElts.clearBit(BitPosition: IdxNo);

    // If we only demand the element that is being inserted and that element
    // was extracted from the same index in another vector with the same type,
    // replace this insert with that other vector.
    // Note: This is attempted before the call to simplifyAndSetOp because that
    // may change PoisonElts to a value that does not match with Vec.
    Value *Vec;
    if (PreInsertDemandedElts == 0 &&
        match(V: I->getOperand(i: 1),
              P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_SpecificInt(V: IdxNo))) &&
        Vec->getType() == I->getType()) {
      return Vec;
    }

    simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.push(I);
      return I->getOperand(i: 0);
    }

    // The inserted element is defined.
    PoisonElts.clearBit(BitPosition: IdxNo);
    break;
  }
  case Instruction::ShuffleVector: {
    auto *Shuffle = cast<ShuffleVectorInst>(Val: I);
    assert(Shuffle->getOperand(0)->getType() ==
           Shuffle->getOperand(1)->getType() &&
           "Expected shuffle operands to have same type" );
    unsigned OpWidth = cast<FixedVectorType>(Val: Shuffle->getOperand(i_nocapture: 0)->getType())
                           ->getNumElements();
    // Handle trivial case of a splat. Only check the first element of LHS
    // operand.
    if (all_of(Range: Shuffle->getShuffleMask(), P: [](int Elt) { return Elt == 0; }) &&
        DemandedElts.isAllOnes()) {
      // A splat of lane 0 never reads the second operand; drop it to poison.
      if (!isa<PoisonValue>(Val: I->getOperand(i: 1))) {
        I->setOperand(i: 1, Val: PoisonValue::get(T: I->getOperand(i: 1)->getType()));
        MadeChange = true;
      }
      APInt LeftDemanded(OpWidth, 1);
      APInt LHSPoisonElts(OpWidth, 0);
      simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
      // Every result lane is a copy of lane 0, so it is poison iff lane 0 is.
      if (LHSPoisonElts[0])
        PoisonElts = EltMask;
      else
        PoisonElts.clearAllBits();
      break;
    }

    // Translate the demanded result lanes into demanded lanes of each source.
    APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
        if (MaskVal != -1u) {
          assert(MaskVal < OpWidth * 2 &&
                 "shufflevector mask index out of range!" );
          if (MaskVal < OpWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - OpWidth);
        }
      }
    }

    APInt LHSPoisonElts(OpWidth, 0);
    simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);

    APInt RHSPoisonElts(OpWidth, 0);
    simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);

    // If this shuffle does not change the vector length and the elements
    // demanded by this shuffle are an identity mask, then this shuffle is
    // unnecessary.
    //
    // We are assuming canonical form for the mask, so the source vector is
    // operand 0 and operand 1 is not used.
    //
    // Note that if an element is demanded and this shuffle mask is undefined
    // for that element, then the shuffle is not considered an identity
    // operation.  The shuffle prevents poison from the operand vector from
    // leaking to the result by replacing poison with an undefined value.
    if (VWidth == OpWidth) {
      bool IsIdentityShuffle = true;
      for (unsigned i = 0; i < VWidth; i++) {
        unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
        if (DemandedElts[i] && i != MaskVal) {
          IsIdentityShuffle = false;
          break;
        }
      }
      if (IsIdentityShuffle)
        return Shuffle->getOperand(i_nocapture: 0);
    }

    // Scan the mask: collect newly discovered poison lanes and, as a side
    // product, track whether each operand contributes exactly one lane
    // (LHSIdx/RHSIdx hold that lane, or OpWidth once more than one is seen).
    bool NewPoisonElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
      if (MaskVal == -1u) {
        PoisonElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewPoisonElts = true;
        PoisonElts.setBit(i);
      } else if (MaskVal < OpWidth) {
        if (LHSPoisonElts[MaskVal]) {
          NewPoisonElts = true;
          PoisonElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : OpWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSPoisonElts[MaskVal - OpWidth]) {
          NewPoisonElts = true;
          PoisonElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : OpWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
          RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
    if (OpWidth ==
        cast<FixedVectorType>(Val: Shuffle->getType())->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < OpWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 0))) {
          Op = Shuffle->getOperand(i_nocapture: 1);
          Value = CV->getOperand(i_nocapture: LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < OpWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 1))) {
          Op = Shuffle->getOperand(i_nocapture: 0);
          Value = CV->getOperand(i_nocapture: RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Vec: Op, NewElt: Value, Idx: ConstantInt::get(Ty: Type::getInt64Ty(C&: I->getContext()), V: Idx),
            NameStr: Shuffle->getName());
        InsertNewInstWith(New, Old: Shuffle->getIterator());
        return New;
      }
    }
    if (NewPoisonElts) {
      // Add additional discovered poison lanes into the mask itself.
      SmallVector<int, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (PoisonElts[i])
          Elts.push_back(Elt: PoisonMaskElem);
        else
          Elts.push_back(Elt: Shuffle->getMaskValue(Elt: i));
      }
      Shuffle->setShuffleMask(Elts);
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    // If this is a vector select, try to transform the select condition based
    // on the current demanded elements.
    SelectInst *Sel = cast<SelectInst>(Val: I);
    if (Sel->getCondition()->getType()->isVectorTy()) {
      // TODO: We are not doing anything with PoisonElts based on this call.
      // It is overwritten below based on the other select operands.  If an
      // element of the select condition is known undef, then we are free to
      // choose the output value from either arm of the select. If we know that
      // one of those values is undef, then the output can be undef.
      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    }

    // Next, see if we can transform the arms of the select.
    APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
    if (auto *CV = dyn_cast<ConstantVector>(Val: Sel->getCondition())) {
      for (unsigned i = 0; i < VWidth; i++) {
        // isNullValue() always returns false when called on a ConstantExpr.
        // Skip constant expressions to avoid propagating incorrect information.
        Constant *CElt = CV->getAggregateElement(Elt: i);
        if (isa<ConstantExpr>(Val: CElt))
          continue;
        // TODO: If a select condition element is undef, we can demand from
        // either side. If one side is known undef, choosing that side would
        // propagate undef.
        if (CElt->isNullValue())
          DemandedLHS.clearBit(BitPosition: i);
        else
          DemandedRHS.clearBit(BitPosition: i);
      }
    }

    simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
    simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);

    // Output elements are undefined if the element from each arm is undefined.
    // TODO: This can be improved. See comment in select condition handling.
    PoisonElts = PoisonElts2 & PoisonElts3;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(Val: I->getOperand(i: 0)->getType());
    if (!VTy) break;
    unsigned InVWidth = cast<FixedVectorType>(Val: VTy)->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    PoisonElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);

    if (VWidth == InVWidth) {
      PoisonElts = PoisonElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (PoisonElts2[OutIdx / Ratio])
          PoisonElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = PoisonElts2.lshr(shiftAmt: OutIdx * Ratio).zextOrTrunc(width: Ratio);
        if (SubUndef.popcount() == Ratio)
          PoisonElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp" );
    }
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    // Element-wise casts: demanded/poison lanes map through one-to-one.
    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_gather: // fallthrough
    case Intrinsic::masked_load: {
      // Subtlety: If we load from a pointer, the pointer must be valid
      // regardless of whether the element is demanded.  Doing otherwise risks
      // segfaults which didn't exist in the original program.
      APInt DemandedPtrs(APInt::getAllOnes(numBits: VWidth)),
          DemandedPassThrough(DemandedElts);
      // With a constant mask, a lane comes from exactly one source: masked-off
      // lanes don't need their pointer, masked-on lanes don't need passthrough.
      if (auto *CV = dyn_cast<ConstantVector>(Val: II->getOperand(i_nocapture: 2)))
        for (unsigned i = 0; i < VWidth; i++) {
          Constant *CElt = CV->getAggregateElement(Elt: i);
          if (CElt->isNullValue())
            DemandedPtrs.clearBit(BitPosition: i);
          else if (CElt->isAllOnesValue())
            DemandedPassThrough.clearBit(BitPosition: i);
        }
      if (II->getIntrinsicID() == Intrinsic::masked_gather)
        simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
      simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);

      // Output elements are undefined if the element from both sources are.
      // TODO: can strengthen via mask as well.
      PoisonElts = PoisonElts2 & PoisonElts3;
      break;
    }
    default: {
      // Handle target specific intrinsics
      std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
          II&: *II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
          SimplifyAndSetOp: simplifyAndSetOp);
      if (V)
        return *V;
      break;
    }
    } // switch on IntrinsicID
    break;
  } // case Call
  } // switch on Opcode

  // TODO: We bail completely on integer div/rem and shifts because they have
  // UB/poison potential, but that should be refined.
  BinaryOperator *BO;
  if (match(V: I, P: m_BinOp(I&: BO)) && !BO->isIntDivRem() && !BO->isShift()) {
    Value *X = BO->getOperand(i_nocapture: 0);
    Value *Y = BO->getOperand(i_nocapture: 1);

    // Look for an equivalent binop except that one operand has been shuffled.
    // If the demand for this binop only includes elements that are the same as
    // the other binop, then we may be able to replace this binop with a use of
    // the earlier one.
    //
    // Example:
    // %other_bo = bo (shuf X, {0}), Y
    // %this_extracted_bo = extelt (bo X, Y), 0
    // -->
    // %other_bo = bo (shuf X, {0}), Y
    // %this_extracted_bo = extelt %other_bo, 0
    //
    // TODO: Handle demand of an arbitrary single element or more than one
    //       element instead of just element 0.
    // TODO: Unlike general demanded elements transforms, this should be safe
    //       for any (div/rem/shift) opcode too.
    if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
        BO->hasOneUse() ) {

      auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
        // Try to use shuffle-of-operand in place of an operand:
        // bo X, Y --> bo (shuf X), Y
        // bo X, Y --> bo X, (shuf Y)
        BinaryOperator::BinaryOps Opcode = BO->getOpcode();
        Value *ShufOp = MatchShufAsOp0 ? X : Y;
        Value *OtherOp = MatchShufAsOp0 ? Y : X;
        for (User *U : OtherOp->users()) {
          ArrayRef<int> Mask;
          auto Shuf = m_Shuffle(v1: m_Specific(V: ShufOp), v2: m_Value(), mask: m_Mask(Mask));
          if (BO->isCommutative()
                  ? match(V: U, P: m_c_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp)))
                  : MatchShufAsOp0
                        ? match(V: U, P: m_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp)))
                        : match(V: U, P: m_BinOp(Opcode, L: m_Specific(V: OtherOp), R: Shuf)))
            // The candidate must splat lane 0 and must already execute before
            // I, otherwise we cannot reuse its result here.
            if (match(Mask, P: m_ZeroMask()) && Mask[0] != PoisonMaskElem)
              if (DT.dominates(Def: U, User: I))
                return U;
        }
        return nullptr;
      };

      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
        return ShufBO;
      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
        return ShufBO;
    }

    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);

    // Output elements are undefined if both are undefined.  Consider things
    // like undef & 0.  The result is known zero, not undef.
    PoisonElts &= PoisonElts2;
  }

  // If we've proven all of the lanes poison, return a poison value.
  // TODO: Intersect w/demanded lanes
  if (PoisonElts.isAllOnes())
    return PoisonValue::get(T: I->getType());

  return MadeChange ? I : nullptr;
}
1960 | |
1961 | /// For floating-point classes that resolve to a single bit pattern, return that |
1962 | /// value. |
1963 | static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) { |
1964 | switch (Mask) { |
1965 | case fcPosZero: |
1966 | return ConstantFP::getZero(Ty); |
1967 | case fcNegZero: |
1968 | return ConstantFP::getZero(Ty, Negative: true); |
1969 | case fcPosInf: |
1970 | return ConstantFP::getInfinity(Ty); |
1971 | case fcNegInf: |
1972 | return ConstantFP::getInfinity(Ty, Negative: true); |
1973 | case fcNone: |
1974 | return PoisonValue::get(T: Ty); |
1975 | default: |
1976 | return nullptr; |
1977 | } |
1978 | } |
1979 | |
/// Simplify \p V given that only the floating-point classes in \p DemandedMask
/// can be observed by the user. Populates \p Known with the classes V may
/// take; returns a replacement value, or null if no change was made.
Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
    Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
    unsigned Depth, Instruction *CxtI) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth" );
  Type *VTy = V->getType();

  assert(Known == KnownFPClass() && "expected uninitialized state" );

  // Nothing demanded: any value works, so fold to poison (but leave undef
  // alone; it is already as unconstrained as possible).
  if (DemandedMask == fcNone)
    return isa<UndefValue>(Val: V) ? nullptr : PoisonValue::get(T: VTy);

  if (Depth == MaxAnalysisRecursionDepth)
    return nullptr;

  Instruction *I = dyn_cast<Instruction>(Val: V);
  if (!I) {
    // Handle constants and arguments
    Known = computeKnownFPClass(Val: V, Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1);
    Value *FoldedToConst =
        getFPClassConstant(Ty: VTy, Mask: DemandedMask & Known.KnownFPClasses);
    return FoldedToConst == V ? nullptr : FoldedToConst;
  }

  // Don't mutate an instruction with other users based on one use's demand.
  if (!I->hasOneUse())
    return nullptr;

  // TODO: Should account for nofpclass/FastMathFlags on current instruction
  switch (I->getOpcode()) {
  case Instruction::FNeg: {
    // fneg swaps sign classes, so demand the sign-flipped mask of the operand.
    if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::fneg(Mask: DemandedMask), Known,
                                Depth: Depth + 1))
      return I;
    Known.fneg();
    break;
  }
  case Instruction::Call: {
    CallInst *CI = cast<CallInst>(Val: I);
    switch (CI->getIntrinsicID()) {
    case Intrinsic::fabs:
      // fabs folds negative classes into positive ones; demand the classes of
      // the operand that could map into DemandedMask after the fold.
      if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::inverse_fabs(Mask: DemandedMask), Known,
                                  Depth: Depth + 1))
        return I;
      Known.fabs();
      break;
    case Intrinsic::arithmetic_fence:
      // The fence is a value-preserving barrier: pass the demand straight
      // through.
      if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known, Depth: Depth + 1))
        return I;
      break;
    case Intrinsic::copysign: {
      // Flip on more potentially demanded classes
      const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(Mask: DemandedMask);
      if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: DemandedMaskAnySign, Known, Depth: Depth + 1))
        return I;

      if ((DemandedMask & fcPositive) == fcNone) {
        // Roundabout way of replacing with fneg(fabs)
        I->setOperand(i: 1, Val: ConstantFP::get(Ty: VTy, V: -1.0));
        return I;
      }

      if ((DemandedMask & fcNegative) == fcNone) {
        // Roundabout way of replacing with fabs
        I->setOperand(i: 1, Val: ConstantFP::getZero(Ty: VTy));
        return I;
      }

      KnownFPClass KnownSign =
          computeKnownFPClass(Val: I->getOperand(i: 1), Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1);
      Known.copysign(Sign: KnownSign);
      break;
    }
    default:
      Known = computeKnownFPClass(Val: I, Interested: ~DemandedMask, CtxI: CxtI, Depth: Depth + 1);
      break;
    }

    break;
  }
  case Instruction::Select: {
    KnownFPClass KnownLHS, KnownRHS;
    if (SimplifyDemandedFPClass(I, Op: 2, DemandedMask, Known&: KnownRHS, Depth: Depth + 1) ||
        SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownLHS, Depth: Depth + 1))
      return I;

    // If one arm can never produce a demanded class, the select's observable
    // result always comes from the other arm.
    if (KnownLHS.isKnownNever(Mask: DemandedMask))
      return I->getOperand(i: 2);
    if (KnownRHS.isKnownNever(Mask: DemandedMask))
      return I->getOperand(i: 1);

    // TODO: Recognize clamping patterns
    Known = KnownLHS | KnownRHS;
    break;
  }
  default:
    Known = computeKnownFPClass(Val: I, Interested: ~DemandedMask, CtxI: CxtI, Depth: Depth + 1);
    break;
  }

  // If the demanded classes that remain possible pin down a unique value,
  // fold the whole instruction to that constant (or poison if none remain).
  return getFPClassConstant(Ty: VTy, Mask: DemandedMask & Known.KnownFPClasses);
}
2080 | |
2081 | bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo, |
2082 | FPClassTest DemandedMask, |
2083 | KnownFPClass &Known, |
2084 | unsigned Depth) { |
2085 | Use &U = I->getOperandUse(i: OpNo); |
2086 | Value *NewVal = |
2087 | SimplifyDemandedUseFPClass(V: U.get(), DemandedMask, Known, Depth, CxtI: I); |
2088 | if (!NewVal) |
2089 | return false; |
2090 | if (Instruction *OpInst = dyn_cast<Instruction>(Val&: U)) |
2091 | salvageDebugInfo(I&: *OpInst); |
2092 | |
2093 | replaceUse(U, NewValue: NewVal); |
2094 | return true; |
2095 | } |
2096 | |