1 | //===- InstCombineSimplifyDemanded.cpp ------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains logic for simplifying instructions based on information |
10 | // about how they are used. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "InstCombineInternal.h" |
15 | #include "llvm/Analysis/ValueTracking.h" |
16 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
17 | #include "llvm/IR/IntrinsicInst.h" |
18 | #include "llvm/IR/PatternMatch.h" |
19 | #include "llvm/Support/KnownBits.h" |
20 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
21 | |
22 | using namespace llvm; |
23 | using namespace llvm::PatternMatch; |
24 | |
25 | #define DEBUG_TYPE "instcombine" |
26 | |
// Hidden debugging flag: when set, cross-checks the bits computed by
// SimplifyDemandedBits() against a fresh computeKnownBits() query so that
// disagreements between the two analyses are caught during development.
static cl::opt<bool>
    VerifyKnownBits("instcombine-verify-known-bits",
                    cl::desc("Verify that computeKnownBits() and "
                             "SimplifyDemandedBits() are consistent"),
                    cl::Hidden, cl::init(false));
32 | |
33 | /// Check to see if the specified operand of the specified instruction is a |
34 | /// constant integer. If so, check to see if there are any bits set in the |
35 | /// constant that are not demanded. If so, shrink the constant and return true. |
36 | static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, |
37 | const APInt &Demanded) { |
38 | assert(I && "No instruction?" ); |
39 | assert(OpNo < I->getNumOperands() && "Operand index too large" ); |
40 | |
41 | // The operand must be a constant integer or splat integer. |
42 | Value *Op = I->getOperand(i: OpNo); |
43 | const APInt *C; |
44 | if (!match(V: Op, P: m_APInt(Res&: C))) |
45 | return false; |
46 | |
47 | // If there are no bits set that aren't demanded, nothing to do. |
48 | if (C->isSubsetOf(RHS: Demanded)) |
49 | return false; |
50 | |
51 | // This instruction is producing bits that are not demanded. Shrink the RHS. |
52 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: Op->getType(), V: *C & Demanded)); |
53 | |
54 | return true; |
55 | } |
56 | |
57 | /// Returns the bitwidth of the given scalar or pointer type. For vector types, |
58 | /// returns the element type's bitwidth. |
59 | static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { |
60 | if (unsigned BitWidth = Ty->getScalarSizeInBits()) |
61 | return BitWidth; |
62 | |
63 | return DL.getPointerTypeSizeInBits(Ty); |
64 | } |
65 | |
66 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if |
67 | /// the instruction has any properties that allow us to simplify its operands. |
68 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, |
69 | KnownBits &Known) { |
70 | APInt DemandedMask(APInt::getAllOnes(numBits: Known.getBitWidth())); |
71 | Value *V = SimplifyDemandedUseBits(V: &Inst, DemandedMask, Known, |
72 | Depth: 0, CxtI: &Inst); |
73 | if (!V) return false; |
74 | if (V == &Inst) return true; |
75 | replaceInstUsesWith(I&: Inst, V); |
76 | return true; |
77 | } |
78 | |
79 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if |
80 | /// the instruction has any properties that allow us to simplify its operands. |
81 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { |
82 | KnownBits Known(getBitWidth(Ty: Inst.getType(), DL)); |
83 | return SimplifyDemandedInstructionBits(Inst, Known); |
84 | } |
85 | |
86 | /// This form of SimplifyDemandedBits simplifies the specified instruction |
87 | /// operand if possible, updating it in place. It returns true if it made any |
88 | /// change and false otherwise. |
89 | bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, |
90 | const APInt &DemandedMask, |
91 | KnownBits &Known, unsigned Depth) { |
92 | Use &U = I->getOperandUse(i: OpNo); |
93 | Value *NewVal = SimplifyDemandedUseBits(V: U.get(), DemandedMask, Known, |
94 | Depth, CxtI: I); |
95 | if (!NewVal) return false; |
96 | if (Instruction* OpInst = dyn_cast<Instruction>(Val&: U)) |
97 | salvageDebugInfo(I&: *OpInst); |
98 | |
99 | replaceUse(U, NewValue: NewVal); |
100 | return true; |
101 | } |
102 | |
103 | /// This function attempts to replace V with a simpler value based on the |
104 | /// demanded bits. When this function is called, it is known that only the bits |
105 | /// set in DemandedMask of the result of V are ever used downstream. |
106 | /// Consequently, depending on the mask and V, it may be possible to replace V |
107 | /// with a constant or one of its operands. In such cases, this function does |
108 | /// the replacement and returns true. In all other cases, it returns false after |
109 | /// analyzing the expression and setting Known.One to the bits known to be one in the |
110 | /// expression. Known.Zero contains all the bits that are known to be zero in |
111 | /// the expression. These are provided to potentially allow the caller (which |
112 | /// might recursively be SimplifyDemandedBits itself) to simplify the |
113 | /// expression. |
114 | /// Known.One and Known.Zero always follow the invariant that: |
115 | /// Known.One & Known.Zero == 0. |
116 | /// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero |
117 | /// are accurate even for bits not in DemandedMask. Note |
118 | /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all |
119 | /// be the same. |
120 | /// |
121 | /// This returns null if it did not change anything and it permits no |
122 | /// simplification. This returns V itself if it did some simplification of V's |
123 | /// operands based on the information about what bits are demanded. This returns |
124 | /// some other non-null value if it found out that V is equal to another value |
125 | /// in the context where the specified bits are demanded, but not for all users. |
126 | Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, |
127 | KnownBits &Known, |
128 | unsigned Depth, |
129 | Instruction *CxtI) { |
130 | assert(V != nullptr && "Null pointer of Value???" ); |
131 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth" ); |
132 | uint32_t BitWidth = DemandedMask.getBitWidth(); |
133 | Type *VTy = V->getType(); |
134 | assert( |
135 | (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && |
136 | Known.getBitWidth() == BitWidth && |
137 | "Value *V, DemandedMask and Known must have same BitWidth" ); |
138 | |
139 | if (isa<Constant>(Val: V)) { |
140 | computeKnownBits(V, Known, Depth, CxtI); |
141 | return nullptr; |
142 | } |
143 | |
144 | Known.resetAll(); |
145 | if (DemandedMask.isZero()) // Not demanding any bits from V. |
146 | return UndefValue::get(T: VTy); |
147 | |
148 | if (Depth == MaxAnalysisRecursionDepth) |
149 | return nullptr; |
150 | |
151 | Instruction *I = dyn_cast<Instruction>(Val: V); |
152 | if (!I) { |
153 | computeKnownBits(V, Known, Depth, CxtI); |
154 | return nullptr; // Only analyze instructions. |
155 | } |
156 | |
157 | // If there are multiple uses of this value and we aren't at the root, then |
158 | // we can't do any simplifications of the operands, because DemandedMask |
159 | // only reflects the bits demanded by *one* of the users. |
160 | if (Depth != 0 && !I->hasOneUse()) |
161 | return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); |
162 | |
163 | KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); |
164 | // If this is the root being simplified, allow it to have multiple uses, |
165 | // just set the DemandedMask to all bits so that we can try to simplify the |
166 | // operands. This allows visitTruncInst (for example) to simplify the |
167 | // operand of a trunc without duplicating all the logic below. |
168 | if (Depth == 0 && !V->hasOneUse()) |
169 | DemandedMask.setAllBits(); |
170 | |
171 | // Update flags after simplifying an operand based on the fact that some high |
172 | // order bits are not demanded. |
173 | auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I, |
174 | unsigned NLZ) { |
175 | if (NLZ > 0) { |
176 | // Disable the nsw and nuw flags here: We can no longer guarantee that |
177 | // we won't wrap after simplification. Removing the nsw/nuw flags is |
178 | // legal here because the top bit is not demanded. |
179 | I->setHasNoSignedWrap(false); |
180 | I->setHasNoUnsignedWrap(false); |
181 | } |
182 | return I; |
183 | }; |
184 | |
185 | // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care |
186 | // about the high bits of the operands. |
187 | auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) { |
188 | unsigned NLZ = DemandedMask.countl_zero(); |
189 | // Right fill the mask of bits for the operands to demand the most |
190 | // significant bit and all those below it. |
191 | DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
192 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromOps) || |
193 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOps, Known&: LHSKnown, Depth: Depth + 1) || |
194 | ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
195 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Depth: Depth + 1)) { |
196 | disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
197 | return true; |
198 | } |
199 | return false; |
200 | }; |
201 | |
202 | switch (I->getOpcode()) { |
203 | default: |
204 | computeKnownBits(V: I, Known, Depth, CxtI); |
205 | break; |
206 | case Instruction::And: { |
207 | // If either the LHS or the RHS are Zero, the result is zero. |
208 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
209 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.Zero, Known&: LHSKnown, |
210 | Depth: Depth + 1)) |
211 | return I; |
212 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
213 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
214 | |
215 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
216 | Depth, SQ: SQ.getWithInstruction(I: CxtI)); |
217 | |
218 | // If the client is only demanding bits that we know, return the known |
219 | // constant. |
220 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
221 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
222 | |
223 | // If all of the demanded bits are known 1 on one side, return the other. |
224 | // These bits cannot contribute to the result of the 'and'. |
225 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One)) |
226 | return I->getOperand(i: 0); |
227 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One)) |
228 | return I->getOperand(i: 1); |
229 | |
230 | // If the RHS is a constant, see if we can simplify it. |
231 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask & ~LHSKnown.Zero)) |
232 | return I; |
233 | |
234 | break; |
235 | } |
236 | case Instruction::Or: { |
237 | // If either the LHS or the RHS are One, the result is One. |
238 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
239 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.One, Known&: LHSKnown, |
240 | Depth: Depth + 1)) { |
241 | // Disjoint flag may no longer hold. |
242 | I->dropPoisonGeneratingFlags(); |
243 | return I; |
244 | } |
245 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
246 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
247 | |
248 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
249 | Depth, SQ: SQ.getWithInstruction(I: CxtI)); |
250 | |
251 | // If the client is only demanding bits that we know, return the known |
252 | // constant. |
253 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
254 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
255 | |
256 | // If all of the demanded bits are known zero on one side, return the other. |
257 | // These bits cannot contribute to the result of the 'or'. |
258 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero)) |
259 | return I->getOperand(i: 0); |
260 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) |
261 | return I->getOperand(i: 1); |
262 | |
263 | // If the RHS is a constant, see if we can simplify it. |
264 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) |
265 | return I; |
266 | |
267 | // Infer disjoint flag if no common bits are set. |
268 | if (!cast<PossiblyDisjointInst>(Val: I)->isDisjoint()) { |
269 | WithCache<const Value *> LHSCache(I->getOperand(i: 0), LHSKnown), |
270 | RHSCache(I->getOperand(i: 1), RHSKnown); |
271 | if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ: SQ.getWithInstruction(I))) { |
272 | cast<PossiblyDisjointInst>(Val: I)->setIsDisjoint(true); |
273 | return I; |
274 | } |
275 | } |
276 | |
277 | break; |
278 | } |
279 | case Instruction::Xor: { |
280 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
281 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Depth: Depth + 1)) |
282 | return I; |
283 | Value *LHS, *RHS; |
284 | if (DemandedMask == 1 && |
285 | match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) && |
286 | match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) { |
287 | // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 |
288 | IRBuilderBase::InsertPointGuard Guard(Builder); |
289 | Builder.SetInsertPoint(I); |
290 | auto *Xor = Builder.CreateXor(LHS, RHS); |
291 | return Builder.CreateUnaryIntrinsic(Intrinsic::ID: ctpop, V: Xor); |
292 | } |
293 | |
294 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
295 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
296 | |
297 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
298 | Depth, SQ: SQ.getWithInstruction(I: CxtI)); |
299 | |
300 | // If the client is only demanding bits that we know, return the known |
301 | // constant. |
302 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
303 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
304 | |
305 | // If all of the demanded bits are known zero on one side, return the other. |
306 | // These bits cannot contribute to the result of the 'xor'. |
307 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero)) |
308 | return I->getOperand(i: 0); |
309 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero)) |
310 | return I->getOperand(i: 1); |
311 | |
312 | // If all of the demanded bits are known to be zero on one side or the |
313 | // other, turn this into an *inclusive* or. |
314 | // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 |
315 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.Zero)) { |
316 | Instruction *Or = |
317 | BinaryOperator::CreateOr(V1: I->getOperand(i: 0), V2: I->getOperand(i: 1)); |
318 | if (DemandedMask.isAllOnes()) |
319 | cast<PossiblyDisjointInst>(Val: Or)->setIsDisjoint(true); |
320 | Or->takeName(V: I); |
321 | return InsertNewInstWith(New: Or, Old: I->getIterator()); |
322 | } |
323 | |
324 | // If all of the demanded bits on one side are known, and all of the set |
325 | // bits on that side are also known to be set on the other side, turn this |
326 | // into an AND, as we know the bits will be cleared. |
327 | // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 |
328 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero|RHSKnown.One) && |
329 | RHSKnown.One.isSubsetOf(RHS: LHSKnown.One)) { |
330 | Constant *AndC = Constant::getIntegerValue(Ty: VTy, |
331 | V: ~RHSKnown.One & DemandedMask); |
332 | Instruction *And = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); |
333 | return InsertNewInstWith(New: And, Old: I->getIterator()); |
334 | } |
335 | |
336 | // If the RHS is a constant, see if we can change it. Don't alter a -1 |
337 | // constant because that's a canonical 'not' op, and that is better for |
338 | // combining, SCEV, and codegen. |
339 | const APInt *C; |
340 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && !C->isAllOnes()) { |
341 | if ((*C | ~DemandedMask).isAllOnes()) { |
342 | // Force bits to 1 to create a 'not' op. |
343 | I->setOperand(i: 1, Val: ConstantInt::getAllOnesValue(Ty: VTy)); |
344 | return I; |
345 | } |
346 | // If we can't turn this into a 'not', try to shrink the constant. |
347 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) |
348 | return I; |
349 | } |
350 | |
351 | // If our LHS is an 'and' and if it has one use, and if any of the bits we |
352 | // are flipping are known to be set, then the xor is just resetting those |
353 | // bits to zero. We can just knock out bits from the 'and' and the 'xor', |
354 | // simplifying both of them. |
355 | if (Instruction *LHSInst = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) { |
356 | ConstantInt *AndRHS, *XorRHS; |
357 | if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && |
358 | match(V: I->getOperand(i: 1), P: m_ConstantInt(CI&: XorRHS)) && |
359 | match(V: LHSInst->getOperand(i: 1), P: m_ConstantInt(CI&: AndRHS)) && |
360 | (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { |
361 | APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); |
362 | |
363 | Constant *AndC = ConstantInt::get(Ty: VTy, V: NewMask & AndRHS->getValue()); |
364 | Instruction *NewAnd = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); |
365 | InsertNewInstWith(New: NewAnd, Old: I->getIterator()); |
366 | |
367 | Constant *XorC = ConstantInt::get(Ty: VTy, V: NewMask & XorRHS->getValue()); |
368 | Instruction *NewXor = BinaryOperator::CreateXor(V1: NewAnd, V2: XorC); |
369 | return InsertNewInstWith(New: NewXor, Old: I->getIterator()); |
370 | } |
371 | } |
372 | break; |
373 | } |
374 | case Instruction::Select: { |
375 | if (SimplifyDemandedBits(I, OpNo: 2, DemandedMask, Known&: RHSKnown, Depth: Depth + 1) || |
376 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: LHSKnown, Depth: Depth + 1)) |
377 | return I; |
378 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
379 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
380 | |
381 | // If the operands are constants, see if we can simplify them. |
382 | // This is similar to ShrinkDemandedConstant, but for a select we want to |
383 | // try to keep the selected constants the same as icmp value constants, if |
384 | // we can. This helps not break apart (or helps put back together) |
385 | // canonical patterns like min and max. |
386 | auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo, |
387 | const APInt &DemandedMask) { |
388 | const APInt *SelC; |
389 | if (!match(V: I->getOperand(i: OpNo), P: m_APInt(Res&: SelC))) |
390 | return false; |
391 | |
392 | // Get the constant out of the ICmp, if there is one. |
393 | // Only try this when exactly 1 operand is a constant (if both operands |
394 | // are constant, the icmp should eventually simplify). Otherwise, we may |
395 | // invert the transform that reduces set bits and infinite-loop. |
396 | Value *X; |
397 | const APInt *CmpC; |
398 | ICmpInst::Predicate Pred; |
399 | if (!match(V: I->getOperand(i: 0), P: m_ICmp(Pred, L: m_Value(V&: X), R: m_APInt(Res&: CmpC))) || |
400 | isa<Constant>(Val: X) || CmpC->getBitWidth() != SelC->getBitWidth()) |
401 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); |
402 | |
403 | // If the constant is already the same as the ICmp, leave it as-is. |
404 | if (*CmpC == *SelC) |
405 | return false; |
406 | // If the constants are not already the same, but can be with the demand |
407 | // mask, use the constant value from the ICmp. |
408 | if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) { |
409 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: I->getType(), V: *CmpC)); |
410 | return true; |
411 | } |
412 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); |
413 | }; |
414 | if (CanonicalizeSelectConstant(I, 1, DemandedMask) || |
415 | CanonicalizeSelectConstant(I, 2, DemandedMask)) |
416 | return I; |
417 | |
418 | // Only known if known in both the LHS and RHS. |
419 | Known = LHSKnown.intersectWith(RHS: RHSKnown); |
420 | break; |
421 | } |
422 | case Instruction::Trunc: { |
423 | // If we do not demand the high bits of a right-shifted and truncated value, |
424 | // then we may be able to truncate it before the shift. |
425 | Value *X; |
426 | const APInt *C; |
427 | if (match(V: I->getOperand(i: 0), P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C))))) { |
428 | // The shift amount must be valid (not poison) in the narrow type, and |
429 | // it must not be greater than the high bits demanded of the result. |
430 | if (C->ult(RHS: VTy->getScalarSizeInBits()) && |
431 | C->ule(RHS: DemandedMask.countl_zero())) { |
432 | // trunc (lshr X, C) --> lshr (trunc X), C |
433 | IRBuilderBase::InsertPointGuard Guard(Builder); |
434 | Builder.SetInsertPoint(I); |
435 | Value *Trunc = Builder.CreateTrunc(V: X, DestTy: VTy); |
436 | return Builder.CreateLShr(LHS: Trunc, RHS: C->getZExtValue()); |
437 | } |
438 | } |
439 | } |
440 | [[fallthrough]]; |
441 | case Instruction::ZExt: { |
442 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); |
443 | |
444 | APInt InputDemandedMask = DemandedMask.zextOrTrunc(width: SrcBitWidth); |
445 | KnownBits InputKnown(SrcBitWidth); |
446 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedMask, Known&: InputKnown, Depth: Depth + 1)) { |
447 | // For zext nneg, we may have dropped the instruction which made the |
448 | // input non-negative. |
449 | I->dropPoisonGeneratingFlags(); |
450 | return I; |
451 | } |
452 | assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?" ); |
453 | if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() && |
454 | !InputKnown.isNegative()) |
455 | InputKnown.makeNonNegative(); |
456 | Known = InputKnown.zextOrTrunc(BitWidth); |
457 | |
458 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
459 | break; |
460 | } |
461 | case Instruction::SExt: { |
462 | // Compute the bits in the result that are not present in the input. |
463 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); |
464 | |
465 | APInt InputDemandedBits = DemandedMask.trunc(width: SrcBitWidth); |
466 | |
467 | // If any of the sign extended bits are demanded, we know that the sign |
468 | // bit is demanded. |
469 | if (DemandedMask.getActiveBits() > SrcBitWidth) |
470 | InputDemandedBits.setBit(SrcBitWidth-1); |
471 | |
472 | KnownBits InputKnown(SrcBitWidth); |
473 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedBits, Known&: InputKnown, Depth: Depth + 1)) |
474 | return I; |
475 | |
476 | // If the input sign bit is known zero, or if the NewBits are not demanded |
477 | // convert this into a zero extension. |
478 | if (InputKnown.isNonNegative() || |
479 | DemandedMask.getActiveBits() <= SrcBitWidth) { |
480 | // Convert to ZExt cast. |
481 | CastInst *NewCast = new ZExtInst(I->getOperand(i: 0), VTy); |
482 | NewCast->takeName(V: I); |
483 | return InsertNewInstWith(New: NewCast, Old: I->getIterator()); |
484 | } |
485 | |
486 | // If the sign bit of the input is known set or clear, then we know the |
487 | // top bits of the result. |
488 | Known = InputKnown.sext(BitWidth); |
489 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
490 | break; |
491 | } |
492 | case Instruction::Add: { |
493 | if ((DemandedMask & 1) == 0) { |
494 | // If we do not need the low bit, try to convert bool math to logic: |
495 | // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN |
496 | Value *X, *Y; |
497 | if (match(V: I, P: m_c_Add(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))), |
498 | R: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: Y))))) && |
499 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType()) { |
500 | // Truth table for inputs and output signbits: |
501 | // X:0 | X:1 |
502 | // ---------- |
503 | // Y:0 | 0 | 0 | |
504 | // Y:1 | -1 | 0 | |
505 | // ---------- |
506 | IRBuilderBase::InsertPointGuard Guard(Builder); |
507 | Builder.SetInsertPoint(I); |
508 | Value *AndNot = Builder.CreateAnd(LHS: Builder.CreateNot(V: X), RHS: Y); |
509 | return Builder.CreateSExt(V: AndNot, DestTy: VTy); |
510 | } |
511 | |
512 | // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN |
513 | // TODO: Relax the one-use checks because we are removing an instruction? |
514 | if (match(V: I, P: m_Add(L: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))), |
515 | R: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: Y))))) && |
516 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType()) { |
517 | // Truth table for inputs and output signbits: |
518 | // X:0 | X:1 |
519 | // ----------- |
520 | // Y:0 | -1 | -1 | |
521 | // Y:1 | -1 | 0 | |
522 | // ----------- |
523 | IRBuilderBase::InsertPointGuard Guard(Builder); |
524 | Builder.SetInsertPoint(I); |
525 | Value *Or = Builder.CreateOr(LHS: X, RHS: Y); |
526 | return Builder.CreateSExt(V: Or, DestTy: VTy); |
527 | } |
528 | } |
529 | |
530 | // Right fill the mask of bits for the operands to demand the most |
531 | // significant bit and all those below it. |
532 | unsigned NLZ = DemandedMask.countl_zero(); |
533 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
534 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
535 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Depth: Depth + 1)) |
536 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
537 | |
538 | // If low order bits are not demanded and known to be zero in one operand, |
539 | // then we don't need to demand them from the other operand, since they |
540 | // can't cause overflow into any bits that are demanded in the result. |
541 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); |
542 | APInt DemandedFromLHS = DemandedFromOps; |
543 | DemandedFromLHS.clearLowBits(loBits: NTZ); |
544 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || |
545 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Depth: Depth + 1)) |
546 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
547 | |
548 | // If we are known to be adding zeros to every bit below |
549 | // the highest demanded bit, we just return the other side. |
550 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
551 | return I->getOperand(i: 0); |
552 | if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) |
553 | return I->getOperand(i: 1); |
554 | |
555 | // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the DemandMask |
556 | { |
557 | const APInt *C; |
558 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && |
559 | C->isOneBitSet(BitNo: DemandedMask.getActiveBits() - 1)) { |
560 | IRBuilderBase::InsertPointGuard Guard(Builder); |
561 | Builder.SetInsertPoint(I); |
562 | return Builder.CreateXor(LHS: I->getOperand(i: 0), RHS: ConstantInt::get(Ty: VTy, V: *C)); |
563 | } |
564 | } |
565 | |
566 | // Otherwise just compute the known bits of the result. |
567 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
568 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
569 | Known = KnownBits::computeForAddSub(Add: true, NSW, NUW, LHS: LHSKnown, RHS: RHSKnown); |
570 | break; |
571 | } |
572 | case Instruction::Sub: { |
573 | // Right fill the mask of bits for the operands to demand the most |
574 | // significant bit and all those below it. |
575 | unsigned NLZ = DemandedMask.countl_zero(); |
576 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
577 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
578 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Depth: Depth + 1)) |
579 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
580 | |
581 | // If low order bits are not demanded and are known to be zero in RHS, |
582 | // then we don't need to demand them from LHS, since they can't cause a |
583 | // borrow from any bits that are demanded in the result. |
584 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); |
585 | APInt DemandedFromLHS = DemandedFromOps; |
586 | DemandedFromLHS.clearLowBits(loBits: NTZ); |
587 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || |
588 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Depth: Depth + 1)) |
589 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
590 | |
591 | // If we are known to be subtracting zeros from every bit below |
592 | // the highest demanded bit, we just return the other side. |
593 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
594 | return I->getOperand(i: 0); |
595 | // We can't do this with the LHS for subtraction, unless we are only |
596 | // demanding the LSB. |
597 | if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) |
598 | return I->getOperand(i: 1); |
599 | |
600 | // Otherwise just compute the known bits of the result. |
601 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
602 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
603 | Known = KnownBits::computeForAddSub(Add: false, NSW, NUW, LHS: LHSKnown, RHS: RHSKnown); |
604 | break; |
605 | } |
606 | case Instruction::Mul: { |
607 | APInt DemandedFromOps; |
608 | if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps)) |
609 | return I; |
610 | |
611 | if (DemandedMask.isPowerOf2()) { |
612 | // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. |
613 | // If we demand exactly one bit N and we have "X * (C' << N)" where C' is |
614 | // odd (has LSB set), then the left-shifted low bit of X is the answer. |
615 | unsigned CTZ = DemandedMask.countr_zero(); |
616 | const APInt *C; |
617 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && C->countr_zero() == CTZ) { |
618 | Constant *ShiftC = ConstantInt::get(Ty: VTy, V: CTZ); |
619 | Instruction *Shl = BinaryOperator::CreateShl(V1: I->getOperand(i: 0), V2: ShiftC); |
620 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); |
621 | } |
622 | } |
623 | // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: |
624 | // X * X is odd iff X is odd. |
625 | // 'Quadratic Reciprocity': X * X -> 0 for bit[1] |
626 | if (I->getOperand(i: 0) == I->getOperand(i: 1) && DemandedMask.ult(RHS: 4)) { |
627 | Constant *One = ConstantInt::get(Ty: VTy, V: 1); |
628 | Instruction *And1 = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: One); |
629 | return InsertNewInstWith(New: And1, Old: I->getIterator()); |
630 | } |
631 | |
632 | computeKnownBits(V: I, Known, Depth, CxtI); |
633 | break; |
634 | } |
635 | case Instruction::Shl: { |
636 | const APInt *SA; |
637 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
638 | const APInt *ShrAmt; |
639 | if (match(V: I->getOperand(i: 0), P: m_Shr(L: m_Value(), R: m_APInt(Res&: ShrAmt)))) |
640 | if (Instruction *Shr = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) |
641 | if (Value *R = simplifyShrShlDemandedBits(Shr, ShrOp1: *ShrAmt, Shl: I, ShlOp1: *SA, |
642 | DemandedMask, Known)) |
643 | return R; |
644 | |
645 | // Do not simplify if shl is part of funnel-shift pattern |
646 | if (I->hasOneUse()) { |
647 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); |
648 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { |
649 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { |
650 | auto [IID, FShiftArgs] = *Opt; |
651 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && |
652 | FShiftArgs[0] == FShiftArgs[1]) |
653 | return nullptr; |
654 | } |
655 | } |
656 | } |
657 | |
658 | // We only want bits that already match the signbit then we don't |
659 | // need to shift. |
660 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth - 1); |
661 | if (DemandedMask.countr_zero() >= ShiftAmt) { |
662 | if (I->hasNoSignedWrap()) { |
663 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
664 | unsigned SignBits = |
665 | ComputeNumSignBits(Op: I->getOperand(i: 0), Depth: Depth + 1, CxtI); |
666 | if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits) |
667 | return I->getOperand(i: 0); |
668 | } |
669 | |
670 | // If we can pre-shift a right-shifted constant to the left without |
671 | // losing any high bits and we don't demand the low bits, then eliminate |
672 | // the left-shift: |
673 | // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X |
674 | Value *X; |
675 | Constant *C; |
676 | if (match(V: I->getOperand(i: 0), P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) { |
677 | Constant *LeftShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); |
678 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: C, |
679 | RHS: LeftShiftAmtC, DL); |
680 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: NewC, |
681 | RHS: LeftShiftAmtC, DL) == C) { |
682 | Instruction *Lshr = BinaryOperator::CreateLShr(V1: NewC, V2: X); |
683 | return InsertNewInstWith(New: Lshr, Old: I->getIterator()); |
684 | } |
685 | } |
686 | } |
687 | |
688 | APInt DemandedMaskIn(DemandedMask.lshr(shiftAmt: ShiftAmt)); |
689 | |
690 | // If the shift is NUW/NSW, then it does demand the high bits. |
691 | ShlOperator *IOp = cast<ShlOperator>(Val: I); |
692 | if (IOp->hasNoSignedWrap()) |
693 | DemandedMaskIn.setHighBits(ShiftAmt+1); |
694 | else if (IOp->hasNoUnsignedWrap()) |
695 | DemandedMaskIn.setHighBits(ShiftAmt); |
696 | |
697 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Depth: Depth + 1)) |
698 | return I; |
699 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
700 | |
701 | Known = KnownBits::shl(LHS: Known, |
702 | RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)), |
703 | /* NUW */ IOp->hasNoUnsignedWrap(), |
704 | /* NSW */ IOp->hasNoSignedWrap()); |
705 | } else { |
706 | // This is a variable shift, so we can't shift the demand mask by a known |
707 | // amount. But if we are not demanding high bits, then we are not |
708 | // demanding those bits from the pre-shifted operand either. |
709 | if (unsigned CTLZ = DemandedMask.countl_zero()) { |
710 | APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ)); |
711 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOp, Known, Depth: Depth + 1)) { |
712 | // We can't guarantee that nsw/nuw hold after simplifying the operand. |
713 | I->dropPoisonGeneratingFlags(); |
714 | return I; |
715 | } |
716 | } |
717 | computeKnownBits(V: I, Known, Depth, CxtI); |
718 | } |
719 | break; |
720 | } |
721 | case Instruction::LShr: { |
722 | const APInt *SA; |
723 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
724 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); |
725 | |
726 | // Do not simplify if lshr is part of funnel-shift pattern |
727 | if (I->hasOneUse()) { |
728 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); |
729 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { |
730 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { |
731 | auto [IID, FShiftArgs] = *Opt; |
732 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && |
733 | FShiftArgs[0] == FShiftArgs[1]) |
734 | return nullptr; |
735 | } |
736 | } |
737 | } |
738 | |
739 | // If we are just demanding the shifted sign bit and below, then this can |
740 | // be treated as an ASHR in disguise. |
741 | if (DemandedMask.countl_zero() >= ShiftAmt) { |
742 | // If we only want bits that already match the signbit then we don't |
743 | // need to shift. |
744 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
745 | unsigned SignBits = |
746 | ComputeNumSignBits(Op: I->getOperand(i: 0), Depth: Depth + 1, CxtI); |
747 | if (SignBits >= NumHiDemandedBits) |
748 | return I->getOperand(i: 0); |
749 | |
750 | // If we can pre-shift a left-shifted constant to the right without |
751 | // losing any low bits (we already know we don't demand the high bits), |
752 | // then eliminate the right-shift: |
753 | // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X |
754 | Value *X; |
755 | Constant *C; |
756 | if (match(V: I->getOperand(i: 0), P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X)))) { |
757 | Constant *RightShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); |
758 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: C, |
759 | RHS: RightShiftAmtC, DL); |
760 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: NewC, |
761 | RHS: RightShiftAmtC, DL) == C) { |
762 | Instruction *Shl = BinaryOperator::CreateShl(V1: NewC, V2: X); |
763 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); |
764 | } |
765 | } |
766 | } |
767 | |
768 | // Unsigned shift right. |
769 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); |
770 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Depth: Depth + 1)) { |
        // The "exact" flag may no longer hold.
772 | I->dropPoisonGeneratingFlags(); |
773 | return I; |
774 | } |
775 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
776 | Known.Zero.lshrInPlace(ShiftAmt); |
777 | Known.One.lshrInPlace(ShiftAmt); |
778 | if (ShiftAmt) |
779 | Known.Zero.setHighBits(ShiftAmt); // high bits known zero. |
780 | } else { |
781 | computeKnownBits(V: I, Known, Depth, CxtI); |
782 | } |
783 | break; |
784 | } |
785 | case Instruction::AShr: { |
786 | unsigned SignBits = ComputeNumSignBits(Op: I->getOperand(i: 0), Depth: Depth + 1, CxtI); |
787 | |
788 | // If we only want bits that already match the signbit then we don't need |
789 | // to shift. |
790 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
791 | if (SignBits >= NumHiDemandedBits) |
792 | return I->getOperand(i: 0); |
793 | |
794 | // If this is an arithmetic shift right and only the low-bit is set, we can |
795 | // always convert this into a logical shr, even if the shift amount is |
796 | // variable. The low bit of the shift cannot be an input sign bit unless |
797 | // the shift amount is >= the size of the datatype, which is undefined. |
798 | if (DemandedMask.isOne()) { |
799 | // Perform the logical shift right. |
800 | Instruction *NewVal = BinaryOperator::CreateLShr( |
801 | V1: I->getOperand(i: 0), V2: I->getOperand(i: 1), Name: I->getName()); |
802 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); |
803 | } |
804 | |
805 | const APInt *SA; |
806 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
807 | uint32_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); |
808 | |
809 | // Signed shift right. |
810 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); |
811 | // If any of the high bits are demanded, we should set the sign bit as |
812 | // demanded. |
813 | if (DemandedMask.countl_zero() <= ShiftAmt) |
814 | DemandedMaskIn.setSignBit(); |
815 | |
816 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Depth: Depth + 1)) { |
        // The "exact" flag may no longer hold.
818 | I->dropPoisonGeneratingFlags(); |
819 | return I; |
820 | } |
821 | |
822 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
823 | // Compute the new bits that are at the top now plus sign bits. |
824 | APInt HighBits(APInt::getHighBitsSet( |
825 | numBits: BitWidth, hiBitsSet: std::min(a: SignBits + ShiftAmt - 1, b: BitWidth))); |
826 | Known.Zero.lshrInPlace(ShiftAmt); |
827 | Known.One.lshrInPlace(ShiftAmt); |
828 | |
829 | // If the input sign bit is known to be zero, or if none of the top bits |
830 | // are demanded, turn this into an unsigned shift right. |
831 | assert(BitWidth > ShiftAmt && "Shift amount not saturated?" ); |
832 | if (Known.Zero[BitWidth-ShiftAmt-1] || |
833 | !DemandedMask.intersects(RHS: HighBits)) { |
834 | BinaryOperator *LShr = BinaryOperator::CreateLShr(V1: I->getOperand(i: 0), |
835 | V2: I->getOperand(i: 1)); |
836 | LShr->setIsExact(cast<BinaryOperator>(Val: I)->isExact()); |
837 | LShr->takeName(V: I); |
838 | return InsertNewInstWith(New: LShr, Old: I->getIterator()); |
839 | } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one. |
840 | Known.One |= HighBits; |
841 | // SignBits may be out-of-sync with Known.countMinSignBits(). Mask out |
842 | // high bits of Known.Zero to avoid conflicts. |
843 | Known.Zero &= ~HighBits; |
844 | } |
845 | } else { |
846 | computeKnownBits(V: I, Known, Depth, CxtI); |
847 | } |
848 | break; |
849 | } |
850 | case Instruction::UDiv: { |
851 | // UDiv doesn't demand low bits that are zero in the divisor. |
852 | const APInt *SA; |
853 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
854 | // TODO: Take the demanded mask of the result into account. |
855 | unsigned RHSTrailingZeros = SA->countr_zero(); |
856 | APInt DemandedMaskIn = |
857 | APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - RHSTrailingZeros); |
858 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known&: LHSKnown, Depth: Depth + 1)) { |
859 | // We can't guarantee that "exact" is still true after changing the |
860 | // the dividend. |
861 | I->dropPoisonGeneratingFlags(); |
862 | return I; |
863 | } |
864 | |
865 | Known = KnownBits::udiv(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *SA), |
866 | Exact: cast<BinaryOperator>(Val: I)->isExact()); |
867 | } else { |
868 | computeKnownBits(V: I, Known, Depth, CxtI); |
869 | } |
870 | break; |
871 | } |
872 | case Instruction::SRem: { |
873 | const APInt *Rem; |
874 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Rem))) { |
875 | // X % -1 demands all the bits because we don't want to introduce |
876 | // INT_MIN % -1 (== undef) by accident. |
877 | if (Rem->isAllOnes()) |
878 | break; |
879 | APInt RA = Rem->abs(); |
880 | if (RA.isPowerOf2()) { |
881 | if (DemandedMask.ult(RHS: RA)) // srem won't affect demanded bits |
882 | return I->getOperand(i: 0); |
883 | |
884 | APInt LowBits = RA - 1; |
885 | APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); |
886 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: Mask2, Known&: LHSKnown, Depth: Depth + 1)) |
887 | return I; |
888 | |
889 | // The low bits of LHS are unchanged by the srem. |
890 | Known.Zero = LHSKnown.Zero & LowBits; |
891 | Known.One = LHSKnown.One & LowBits; |
892 | |
893 | // If LHS is non-negative or has all low bits zero, then the upper bits |
894 | // are all zero. |
895 | if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(RHS: LHSKnown.Zero)) |
896 | Known.Zero |= ~LowBits; |
897 | |
898 | // If LHS is negative and not all low bits are zero, then the upper bits |
899 | // are all one. |
900 | if (LHSKnown.isNegative() && LowBits.intersects(RHS: LHSKnown.One)) |
901 | Known.One |= ~LowBits; |
902 | |
903 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
904 | break; |
905 | } |
906 | } |
907 | |
908 | computeKnownBits(V: I, Known, Depth, CxtI); |
909 | break; |
910 | } |
911 | case Instruction::URem: { |
912 | APInt AllOnes = APInt::getAllOnes(numBits: BitWidth); |
913 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: AllOnes, Known&: LHSKnown, Depth: Depth + 1) || |
914 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: AllOnes, Known&: RHSKnown, Depth: Depth + 1)) |
915 | return I; |
916 | |
917 | Known = KnownBits::urem(LHS: LHSKnown, RHS: RHSKnown); |
918 | break; |
919 | } |
920 | case Instruction::Call: { |
921 | bool KnownBitsComputed = false; |
922 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
923 | switch (II->getIntrinsicID()) { |
924 | case Intrinsic::abs: { |
925 | if (DemandedMask == 1) |
926 | return II->getArgOperand(i: 0); |
927 | break; |
928 | } |
929 | case Intrinsic::ctpop: { |
930 | // Checking if the number of clear bits is odd (parity)? If the type has |
931 | // an even number of bits, that's the same as checking if the number of |
932 | // set bits is odd, so we can eliminate the 'not' op. |
933 | Value *X; |
934 | if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 && |
935 | match(V: II->getArgOperand(i: 0), P: m_Not(V: m_Value(V&: X)))) { |
936 | Function *Ctpop = Intrinsic::getDeclaration( |
937 | M: II->getModule(), Intrinsic::id: ctpop, Tys: VTy); |
938 | return InsertNewInstWith(New: CallInst::Create(Func: Ctpop, Args: {X}), Old: I->getIterator()); |
939 | } |
940 | break; |
941 | } |
942 | case Intrinsic::bswap: { |
943 | // If the only bits demanded come from one byte of the bswap result, |
944 | // just shift the input byte into position to eliminate the bswap. |
945 | unsigned NLZ = DemandedMask.countl_zero(); |
946 | unsigned NTZ = DemandedMask.countr_zero(); |
947 | |
948 | // Round NTZ down to the next byte. If we have 11 trailing zeros, then |
949 | // we need all the bits down to bit 8. Likewise, round NLZ. If we |
950 | // have 14 leading zeros, round to 8. |
951 | NLZ = alignDown(Value: NLZ, Align: 8); |
952 | NTZ = alignDown(Value: NTZ, Align: 8); |
953 | // If we need exactly one byte, we can do this transformation. |
954 | if (BitWidth - NLZ - NTZ == 8) { |
955 | // Replace this with either a left or right shift to get the byte into |
956 | // the right place. |
957 | Instruction *NewVal; |
958 | if (NLZ > NTZ) |
959 | NewVal = BinaryOperator::CreateLShr( |
960 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NLZ - NTZ)); |
961 | else |
962 | NewVal = BinaryOperator::CreateShl( |
963 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NTZ - NLZ)); |
964 | NewVal->takeName(V: I); |
965 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); |
966 | } |
967 | break; |
968 | } |
969 | case Intrinsic::ptrmask: { |
970 | unsigned MaskWidth = I->getOperand(i: 1)->getType()->getScalarSizeInBits(); |
971 | RHSKnown = KnownBits(MaskWidth); |
972 | // If either the LHS or the RHS are Zero, the result is zero. |
973 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Depth: Depth + 1) || |
974 | SimplifyDemandedBits( |
975 | I, OpNo: 1, DemandedMask: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth), |
976 | Known&: RHSKnown, Depth: Depth + 1)) |
977 | return I; |
978 | |
979 | // TODO: Should be 1-extend |
980 | RHSKnown = RHSKnown.anyextOrTrunc(BitWidth); |
981 | assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
982 | assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?" ); |
983 | |
984 | Known = LHSKnown & RHSKnown; |
985 | KnownBitsComputed = true; |
986 | |
987 | // If the client is only demanding bits we know to be zero, return |
988 | // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer |
989 | // provenance, but making the mask zero will be easily optimizable in |
990 | // the backend. |
991 | if (DemandedMask.isSubsetOf(RHS: Known.Zero) && |
992 | !match(V: I->getOperand(i: 1), P: m_Zero())) |
993 | return replaceOperand( |
994 | I&: *I, OpNum: 1, V: Constant::getNullValue(Ty: I->getOperand(i: 1)->getType())); |
995 | |
996 | // Mask in demanded space does nothing. |
997 | // NOTE: We may have attributes associated with the return value of the |
998 | // llvm.ptrmask intrinsic that will be lost when we just return the |
999 | // operand. We should try to preserve them. |
1000 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) |
1001 | return I->getOperand(i: 0); |
1002 | |
1003 | // If the RHS is a constant, see if we can simplify it. |
1004 | if (ShrinkDemandedConstant( |
1005 | I, OpNo: 1, Demanded: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth))) |
1006 | return I; |
1007 | |
1008 | // Combine: |
1009 | // (ptrmask (getelementptr i8, ptr p, imm i), imm mask) |
1010 | // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask) |
1011 | // where only the low bits known to be zero in the pointer are changed |
1012 | Value *InnerPtr; |
1013 | uint64_t GEPIndex; |
1014 | uint64_t PtrMaskImmediate; |
1015 | if (match(I, m_Intrinsic<Intrinsic::ptrmask>( |
1016 | m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)), |
1017 | m_ConstantInt(PtrMaskImmediate)))) { |
1018 | |
1019 | LHSKnown = computeKnownBits(V: InnerPtr, Depth: Depth + 1, CxtI: I); |
1020 | if (!LHSKnown.isZero()) { |
1021 | const unsigned trailingZeros = LHSKnown.countMinTrailingZeros(); |
1022 | uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1; |
1023 | |
1024 | uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits; |
1025 | uint64_t MaskedLowBitsGEPIndex = |
1026 | GEPIndex & PointerAlignBits & PtrMaskImmediate; |
1027 | |
1028 | uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; |
1029 | |
1030 | if (MaskedGEPIndex != GEPIndex) { |
1031 | auto *GEP = cast<GetElementPtrInst>(Val: II->getArgOperand(i: 0)); |
1032 | Builder.SetInsertPoint(I); |
1033 | Type *GEPIndexType = |
1034 | DL.getIndexType(PtrTy: GEP->getPointerOperand()->getType()); |
1035 | Value *MaskedGEP = Builder.CreateGEP( |
1036 | Ty: GEP->getSourceElementType(), Ptr: InnerPtr, |
1037 | IdxList: ConstantInt::get(Ty: GEPIndexType, V: MaskedGEPIndex), |
1038 | Name: GEP->getName(), IsInBounds: GEP->isInBounds()); |
1039 | |
1040 | replaceOperand(I&: *I, OpNum: 0, V: MaskedGEP); |
1041 | return I; |
1042 | } |
1043 | } |
1044 | } |
1045 | |
1046 | break; |
1047 | } |
1048 | |
1049 | case Intrinsic::fshr: |
1050 | case Intrinsic::fshl: { |
1051 | const APInt *SA; |
1052 | if (!match(V: I->getOperand(i: 2), P: m_APInt(Res&: SA))) |
1053 | break; |
1054 | |
1055 | // Normalize to funnel shift left. APInt shifts of BitWidth are well- |
1056 | // defined, so no need to special-case zero shifts here. |
1057 | uint64_t ShiftAmt = SA->urem(RHS: BitWidth); |
1058 | if (II->getIntrinsicID() == Intrinsic::fshr) |
1059 | ShiftAmt = BitWidth - ShiftAmt; |
1060 | |
1061 | APInt DemandedMaskLHS(DemandedMask.lshr(shiftAmt: ShiftAmt)); |
1062 | APInt DemandedMaskRHS(DemandedMask.shl(shiftAmt: BitWidth - ShiftAmt)); |
1063 | if (I->getOperand(i: 0) != I->getOperand(i: 1)) { |
1064 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskLHS, Known&: LHSKnown, |
1065 | Depth: Depth + 1) || |
1066 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedMaskRHS, Known&: RHSKnown, Depth: Depth + 1)) |
1067 | return I; |
1068 | } else { // fshl is a rotate |
1069 | // Avoid converting rotate into funnel shift. |
1070 | // Only simplify if one operand is constant. |
1071 | LHSKnown = computeKnownBits(V: I->getOperand(i: 0), Depth: Depth + 1, CxtI: I); |
1072 | if (DemandedMaskLHS.isSubsetOf(RHS: LHSKnown.Zero | LHSKnown.One) && |
1073 | !match(V: I->getOperand(i: 0), P: m_SpecificInt(V: LHSKnown.One))) { |
1074 | replaceOperand(I&: *I, OpNum: 0, V: Constant::getIntegerValue(Ty: VTy, V: LHSKnown.One)); |
1075 | return I; |
1076 | } |
1077 | |
1078 | RHSKnown = computeKnownBits(V: I->getOperand(i: 1), Depth: Depth + 1, CxtI: I); |
1079 | if (DemandedMaskRHS.isSubsetOf(RHS: RHSKnown.Zero | RHSKnown.One) && |
1080 | !match(V: I->getOperand(i: 1), P: m_SpecificInt(V: RHSKnown.One))) { |
1081 | replaceOperand(I&: *I, OpNum: 1, V: Constant::getIntegerValue(Ty: VTy, V: RHSKnown.One)); |
1082 | return I; |
1083 | } |
1084 | } |
1085 | |
1086 | Known.Zero = LHSKnown.Zero.shl(shiftAmt: ShiftAmt) | |
1087 | RHSKnown.Zero.lshr(shiftAmt: BitWidth - ShiftAmt); |
1088 | Known.One = LHSKnown.One.shl(shiftAmt: ShiftAmt) | |
1089 | RHSKnown.One.lshr(shiftAmt: BitWidth - ShiftAmt); |
1090 | KnownBitsComputed = true; |
1091 | break; |
1092 | } |
1093 | case Intrinsic::umax: { |
1094 | // UMax(A, C) == A if ... |
1095 | // The lowest non-zero bit of DemandMask is higher than the highest |
1096 | // non-zero bit of C. |
1097 | const APInt *C; |
1098 | unsigned CTZ = DemandedMask.countr_zero(); |
1099 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && |
1100 | CTZ >= C->getActiveBits()) |
1101 | return II->getArgOperand(i: 0); |
1102 | break; |
1103 | } |
1104 | case Intrinsic::umin: { |
1105 | // UMin(A, C) == A if ... |
1106 | // The lowest non-zero bit of DemandMask is higher than the highest |
1107 | // non-one bit of C. |
1108 | // This comes from using DeMorgans on the above umax example. |
1109 | const APInt *C; |
1110 | unsigned CTZ = DemandedMask.countr_zero(); |
1111 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && |
1112 | CTZ >= C->getBitWidth() - C->countl_one()) |
1113 | return II->getArgOperand(i: 0); |
1114 | break; |
1115 | } |
1116 | default: { |
1117 | // Handle target specific intrinsics |
1118 | std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic( |
1119 | II&: *II, DemandedMask, Known, KnownBitsComputed); |
1120 | if (V) |
1121 | return *V; |
1122 | break; |
1123 | } |
1124 | } |
1125 | } |
1126 | |
1127 | if (!KnownBitsComputed) |
1128 | computeKnownBits(V, Known, Depth, CxtI); |
1129 | break; |
1130 | } |
1131 | } |
1132 | |
1133 | if (V->getType()->isPointerTy()) { |
1134 | Align Alignment = V->getPointerAlignment(DL); |
1135 | Known.Zero.setLowBits(Log2(A: Alignment)); |
1136 | } |
1137 | |
1138 | // If the client is only demanding bits that we know, return the known |
1139 | // constant. We can't directly simplify pointers as a constant because of |
1140 | // pointer provenance. |
1141 | // TODO: We could return `(inttoptr const)` for pointers. |
1142 | if (!V->getType()->isPointerTy() && DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
1143 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
1144 | |
1145 | if (VerifyKnownBits) { |
1146 | KnownBits ReferenceKnown = computeKnownBits(V, Depth, CxtI); |
1147 | if (Known != ReferenceKnown) { |
1148 | errs() << "Mismatched known bits for " << *V << " in " |
1149 | << I->getFunction()->getName() << "\n" ; |
1150 | errs() << "computeKnownBits(): " << ReferenceKnown << "\n" ; |
1151 | errs() << "SimplifyDemandedBits(): " << Known << "\n" ; |
1152 | std::abort(); |
1153 | } |
1154 | } |
1155 | |
1156 | return nullptr; |
1157 | } |
1158 | |
/// Helper routine of SimplifyDemandedUseBits. It computes the known bits of
/// \p I and, based on \p DemandedMask, tries simplifications that are valid
/// only in the context of the single use site \p CxtI — without modifying
/// \p I itself, since \p I has other users and must not be rewritten in place.
///
/// Returns a simpler value that can replace \p I at \p CxtI, or nullptr if no
/// context-specific simplification was found. \p Known is filled in either way.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth,
    Instruction *CxtI) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  // Per-operand known-bits scratch, reused by the cases below.
  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in all User's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
                                         Depth, SQ: SQ.getWithInstruction(I: CxtI));
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this context.
    // For each demanded bit in the first test: either the RHS bit is known 1
    // (the 'and' passes the LHS bit through) or the LHS bit is known 0 (the
    // result is 0, which equals the LHS bit) — so the result matches LHS on
    // every demanded bit. The second test is symmetric for RHS.
    if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(i: 0);
    if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(i: 1);

    break;
  }
  case Instruction::Or: {
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
                                         Depth, SQ: SQ.getWithInstruction(I: CxtI));
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or' in this context.
    // (Demanded bits known 1 on the returned side are also fine: the 'or'
    // result is 1 there, matching that side.)
    if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(i: 0);
    if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(i: 1);

    break;
  }
  case Instruction::Xor: {
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
                                         Depth, SQ: SQ.getWithInstruction(I: CxtI));
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other
    // (xor with a known-zero bit is the identity on that bit). Unlike and/or,
    // known-one bits do NOT allow this: they would flip the other side.
    if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero))
      return I->getOperand(i: 0);
    if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero))
      return I->getOperand(i: 1);

    break;
  }
  case Instruction::Add: {
    // Bits above the highest demanded bit never influence demanded result
    // bits of an add (carries only propagate upward), so only the low
    // (BitWidth - NLZ) bits of each operand matter.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);

    // If an operand adds zeros to every bit below the highest demanded bit,
    // that operand doesn't change the result. Return the other side.
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
      return I->getOperand(i: 0);

    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero))
      return I->getOperand(i: 1);

    bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
    Known =
        KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, LHS: LHSKnown, RHS: RHSKnown);
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));
    break;
  }
  case Instruction::Sub: {
    // As with Add, only the bits at or below the highest demanded bit of the
    // operands can affect the demanded result bits.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);

    // If an operand subtracts zeros from every bit below the highest demanded
    // bit, that operand doesn't change the result. Return the other side.
    // Note: subtraction is not commutative, so only the RHS-is-zero case
    // allows returning an operand unchanged.
    computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Depth: Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
      return I->getOperand(i: 0);

    bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
    computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Depth: Depth + 1, CxtI);
    Known = KnownBits::computeForAddSub(/*Add=*/false, NSW, NUW, LHS: LHSKnown,
                                        RHS: RHSKnown);
    computeKnownBitsFromContext(V: I, Known, Depth, Q: SQ.getWithInstruction(I: CxtI));
    break;
  }
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(V: I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    // If the right shift operand 0 is a result of a left shift by the same
    // amount, this is probably a zero/sign extension, which may be unnecessary,
    // if we do not demand any of the new sign bits. So, return the original
    // operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    // Shadows the function-level BitWidth with an identical value (both are
    // derived from DemandedMask).
    unsigned BitWidth = DemandedMask.getBitWidth();
    // NOTE(review): ShiftLC == ShiftRC compares APInt pointers; presumably it
    // relies on constant uniquing so that equal shift amounts bind to the same
    // APInt object — confirm this also holds for splat-vector constants.
    if (match(V: I,
              P: m_AShr(L: m_Shl(L: m_Value(V&: X), R: m_APInt(Res&: ShiftLC)), R: m_APInt(Res&: ShiftRC))) &&
        ShiftLC == ShiftRC && ShiftLC->ult(RHS: BitWidth) &&
        DemandedMask.isSubsetOf(RHS: APInt::getLowBitsSet(
            numBits: BitWidth, loBitsSet: BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }

    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(V: I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(RHS: Known.Zero|Known.One))
      return Constant::getIntegerValue(Ty: ITy, V: Known.One);

    break;
  }

  // No context-specific simplification found; the caller still gets Known.
  return nullptr;
}
1322 | |
1323 | /// Helper routine of SimplifyDemandedUseBits. It tries to simplify |
1324 | /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into |
1325 | /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign |
1326 | /// of "C2-C1". |
1327 | /// |
1328 | /// Suppose E1 and E2 are generally different in bits S={bm, bm+1, |
1329 | /// ..., bn}, without considering the specific value X is holding. |
/// This transformation is legal iff one of the following conditions holds:
///   1) All the bits in S are 0; in this case E1 == E2.
///   2) We don't care about those bits in S, per the input DemandedMask.
///   3) A combination of 1) and 2): some bits in S are 0, and we don't care
///      about the rest of the bits.
1335 | /// |
1336 | /// Currently we only test condition 2). |
1337 | /// |
1338 | /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was |
1339 | /// not successful. |
1340 | Value *InstCombinerImpl::simplifyShrShlDemandedBits( |
1341 | Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, |
1342 | const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) { |
1343 | if (!ShlOp1 || !ShrOp1) |
1344 | return nullptr; // No-op. |
1345 | |
1346 | Value *VarX = Shr->getOperand(i: 0); |
1347 | Type *Ty = VarX->getType(); |
1348 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
1349 | if (ShlOp1.uge(RHS: BitWidth) || ShrOp1.uge(RHS: BitWidth)) |
1350 | return nullptr; // Undef. |
1351 | |
1352 | unsigned ShlAmt = ShlOp1.getZExtValue(); |
1353 | unsigned ShrAmt = ShrOp1.getZExtValue(); |
1354 | |
1355 | Known.One.clearAllBits(); |
1356 | Known.Zero.setLowBits(ShlAmt - 1); |
1357 | Known.Zero &= DemandedMask; |
1358 | |
1359 | APInt BitMask1(APInt::getAllOnes(numBits: BitWidth)); |
1360 | APInt BitMask2(APInt::getAllOnes(numBits: BitWidth)); |
1361 | |
1362 | bool isLshr = (Shr->getOpcode() == Instruction::LShr); |
1363 | BitMask1 = isLshr ? (BitMask1.lshr(shiftAmt: ShrAmt) << ShlAmt) : |
1364 | (BitMask1.ashr(ShiftAmt: ShrAmt) << ShlAmt); |
1365 | |
1366 | if (ShrAmt <= ShlAmt) { |
1367 | BitMask2 <<= (ShlAmt - ShrAmt); |
1368 | } else { |
1369 | BitMask2 = isLshr ? BitMask2.lshr(shiftAmt: ShrAmt - ShlAmt): |
1370 | BitMask2.ashr(ShiftAmt: ShrAmt - ShlAmt); |
1371 | } |
1372 | |
1373 | // Check if condition-2 (see the comment to this function) is satified. |
1374 | if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { |
1375 | if (ShrAmt == ShlAmt) |
1376 | return VarX; |
1377 | |
1378 | if (!Shr->hasOneUse()) |
1379 | return nullptr; |
1380 | |
1381 | BinaryOperator *New; |
1382 | if (ShrAmt < ShlAmt) { |
1383 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShlAmt - ShrAmt); |
1384 | New = BinaryOperator::CreateShl(V1: VarX, V2: Amt); |
1385 | BinaryOperator *Orig = cast<BinaryOperator>(Val: Shl); |
1386 | New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); |
1387 | New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); |
1388 | } else { |
1389 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShrAmt - ShlAmt); |
1390 | New = isLshr ? BinaryOperator::CreateLShr(V1: VarX, V2: Amt) : |
1391 | BinaryOperator::CreateAShr(V1: VarX, V2: Amt); |
1392 | if (cast<BinaryOperator>(Val: Shr)->isExact()) |
1393 | New->setIsExact(true); |
1394 | } |
1395 | |
1396 | return InsertNewInstWith(New, Old: Shl->getIterator()); |
1397 | } |
1398 | |
1399 | return nullptr; |
1400 | } |
1401 | |
/// The specified value produces a vector with any number of elements.
/// This method analyzes which elements of the operand are poison and
/// returns that information in PoisonElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
/// simplified only if it has a single caller. If AllowMultipleUsers is set
/// to true, DemandedElts refers to the union of sets of elements that are
/// used by all callers.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                    APInt DemandedElts,
                                                    APInt &PoisonElts,
                                                    unsigned Depth,
                                                    bool AllowMultipleUsers) {
  // Cannot analyze scalable type. The number of vector elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Val: V->getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
  APInt EltMask(APInt::getAllOnes(numBits: VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!" );

  if (match(V, P: m_Poison())) {
    // If the entire vector is poison, just return this info.
    PoisonElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
    PoisonElts = EltMask;
    return PoisonValue::get(T: V->getType());
  }

  PoisonElts = 0;

  if (auto *C = dyn_cast<Constant>(Val: V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnes())
      return nullptr;

    // Rebuild the constant with every non-demanded lane replaced by poison.
    Type *EltTy = cast<VectorType>(Val: V->getType())->getElementType();
    Constant *Poison = PoisonValue::get(T: EltTy);
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) { // If not demanded, set to poison.
        Elts.push_back(Elt: Poison);
        PoisonElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(Elt: i);
      if (!Elt) return nullptr;

      Elts.push_back(Elt);
      if (isa<PoisonValue>(Val: Elt)) // Already poison.
        PoisonElts.setBit(i);
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(V: Elts);
    return NewCV != C ? NewCV : nullptr;
  }

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  if (!AllowMultipleUsers) {
    // If multiple users are using the root value, proceed with
    // simplification conservatively assuming that all elements
    // are needed.
    if (!V->hasOneUse()) {
      // Quit if we find multiple users of a non-root value though.
      // They'll be handled when it's their turn to be visited by
      // the main instcombine process.
      if (Depth != 0)
        // TODO: Just compute the PoisonElts information recursively.
        return nullptr;

      // Conservatively assume that all elements are needed.
      DemandedElts = EltMask;
    }
  }

  Instruction *I = dyn_cast<Instruction>(Val: V);
  if (!I) return nullptr;        // Only analyze instructions.

  bool MadeChange = false;
  // Recurse into operand OpNum with the given demanded set; if the operand
  // simplifies, splice the new value in and remember that we changed I.
  auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
                              APInt Demanded, APInt &Undef) {
    auto *II = dyn_cast<IntrinsicInst>(Val: Inst);
    Value *Op = II ? II->getArgOperand(i: OpNum) : Inst->getOperand(i: OpNum);
    if (Value *V = SimplifyDemandedVectorElts(V: Op, DemandedElts: Demanded, PoisonElts&: Undef, Depth: Depth + 1)) {
      replaceOperand(I&: *Inst, OpNum, V);
      MadeChange = true;
    }
  };

  APInt PoisonElts2(VWidth, 0);
  APInt PoisonElts3(VWidth, 0);
  switch (I->getOpcode()) {
  default: break;

  case Instruction::GetElementPtr: {
    // The LangRef requires that struct geps have all constant indices.  As
    // such, we can't convert any operand to partial undef.
    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
      for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
           I != E; I++)
        if (I.isStruct())
          return true;
      return false;
    };
    if (mayIndexStructType(cast<GetElementPtrInst>(Val&: *I)))
      break;

    // Conservatively track the demanded elements back through any vector
    // operands we may have.  We know there must be at least one, or we
    // wouldn't have a vector result to get here. Note that we intentionally
    // merge the undef bits here since gepping with either an poison base or
    // index results in poison.
    for (unsigned i = 0; i < I->getNumOperands(); i++) {
      if (i == 0 ? match(V: I->getOperand(i), P: m_Undef())
                 : match(V: I->getOperand(i), P: m_Poison())) {
        // If the entire vector is undefined, just return this info.
        PoisonElts = EltMask;
        return nullptr;
      }
      if (I->getOperand(i)->getType()->isVectorTy()) {
        APInt PoisonEltsOp(VWidth, 0);
        simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
        // gep(x, undef) is not undef, so skip considering idx ops here
        // Note that we could propagate poison, but we can't distinguish between
        // undef & poison bits ATM
        if (i == 0)
          PoisonElts |= PoisonEltsOp;
      }
    }

    break;
  }
  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites.
    // demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
      break;
    }

    // The element inserted overwrites whatever was there, so the input demanded
    // set is simpler than the output set.
    unsigned IdxNo = Idx->getZExtValue();
    APInt PreInsertDemandedElts = DemandedElts;
    if (IdxNo < VWidth)
      PreInsertDemandedElts.clearBit(BitPosition: IdxNo);

    // If we only demand the element that is being inserted and that element
    // was extracted from the same index in another vector with the same type,
    // replace this insert with that other vector.
    // Note: This is attempted before the call to simplifyAndSetOp because that
    // may change PoisonElts to a value that does not match with Vec.
    Value *Vec;
    if (PreInsertDemandedElts == 0 &&
        match(V: I->getOperand(i: 1),
              P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_SpecificInt(V: IdxNo))) &&
        Vec->getType() == I->getType()) {
      return Vec;
    }

    simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.push(I);
      return I->getOperand(i: 0);
    }

    // The inserted element is defined.
    PoisonElts.clearBit(BitPosition: IdxNo);
    break;
  }
  case Instruction::ShuffleVector: {
    auto *Shuffle = cast<ShuffleVectorInst>(Val: I);
    assert(Shuffle->getOperand(0)->getType() ==
           Shuffle->getOperand(1)->getType() &&
           "Expected shuffle operands to have same type" );
    unsigned OpWidth = cast<FixedVectorType>(Val: Shuffle->getOperand(i_nocapture: 0)->getType())
                           ->getNumElements();
    // Handle trivial case of a splat. Only check the first element of LHS
    // operand.
    if (all_of(Range: Shuffle->getShuffleMask(), P: [](int Elt) { return Elt == 0; }) &&
        DemandedElts.isAllOnes()) {
      // A splat of lane 0 never reads the second operand; drop it to poison.
      if (!isa<PoisonValue>(Val: I->getOperand(i: 1))) {
        I->setOperand(i: 1, Val: PoisonValue::get(T: I->getOperand(i: 1)->getType()));
        MadeChange = true;
      }
      APInt LeftDemanded(OpWidth, 1);
      APInt LHSPoisonElts(OpWidth, 0);
      simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
      // Every result lane is a copy of lane 0, so it is poison iff lane 0 is.
      if (LHSPoisonElts[0])
        PoisonElts = EltMask;
      else
        PoisonElts.clearAllBits();
      break;
    }

    // Translate the demanded result lanes into demanded lanes of each source.
    APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
        if (MaskVal != -1u) {
          assert(MaskVal < OpWidth * 2 &&
                 "shufflevector mask index out of range!" );
          if (MaskVal < OpWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - OpWidth);
        }
      }
    }

    APInt LHSPoisonElts(OpWidth, 0);
    simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);

    APInt RHSPoisonElts(OpWidth, 0);
    simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);

    // If this shuffle does not change the vector length and the elements
    // demanded by this shuffle are an identity mask, then this shuffle is
    // unnecessary.
    //
    // We are assuming canonical form for the mask, so the source vector is
    // operand 0 and operand 1 is not used.
    //
    // Note that if an element is demanded and this shuffle mask is undefined
    // for that element, then the shuffle is not considered an identity
    // operation.  The shuffle prevents poison from the operand vector from
    // leaking to the result by replacing poison with an undefined value.
    if (VWidth == OpWidth) {
      bool IsIdentityShuffle = true;
      for (unsigned i = 0; i < VWidth; i++) {
        unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
        if (DemandedElts[i] && i != MaskVal) {
          IsIdentityShuffle = false;
          break;
        }
      }
      if (IsIdentityShuffle)
        return Shuffle->getOperand(i_nocapture: 0);
    }

    // Scan the mask: collect newly discovered poison lanes and, as a side
    // product, track whether each operand contributes exactly one lane
    // (LHSIdx/RHSIdx hold that lane, or OpWidth once more than one is seen).
    bool NewPoisonElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
      if (MaskVal == -1u) {
        PoisonElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewPoisonElts = true;
        PoisonElts.setBit(i);
      } else if (MaskVal < OpWidth) {
        if (LHSPoisonElts[MaskVal]) {
          NewPoisonElts = true;
          PoisonElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : OpWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSPoisonElts[MaskVal - OpWidth]) {
          NewPoisonElts = true;
          PoisonElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : OpWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
          RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
    if (OpWidth ==
        cast<FixedVectorType>(Val: Shuffle->getType())->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < OpWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 0))) {
          Op = Shuffle->getOperand(i_nocapture: 1);
          Value = CV->getOperand(i_nocapture: LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < OpWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 1))) {
          Op = Shuffle->getOperand(i_nocapture: 0);
          Value = CV->getOperand(i_nocapture: RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Vec: Op, NewElt: Value, Idx: ConstantInt::get(Ty: Type::getInt64Ty(C&: I->getContext()), V: Idx),
            NameStr: Shuffle->getName());
        InsertNewInstWith(New, Old: Shuffle->getIterator());
        return New;
      }
    }
    if (NewPoisonElts) {
      // Add additional discovered poison lanes into the mask itself.
      SmallVector<int, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (PoisonElts[i])
          Elts.push_back(Elt: PoisonMaskElem);
        else
          Elts.push_back(Elt: Shuffle->getMaskValue(Elt: i));
      }
      Shuffle->setShuffleMask(Elts);
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    // If this is a vector select, try to transform the select condition based
    // on the current demanded elements.
    SelectInst *Sel = cast<SelectInst>(Val: I);
    if (Sel->getCondition()->getType()->isVectorTy()) {
      // TODO: We are not doing anything with PoisonElts based on this call.
      // It is overwritten below based on the other select operands.  If an
      // element of the select condition is known undef, then we are free to
      // choose the output value from either arm of the select. If we know that
      // one of those values is undef, then the output can be undef.
      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    }

    // Next, see if we can transform the arms of the select.
    APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
    if (auto *CV = dyn_cast<ConstantVector>(Val: Sel->getCondition())) {
      for (unsigned i = 0; i < VWidth; i++) {
        // isNullValue() always returns false when called on a ConstantExpr.
        // Skip constant expressions to avoid propagating incorrect information.
        Constant *CElt = CV->getAggregateElement(Elt: i);
        if (isa<ConstantExpr>(Val: CElt))
          continue;
        // TODO: If a select condition element is undef, we can demand from
        // either side. If one side is known undef, choosing that side would
        // propagate undef.
        if (CElt->isNullValue())
          DemandedLHS.clearBit(BitPosition: i);
        else
          DemandedRHS.clearBit(BitPosition: i);
      }
    }

    simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
    simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);

    // Output elements are undefined if the element from each arm is undefined.
    // TODO: This can be improved. See comment in select condition handling.
    PoisonElts = PoisonElts2 & PoisonElts3;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(Val: I->getOperand(i: 0)->getType());
    if (!VTy) break;
    unsigned InVWidth = cast<FixedVectorType>(Val: VTy)->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    PoisonElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);

    if (VWidth == InVWidth) {
      PoisonElts = PoisonElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (PoisonElts2[OutIdx / Ratio])
          PoisonElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = PoisonElts2.lshr(shiftAmt: OutIdx * Ratio).zextOrTrunc(width: Ratio);
        if (SubUndef.popcount() == Ratio)
          PoisonElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp" );
    }
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    // Element-wise casts: demanded/poison lanes map through one-to-one.
    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_gather: // fallthrough
    case Intrinsic::masked_load: {
      // Subtlety: If we load from a pointer, the pointer must be valid
      // regardless of whether the element is demanded.  Doing otherwise risks
      // segfaults which didn't exist in the original program.
      APInt DemandedPtrs(APInt::getAllOnes(numBits: VWidth)),
          DemandedPassThrough(DemandedElts);
      // With a constant mask, a lane comes from exactly one source: masked-off
      // lanes don't need their pointer, masked-on lanes don't need passthrough.
      if (auto *CV = dyn_cast<ConstantVector>(Val: II->getOperand(i_nocapture: 2)))
        for (unsigned i = 0; i < VWidth; i++) {
          Constant *CElt = CV->getAggregateElement(Elt: i);
          if (CElt->isNullValue())
            DemandedPtrs.clearBit(BitPosition: i);
          else if (CElt->isAllOnesValue())
            DemandedPassThrough.clearBit(BitPosition: i);
        }
      if (II->getIntrinsicID() == Intrinsic::masked_gather)
        simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
      simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);

      // Output elements are undefined if the element from both sources are.
      // TODO: can strengthen via mask as well.
      PoisonElts = PoisonElts2 & PoisonElts3;
      break;
    }
    default: {
      // Handle target specific intrinsics
      std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
          II&: *II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
          SimplifyAndSetOp: simplifyAndSetOp);
      if (V)
        return *V;
      break;
    }
    } // switch on IntrinsicID
    break;
  } // case Call
  } // switch on Opcode

  // TODO: We bail completely on integer div/rem and shifts because they have
  // UB/poison potential, but that should be refined.
  BinaryOperator *BO;
  if (match(V: I, P: m_BinOp(I&: BO)) && !BO->isIntDivRem() && !BO->isShift()) {
    Value *X = BO->getOperand(i_nocapture: 0);
    Value *Y = BO->getOperand(i_nocapture: 1);

    // Look for an equivalent binop except that one operand has been shuffled.
    // If the demand for this binop only includes elements that are the same as
    // the other binop, then we may be able to replace this binop with a use of
    // the earlier one.
    //
    // Example:
    // %other_bo = bo (shuf X, {0}), Y
    // %this_extracted_bo = extelt (bo X, Y), 0
    // -->
    // %other_bo = bo (shuf X, {0}), Y
    // %this_extracted_bo = extelt %other_bo, 0
    //
    // TODO: Handle demand of an arbitrary single element or more than one
    //       element instead of just element 0.
    // TODO: Unlike general demanded elements transforms, this should be safe
    //       for any (div/rem/shift) opcode too.
    if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
        BO->hasOneUse() ) {

      auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
        // Try to use shuffle-of-operand in place of an operand:
        // bo X, Y --> bo (shuf X), Y
        // bo X, Y --> bo X, (shuf Y)
        BinaryOperator::BinaryOps Opcode = BO->getOpcode();
        Value *ShufOp = MatchShufAsOp0 ? X : Y;
        Value *OtherOp = MatchShufAsOp0 ? Y : X;
        for (User *U : OtherOp->users()) {
          ArrayRef<int> Mask;
          auto Shuf = m_Shuffle(v1: m_Specific(V: ShufOp), v2: m_Value(), mask: m_Mask(Mask));
          if (BO->isCommutative()
                  ? match(V: U, P: m_c_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp)))
                  : MatchShufAsOp0
                        ? match(V: U, P: m_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp)))
                        : match(V: U, P: m_BinOp(Opcode, L: m_Specific(V: OtherOp), R: Shuf)))
            // The candidate must splat lane 0 and must already execute before
            // I, otherwise we cannot reuse its result here.
            if (match(Mask, P: m_ZeroMask()) && Mask[0] != PoisonMaskElem)
              if (DT.dominates(Def: U, User: I))
                return U;
        }
        return nullptr;
      };

      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
        return ShufBO;
      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
        return ShufBO;
    }

    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);

    // Output elements are undefined if both are undefined.  Consider things
    // like undef & 0.  The result is known zero, not undef.
    PoisonElts &= PoisonElts2;
  }

  // If we've proven all of the lanes poison, return a poison value.
  // TODO: Intersect w/demanded lanes
  if (PoisonElts.isAllOnes())
    return PoisonValue::get(T: I->getType());

  return MadeChange ? I : nullptr;
}
1960 | |
1961 | /// For floating-point classes that resolve to a single bit pattern, return that |
1962 | /// value. |
1963 | static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) { |
1964 | switch (Mask) { |
1965 | case fcPosZero: |
1966 | return ConstantFP::getZero(Ty); |
1967 | case fcNegZero: |
1968 | return ConstantFP::getZero(Ty, Negative: true); |
1969 | case fcPosInf: |
1970 | return ConstantFP::getInfinity(Ty); |
1971 | case fcNegInf: |
1972 | return ConstantFP::getInfinity(Ty, Negative: true); |
1973 | case fcNone: |
1974 | return PoisonValue::get(T: Ty); |
1975 | default: |
1976 | return nullptr; |
1977 | } |
1978 | } |
1979 | |
/// Simplify \p V given that only the floating-point classes in \p DemandedMask
/// can be observed by the user. Populates \p Known with the classes V may
/// take; returns a replacement value, or null if no change was made.
Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
    Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
    unsigned Depth, Instruction *CxtI) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth" );
  Type *VTy = V->getType();

  assert(Known == KnownFPClass() && "expected uninitialized state" );

  // Nothing demanded: any value works, so fold to poison (but leave undef
  // alone; it is already as unconstrained as possible).
  if (DemandedMask == fcNone)
    return isa<UndefValue>(Val: V) ? nullptr : PoisonValue::get(T: VTy);

  if (Depth == MaxAnalysisRecursionDepth)
    return nullptr;

  Instruction *I = dyn_cast<Instruction>(Val: V);
  if (!I) {
    // Handle constants and arguments
    Known = computeKnownFPClass(Val: V, Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1);
    Value *FoldedToConst =
        getFPClassConstant(Ty: VTy, Mask: DemandedMask & Known.KnownFPClasses);
    return FoldedToConst == V ? nullptr : FoldedToConst;
  }

  // Don't mutate an instruction with other users based on one use's demand.
  if (!I->hasOneUse())
    return nullptr;

  // TODO: Should account for nofpclass/FastMathFlags on current instruction
  switch (I->getOpcode()) {
  case Instruction::FNeg: {
    // fneg swaps sign classes, so demand the sign-flipped mask of the operand.
    if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::fneg(Mask: DemandedMask), Known,
                                Depth: Depth + 1))
      return I;
    Known.fneg();
    break;
  }
  case Instruction::Call: {
    CallInst *CI = cast<CallInst>(Val: I);
    switch (CI->getIntrinsicID()) {
    case Intrinsic::fabs:
      // fabs folds negative classes into positive ones; demand the classes of
      // the operand that could map into DemandedMask after the fold.
      if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::inverse_fabs(Mask: DemandedMask), Known,
                                  Depth: Depth + 1))
        return I;
      Known.fabs();
      break;
    case Intrinsic::arithmetic_fence:
      // The fence is a value-preserving barrier: pass the demand straight
      // through.
      if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known, Depth: Depth + 1))
        return I;
      break;
    case Intrinsic::copysign: {
      // Flip on more potentially demanded classes
      const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(Mask: DemandedMask);
      if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: DemandedMaskAnySign, Known, Depth: Depth + 1))
        return I;

      if ((DemandedMask & fcPositive) == fcNone) {
        // Roundabout way of replacing with fneg(fabs)
        I->setOperand(i: 1, Val: ConstantFP::get(Ty: VTy, V: -1.0));
        return I;
      }

      if ((DemandedMask & fcNegative) == fcNone) {
        // Roundabout way of replacing with fabs
        I->setOperand(i: 1, Val: ConstantFP::getZero(Ty: VTy));
        return I;
      }

      KnownFPClass KnownSign =
          computeKnownFPClass(Val: I->getOperand(i: 1), Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1);
      Known.copysign(Sign: KnownSign);
      break;
    }
    default:
      Known = computeKnownFPClass(Val: I, Interested: ~DemandedMask, CtxI: CxtI, Depth: Depth + 1);
      break;
    }

    break;
  }
  case Instruction::Select: {
    KnownFPClass KnownLHS, KnownRHS;
    if (SimplifyDemandedFPClass(I, Op: 2, DemandedMask, Known&: KnownRHS, Depth: Depth + 1) ||
        SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownLHS, Depth: Depth + 1))
      return I;

    // If one arm can never produce a demanded class, the select's observable
    // result always comes from the other arm.
    if (KnownLHS.isKnownNever(Mask: DemandedMask))
      return I->getOperand(i: 2);
    if (KnownRHS.isKnownNever(Mask: DemandedMask))
      return I->getOperand(i: 1);

    // TODO: Recognize clamping patterns
    Known = KnownLHS | KnownRHS;
    break;
  }
  default:
    Known = computeKnownFPClass(Val: I, Interested: ~DemandedMask, CtxI: CxtI, Depth: Depth + 1);
    break;
  }

  // If the demanded classes that remain possible pin down a unique value,
  // fold the whole instruction to that constant (or poison if none remain).
  return getFPClassConstant(Ty: VTy, Mask: DemandedMask & Known.KnownFPClasses);
}
2080 | |
2081 | bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo, |
2082 | FPClassTest DemandedMask, |
2083 | KnownFPClass &Known, |
2084 | unsigned Depth) { |
2085 | Use &U = I->getOperandUse(i: OpNo); |
2086 | Value *NewVal = |
2087 | SimplifyDemandedUseFPClass(V: U.get(), DemandedMask, Known, Depth, CxtI: I); |
2088 | if (!NewVal) |
2089 | return false; |
2090 | if (Instruction *OpInst = dyn_cast<Instruction>(Val&: U)) |
2091 | salvageDebugInfo(I&: *OpInst); |
2092 | |
2093 | replaceUse(U, NewValue: NewVal); |
2094 | return true; |
2095 | } |
2096 | |