1 | //===- InstCombineAndOrXor.cpp --------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the visitAnd, visitOr, and visitXor functions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "InstCombineInternal.h" |
14 | #include "llvm/Analysis/CmpInstAnalysis.h" |
15 | #include "llvm/Analysis/InstructionSimplify.h" |
16 | #include "llvm/IR/ConstantRange.h" |
17 | #include "llvm/IR/Intrinsics.h" |
18 | #include "llvm/IR/PatternMatch.h" |
19 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
20 | #include "llvm/Transforms/Utils/Local.h" |
21 | |
22 | using namespace llvm; |
23 | using namespace PatternMatch; |
24 | |
25 | #define DEBUG_TYPE "instcombine" |
26 | |
27 | /// This is the complement of getICmpCode, which turns an opcode and two |
28 | /// operands into either a constant true or false, or a brand new ICmp |
29 | /// instruction. The sign is passed in to determine which kind of predicate to |
30 | /// use in the new icmp instruction. |
31 | static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS, |
32 | InstCombiner::BuilderTy &Builder) { |
33 | ICmpInst::Predicate NewPred; |
34 | if (Constant *TorF = getPredForICmpCode(Code, Sign, OpTy: LHS->getType(), Pred&: NewPred)) |
35 | return TorF; |
36 | return Builder.CreateICmp(P: NewPred, LHS, RHS); |
37 | } |
38 | |
39 | /// This is the complement of getFCmpCode, which turns an opcode and two |
40 | /// operands into either a FCmp instruction, or a true/false constant. |
41 | static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, |
42 | InstCombiner::BuilderTy &Builder) { |
43 | FCmpInst::Predicate NewPred; |
44 | if (Constant *TorF = getPredForFCmpCode(Code, OpTy: LHS->getType(), Pred&: NewPred)) |
45 | return TorF; |
46 | return Builder.CreateFCmp(P: NewPred, LHS, RHS); |
47 | } |
48 | |
49 | /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise |
50 | /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates |
51 | /// whether to treat V, Lo, and Hi as signed or not. |
52 | Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo, |
53 | const APInt &Hi, bool isSigned, |
54 | bool Inside) { |
55 | assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) && |
56 | "Lo is not < Hi in range emission code!" ); |
57 | |
58 | Type *Ty = V->getType(); |
59 | |
60 | // V >= Min && V < Hi --> V < Hi |
61 | // V < Min || V >= Hi --> V >= Hi |
62 | ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; |
63 | if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) { |
64 | Pred = isSigned ? ICmpInst::getSignedPredicate(pred: Pred) : Pred; |
65 | return Builder.CreateICmp(P: Pred, LHS: V, RHS: ConstantInt::get(Ty, V: Hi)); |
66 | } |
67 | |
68 | // V >= Lo && V < Hi --> V - Lo u< Hi - Lo |
69 | // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo |
70 | Value *VMinusLo = |
71 | Builder.CreateSub(LHS: V, RHS: ConstantInt::get(Ty, V: Lo), Name: V->getName() + ".off" ); |
72 | Constant *HiMinusLo = ConstantInt::get(Ty, V: Hi - Lo); |
73 | return Builder.CreateICmp(P: Pred, LHS: VMinusLo, RHS: HiMinusLo); |
74 | } |
75 | |
/// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
/// that can be simplified.
/// One of A and B is considered the mask. The other is the value. This is
/// described as the "AMask" or "BMask" part of the enum. If the enum contains
/// only "Mask", then both A and B can be considered masks. If A is the mask,
/// then it was proven that (A & C) == C. This is trivial if C == A or C == 0.
/// If both A and C are constants, this proof is also easy.
/// For the following explanations, we assume that A is the mask.
///
/// "AllOnes" declares that the comparison is true only if (A & B) == A or all
/// bits of A are set in B.
/// Example: (icmp eq (A & 3), 3) -> AMask_AllOnes
///
/// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all
/// bits of A are cleared in B.
/// Example: (icmp eq (A & 3), 0) -> Mask_AllZeroes
///
/// "Mixed" declares that (A & B) == C and C might or might not contain any
/// number of one bits and zero bits.
/// Example: (icmp eq (A & 3), 1) -> AMask_Mixed
///
/// "Not" means that in above descriptions "==" should be replaced by "!=".
/// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
///
/// If the mask A contains a single bit, then the following is equivalent:
///    (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
///    (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
// Note: values are bit flags; each "NotXXX" flag is exactly its "XXX"
// counterpart shifted left by one, which conjugateICmpMask() relies on.
enum MaskedICmpType {
  AMask_AllOnes = 1,
  AMask_NotAllOnes = 2,
  BMask_AllOnes = 4,
  BMask_NotAllOnes = 8,
  Mask_AllZeros = 16,
  Mask_NotAllZeros = 32,
  AMask_Mixed = 64,
  AMask_NotMixed = 128,
  BMask_Mixed = 256,
  BMask_NotMixed = 512
};
115 | |
116 | /// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C) |
117 | /// satisfies. |
118 | static unsigned getMaskedICmpType(Value *A, Value *B, Value *C, |
119 | ICmpInst::Predicate Pred) { |
120 | const APInt *ConstA = nullptr, *ConstB = nullptr, *ConstC = nullptr; |
121 | match(V: A, P: m_APInt(Res&: ConstA)); |
122 | match(V: B, P: m_APInt(Res&: ConstB)); |
123 | match(V: C, P: m_APInt(Res&: ConstC)); |
124 | bool IsEq = (Pred == ICmpInst::ICMP_EQ); |
125 | bool IsAPow2 = ConstA && ConstA->isPowerOf2(); |
126 | bool IsBPow2 = ConstB && ConstB->isPowerOf2(); |
127 | unsigned MaskVal = 0; |
128 | if (ConstC && ConstC->isZero()) { |
129 | // if C is zero, then both A and B qualify as mask |
130 | MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed) |
131 | : (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed)); |
132 | if (IsAPow2) |
133 | MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed) |
134 | : (AMask_AllOnes | AMask_Mixed)); |
135 | if (IsBPow2) |
136 | MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed) |
137 | : (BMask_AllOnes | BMask_Mixed)); |
138 | return MaskVal; |
139 | } |
140 | |
141 | if (A == C) { |
142 | MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed) |
143 | : (AMask_NotAllOnes | AMask_NotMixed)); |
144 | if (IsAPow2) |
145 | MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed) |
146 | : (Mask_AllZeros | AMask_Mixed)); |
147 | } else if (ConstA && ConstC && ConstC->isSubsetOf(RHS: *ConstA)) { |
148 | MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed); |
149 | } |
150 | |
151 | if (B == C) { |
152 | MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed) |
153 | : (BMask_NotAllOnes | BMask_NotMixed)); |
154 | if (IsBPow2) |
155 | MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed) |
156 | : (Mask_AllZeros | BMask_Mixed)); |
157 | } else if (ConstB && ConstC && ConstC->isSubsetOf(RHS: *ConstB)) { |
158 | MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed); |
159 | } |
160 | |
161 | return MaskVal; |
162 | } |
163 | |
164 | /// Convert an analysis of a masked ICmp into its equivalent if all boolean |
165 | /// operations had the opposite sense. Since each "NotXXX" flag (recording !=) |
166 | /// is adjacent to the corresponding normal flag (recording ==), this just |
167 | /// involves swapping those bits over. |
168 | static unsigned conjugateICmpMask(unsigned Mask) { |
169 | unsigned NewMask; |
170 | NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros | |
171 | AMask_Mixed | BMask_Mixed)) |
172 | << 1; |
173 | |
174 | NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros | |
175 | AMask_NotMixed | BMask_NotMixed)) |
176 | >> 1; |
177 | |
178 | return NewMask; |
179 | } |
180 | |
181 | // Adapts the external decomposeBitTestICmp for local use. |
182 | static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, |
183 | Value *&X, Value *&Y, Value *&Z) { |
184 | APInt Mask; |
185 | if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask)) |
186 | return false; |
187 | |
188 | Y = ConstantInt::get(Ty: X->getType(), V: Mask); |
189 | Z = ConstantInt::get(Ty: X->getType(), V: 0); |
190 | return true; |
191 | } |
192 | |
193 | /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E). |
194 | /// Return the pattern classes (from MaskedICmpType) for the left hand side and |
195 | /// the right hand side as a pair. |
196 | /// LHS and RHS are the left hand side and the right hand side ICmps and PredL |
197 | /// and PredR are their predicates, respectively. |
198 | static std::optional<std::pair<unsigned, unsigned>> getMaskedTypeForICmpPair( |
199 | Value *&A, Value *&B, Value *&C, Value *&D, Value *&E, ICmpInst *LHS, |
200 | ICmpInst *RHS, ICmpInst::Predicate &PredL, ICmpInst::Predicate &PredR) { |
201 | // Don't allow pointers. Splat vectors are fine. |
202 | if (!LHS->getOperand(i_nocapture: 0)->getType()->isIntOrIntVectorTy() || |
203 | !RHS->getOperand(i_nocapture: 0)->getType()->isIntOrIntVectorTy()) |
204 | return std::nullopt; |
205 | |
206 | // Here comes the tricky part: |
207 | // LHS might be of the form L11 & L12 == X, X == L21 & L22, |
208 | // and L11 & L12 == L21 & L22. The same goes for RHS. |
209 | // Now we must find those components L** and R**, that are equal, so |
210 | // that we can extract the parameters A, B, C, D, and E for the canonical |
211 | // above. |
212 | Value *L1 = LHS->getOperand(i_nocapture: 0); |
213 | Value *L2 = LHS->getOperand(i_nocapture: 1); |
214 | Value *L11, *L12, *L21, *L22; |
215 | // Check whether the icmp can be decomposed into a bit test. |
216 | if (decomposeBitTestICmp(LHS: L1, RHS: L2, Pred&: PredL, X&: L11, Y&: L12, Z&: L2)) { |
217 | L21 = L22 = L1 = nullptr; |
218 | } else { |
219 | // Look for ANDs in the LHS icmp. |
220 | if (!match(V: L1, P: m_And(L: m_Value(V&: L11), R: m_Value(V&: L12)))) { |
221 | // Any icmp can be viewed as being trivially masked; if it allows us to |
222 | // remove one, it's worth it. |
223 | L11 = L1; |
224 | L12 = Constant::getAllOnesValue(Ty: L1->getType()); |
225 | } |
226 | |
227 | if (!match(V: L2, P: m_And(L: m_Value(V&: L21), R: m_Value(V&: L22)))) { |
228 | L21 = L2; |
229 | L22 = Constant::getAllOnesValue(Ty: L2->getType()); |
230 | } |
231 | } |
232 | |
233 | // Bail if LHS was a icmp that can't be decomposed into an equality. |
234 | if (!ICmpInst::isEquality(P: PredL)) |
235 | return std::nullopt; |
236 | |
237 | Value *R1 = RHS->getOperand(i_nocapture: 0); |
238 | Value *R2 = RHS->getOperand(i_nocapture: 1); |
239 | Value *R11, *R12; |
240 | bool Ok = false; |
241 | if (decomposeBitTestICmp(LHS: R1, RHS: R2, Pred&: PredR, X&: R11, Y&: R12, Z&: R2)) { |
242 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { |
243 | A = R11; |
244 | D = R12; |
245 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { |
246 | A = R12; |
247 | D = R11; |
248 | } else { |
249 | return std::nullopt; |
250 | } |
251 | E = R2; |
252 | R1 = nullptr; |
253 | Ok = true; |
254 | } else { |
255 | if (!match(V: R1, P: m_And(L: m_Value(V&: R11), R: m_Value(V&: R12)))) { |
256 | // As before, model no mask as a trivial mask if it'll let us do an |
257 | // optimization. |
258 | R11 = R1; |
259 | R12 = Constant::getAllOnesValue(Ty: R1->getType()); |
260 | } |
261 | |
262 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { |
263 | A = R11; |
264 | D = R12; |
265 | E = R2; |
266 | Ok = true; |
267 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { |
268 | A = R12; |
269 | D = R11; |
270 | E = R2; |
271 | Ok = true; |
272 | } |
273 | } |
274 | |
275 | // Bail if RHS was a icmp that can't be decomposed into an equality. |
276 | if (!ICmpInst::isEquality(P: PredR)) |
277 | return std::nullopt; |
278 | |
279 | // Look for ANDs on the right side of the RHS icmp. |
280 | if (!Ok) { |
281 | if (!match(V: R2, P: m_And(L: m_Value(V&: R11), R: m_Value(V&: R12)))) { |
282 | R11 = R2; |
283 | R12 = Constant::getAllOnesValue(Ty: R2->getType()); |
284 | } |
285 | |
286 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { |
287 | A = R11; |
288 | D = R12; |
289 | E = R1; |
290 | Ok = true; |
291 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { |
292 | A = R12; |
293 | D = R11; |
294 | E = R1; |
295 | Ok = true; |
296 | } else { |
297 | return std::nullopt; |
298 | } |
299 | |
300 | assert(Ok && "Failed to find AND on the right side of the RHS icmp." ); |
301 | } |
302 | |
303 | if (L11 == A) { |
304 | B = L12; |
305 | C = L2; |
306 | } else if (L12 == A) { |
307 | B = L11; |
308 | C = L2; |
309 | } else if (L21 == A) { |
310 | B = L22; |
311 | C = L1; |
312 | } else if (L22 == A) { |
313 | B = L21; |
314 | C = L1; |
315 | } |
316 | |
317 | unsigned LeftType = getMaskedICmpType(A, B, C, Pred: PredL); |
318 | unsigned RightType = getMaskedICmpType(A, B: D, C: E, Pred: PredR); |
319 | return std::optional<std::pair<unsigned, unsigned>>( |
320 | std::make_pair(x&: LeftType, y&: RightType)); |
321 | } |
322 | |
323 | /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single |
324 | /// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros |
325 | /// and the right hand side is of type BMask_Mixed. For example, |
326 | /// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8). |
327 | /// Also used for logical and/or, must be poison safe. |
328 | static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( |
329 | ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, |
330 | Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, |
331 | InstCombiner::BuilderTy &Builder) { |
332 | // We are given the canonical form: |
333 | // (icmp ne (A & B), 0) & (icmp eq (A & D), E). |
334 | // where D & E == E. |
335 | // |
336 | // If IsAnd is false, we get it in negated form: |
337 | // (icmp eq (A & B), 0) | (icmp ne (A & D), E) -> |
338 | // !((icmp ne (A & B), 0) & (icmp eq (A & D), E)). |
339 | // |
340 | // We currently handle the case of B, C, D, E are constant. |
341 | // |
342 | const APInt *BCst, *CCst, *DCst, *OrigECst; |
343 | if (!match(V: B, P: m_APInt(Res&: BCst)) || !match(V: C, P: m_APInt(Res&: CCst)) || |
344 | !match(V: D, P: m_APInt(Res&: DCst)) || !match(V: E, P: m_APInt(Res&: OrigECst))) |
345 | return nullptr; |
346 | |
347 | ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; |
348 | |
349 | // Update E to the canonical form when D is a power of two and RHS is |
350 | // canonicalized as, |
351 | // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or |
352 | // (icmp ne (A & D), D) -> (icmp eq (A & D), 0). |
353 | APInt ECst = *OrigECst; |
354 | if (PredR != NewCC) |
355 | ECst ^= *DCst; |
356 | |
357 | // If B or D is zero, skip because if LHS or RHS can be trivially folded by |
358 | // other folding rules and this pattern won't apply any more. |
359 | if (*BCst == 0 || *DCst == 0) |
360 | return nullptr; |
361 | |
362 | // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't |
363 | // deduce anything from it. |
364 | // For example, |
365 | // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding. |
366 | if ((*BCst & *DCst) == 0) |
367 | return nullptr; |
368 | |
369 | // If the following two conditions are met: |
370 | // |
371 | // 1. mask B covers only a single bit that's not covered by mask D, that is, |
372 | // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of |
373 | // B and D has only one bit set) and, |
374 | // |
375 | // 2. RHS (and E) indicates that the rest of B's bits are zero (in other |
376 | // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0 |
377 | // |
378 | // then that single bit in B must be one and thus the whole expression can be |
379 | // folded to |
380 | // (A & (B | D)) == (B & (B ^ D)) | E. |
381 | // |
382 | // For example, |
383 | // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9) |
384 | // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8) |
385 | if ((((*BCst & *DCst) & ECst) == 0) && |
386 | (*BCst & (*BCst ^ *DCst)).isPowerOf2()) { |
387 | APInt BorD = *BCst | *DCst; |
388 | APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst; |
389 | Value *NewMask = ConstantInt::get(Ty: A->getType(), V: BorD); |
390 | Value *NewMaskedValue = ConstantInt::get(Ty: A->getType(), V: BandBxorDorE); |
391 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: NewMask); |
392 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd, RHS: NewMaskedValue); |
393 | } |
394 | |
395 | auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) { |
396 | return (*C1 & *C2) == *C1; |
397 | }; |
398 | auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) { |
399 | return (*C1 & *C2) == *C2; |
400 | }; |
401 | |
402 | // In the following, we consider only the cases where B is a superset of D, B |
403 | // is a subset of D, or B == D because otherwise there's at least one bit |
404 | // covered by B but not D, in which case we can't deduce much from it, so |
405 | // no folding (aside from the single must-be-one bit case right above.) |
406 | // For example, |
407 | // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding. |
408 | if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst)) |
409 | return nullptr; |
410 | |
411 | // At this point, either B is a superset of D, B is a subset of D or B == D. |
412 | |
413 | // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict |
414 | // and the whole expression becomes false (or true if negated), otherwise, no |
415 | // folding. |
416 | // For example, |
417 | // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false. |
418 | // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding. |
419 | if (ECst.isZero()) { |
420 | if (IsSubSetOrEqual(BCst, DCst)) |
421 | return ConstantInt::get(Ty: LHS->getType(), V: !IsAnd); |
422 | return nullptr; |
423 | } |
424 | |
425 | // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B == |
426 | // D. If B is a superset of (or equal to) D, since E is not zero, LHS is |
427 | // subsumed by RHS (RHS implies LHS.) So the whole expression becomes |
428 | // RHS. For example, |
429 | // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). |
430 | // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). |
431 | if (IsSuperSetOrEqual(BCst, DCst)) |
432 | return RHS; |
433 | // Otherwise, B is a subset of D. If B and E have a common bit set, |
434 | // ie. (B & E) != 0, then LHS is subsumed by RHS. For example. |
435 | // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). |
436 | assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code" ); |
437 | if ((*BCst & ECst) != 0) |
438 | return RHS; |
439 | // Otherwise, LHS and RHS contradict and the whole expression becomes false |
440 | // (or true if negated.) For example, |
441 | // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false. |
442 | // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false. |
443 | return ConstantInt::get(Ty: LHS->getType(), V: !IsAnd); |
444 | } |
445 | |
446 | /// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single |
447 | /// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side |
448 | /// aren't of the common mask pattern type. |
449 | /// Also used for logical and/or, must be poison safe. |
450 | static Value *foldLogOpOfMaskedICmpsAsymmetric( |
451 | ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, |
452 | Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, |
453 | unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) { |
454 | assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && |
455 | "Expected equality predicates for masked type of icmps." ); |
456 | // Handle Mask_NotAllZeros-BMask_Mixed cases. |
457 | // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or |
458 | // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E) |
459 | // which gets swapped to |
460 | // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C). |
461 | if (!IsAnd) { |
462 | LHSMask = conjugateICmpMask(Mask: LHSMask); |
463 | RHSMask = conjugateICmpMask(Mask: RHSMask); |
464 | } |
465 | if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) { |
466 | if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( |
467 | LHS, RHS, IsAnd, A, B, C, D, E, |
468 | PredL, PredR, Builder)) { |
469 | return V; |
470 | } |
471 | } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) { |
472 | if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( |
473 | LHS: RHS, RHS: LHS, IsAnd, A, B: D, C: E, D: B, E: C, |
474 | PredL: PredR, PredR: PredL, Builder)) { |
475 | return V; |
476 | } |
477 | } |
478 | return nullptr; |
479 | } |
480 | |
481 | /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) |
482 | /// into a single (icmp(A & X) ==/!= Y). |
483 | static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, |
484 | bool IsLogical, |
485 | InstCombiner::BuilderTy &Builder) { |
486 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; |
487 | ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
488 | std::optional<std::pair<unsigned, unsigned>> MaskPair = |
489 | getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR); |
490 | if (!MaskPair) |
491 | return nullptr; |
492 | assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && |
493 | "Expected equality predicates for masked type of icmps." ); |
494 | unsigned LHSMask = MaskPair->first; |
495 | unsigned RHSMask = MaskPair->second; |
496 | unsigned Mask = LHSMask & RHSMask; |
497 | if (Mask == 0) { |
498 | // Even if the two sides don't share a common pattern, check if folding can |
499 | // still happen. |
500 | if (Value *V = foldLogOpOfMaskedICmpsAsymmetric( |
501 | LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask, |
502 | Builder)) |
503 | return V; |
504 | return nullptr; |
505 | } |
506 | |
507 | // In full generality: |
508 | // (icmp (A & B) Op C) | (icmp (A & D) Op E) |
509 | // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ] |
510 | // |
511 | // If the latter can be converted into (icmp (A & X) Op Y) then the former is |
512 | // equivalent to (icmp (A & X) !Op Y). |
513 | // |
514 | // Therefore, we can pretend for the rest of this function that we're dealing |
515 | // with the conjunction, provided we flip the sense of any comparisons (both |
516 | // input and output). |
517 | |
518 | // In most cases we're going to produce an EQ for the "&&" case. |
519 | ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; |
520 | if (!IsAnd) { |
521 | // Convert the masking analysis into its equivalent with negated |
522 | // comparisons. |
523 | Mask = conjugateICmpMask(Mask); |
524 | } |
525 | |
526 | if (Mask & Mask_AllZeros) { |
527 | // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) |
528 | // -> (icmp eq (A & (B|D)), 0) |
529 | if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(V: D)) |
530 | return nullptr; // TODO: Use freeze? |
531 | Value *NewOr = Builder.CreateOr(LHS: B, RHS: D); |
532 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: NewOr); |
533 | // We can't use C as zero because we might actually handle |
534 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) |
535 | // with B and D, having a single bit set. |
536 | Value *Zero = Constant::getNullValue(Ty: A->getType()); |
537 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd, RHS: Zero); |
538 | } |
539 | if (Mask & BMask_AllOnes) { |
540 | // (icmp eq (A & B), B) & (icmp eq (A & D), D) |
541 | // -> (icmp eq (A & (B|D)), (B|D)) |
542 | if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(V: D)) |
543 | return nullptr; // TODO: Use freeze? |
544 | Value *NewOr = Builder.CreateOr(LHS: B, RHS: D); |
545 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: NewOr); |
546 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd, RHS: NewOr); |
547 | } |
548 | if (Mask & AMask_AllOnes) { |
549 | // (icmp eq (A & B), A) & (icmp eq (A & D), A) |
550 | // -> (icmp eq (A & (B&D)), A) |
551 | if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(V: D)) |
552 | return nullptr; // TODO: Use freeze? |
553 | Value *NewAnd1 = Builder.CreateAnd(LHS: B, RHS: D); |
554 | Value *NewAnd2 = Builder.CreateAnd(LHS: A, RHS: NewAnd1); |
555 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd2, RHS: A); |
556 | } |
557 | |
558 | // Remaining cases assume at least that B and D are constant, and depend on |
559 | // their actual values. This isn't strictly necessary, just a "handle the |
560 | // easy cases for now" decision. |
561 | const APInt *ConstB, *ConstD; |
562 | if (!match(V: B, P: m_APInt(Res&: ConstB)) || !match(V: D, P: m_APInt(Res&: ConstD))) |
563 | return nullptr; |
564 | |
565 | if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) { |
566 | // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and |
567 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) |
568 | // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0) |
569 | // Only valid if one of the masks is a superset of the other (check "B&D" is |
570 | // the same as either B or D). |
571 | APInt NewMask = *ConstB & *ConstD; |
572 | if (NewMask == *ConstB) |
573 | return LHS; |
574 | else if (NewMask == *ConstD) |
575 | return RHS; |
576 | } |
577 | |
578 | if (Mask & AMask_NotAllOnes) { |
579 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) |
580 | // -> (icmp ne (A & B), A) or (icmp ne (A & D), A) |
581 | // Only valid if one of the masks is a superset of the other (check "B|D" is |
582 | // the same as either B or D). |
583 | APInt NewMask = *ConstB | *ConstD; |
584 | if (NewMask == *ConstB) |
585 | return LHS; |
586 | else if (NewMask == *ConstD) |
587 | return RHS; |
588 | } |
589 | |
590 | if (Mask & (BMask_Mixed | BMask_NotMixed)) { |
591 | // Mixed: |
592 | // (icmp eq (A & B), C) & (icmp eq (A & D), E) |
593 | // We already know that B & C == C && D & E == E. |
594 | // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of |
595 | // C and E, which are shared by both the mask B and the mask D, don't |
596 | // contradict, then we can transform to |
597 | // -> (icmp eq (A & (B|D)), (C|E)) |
598 | // Currently, we only handle the case of B, C, D, and E being constant. |
599 | // We can't simply use C and E because we might actually handle |
600 | // (icmp ne (A & B), B) & (icmp eq (A & D), D) |
601 | // with B and D, having a single bit set. |
602 | |
603 | // NotMixed: |
604 | // (icmp ne (A & B), C) & (icmp ne (A & D), E) |
605 | // -> (icmp ne (A & (B & D)), (C & E)) |
606 | // Check the intersection (B & D) for inequality. |
607 | // Assume that (B & D) == B || (B & D) == D, i.e B/D is a subset of D/B |
608 | // and (B & D) & (C ^ E) == 0, bits of C and E, which are shared by both the |
609 | // B and the D, don't contradict. |
610 | // Note that we can assume (~B & C) == 0 && (~D & E) == 0, previous |
611 | // operation should delete these icmps if it hadn't been met. |
612 | |
613 | const APInt *OldConstC, *OldConstE; |
614 | if (!match(V: C, P: m_APInt(Res&: OldConstC)) || !match(V: E, P: m_APInt(Res&: OldConstE))) |
615 | return nullptr; |
616 | |
617 | auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * { |
618 | CC = IsNot ? CmpInst::getInversePredicate(pred: CC) : CC; |
619 | const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC; |
620 | const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE; |
621 | |
622 | if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue()) |
623 | return IsNot ? nullptr : ConstantInt::get(Ty: LHS->getType(), V: !IsAnd); |
624 | |
625 | if (IsNot && !ConstB->isSubsetOf(RHS: *ConstD) && !ConstD->isSubsetOf(RHS: *ConstB)) |
626 | return nullptr; |
627 | |
628 | APInt BD, CE; |
629 | if (IsNot) { |
630 | BD = *ConstB & *ConstD; |
631 | CE = ConstC & ConstE; |
632 | } else { |
633 | BD = *ConstB | *ConstD; |
634 | CE = ConstC | ConstE; |
635 | } |
636 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: BD); |
637 | Value *CEVal = ConstantInt::get(Ty: A->getType(), V: CE); |
638 | return Builder.CreateICmp(P: CC, LHS: CEVal, RHS: NewAnd); |
639 | }; |
640 | |
641 | if (Mask & BMask_Mixed) |
642 | return FoldBMixed(NewCC, false); |
643 | if (Mask & BMask_NotMixed) // can be else also |
644 | return FoldBMixed(NewCC, true); |
645 | } |
646 | return nullptr; |
647 | } |
648 | |
649 | /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp. |
650 | /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n |
651 | /// If \p Inverted is true then the check is for the inverted range, e.g. |
652 | /// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n |
653 | Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, |
654 | bool Inverted) { |
655 | // Check the lower range comparison, e.g. x >= 0 |
656 | // InstCombine already ensured that if there is a constant it's on the RHS. |
657 | ConstantInt *RangeStart = dyn_cast<ConstantInt>(Val: Cmp0->getOperand(i_nocapture: 1)); |
658 | if (!RangeStart) |
659 | return nullptr; |
660 | |
661 | ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() : |
662 | Cmp0->getPredicate()); |
663 | |
664 | // Accept x > -1 or x >= 0 (after potentially inverting the predicate). |
665 | if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) || |
666 | (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero()))) |
667 | return nullptr; |
668 | |
669 | ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() : |
670 | Cmp1->getPredicate()); |
671 | |
672 | Value *Input = Cmp0->getOperand(i_nocapture: 0); |
673 | Value *RangeEnd; |
674 | if (Cmp1->getOperand(i_nocapture: 0) == Input) { |
675 | // For the upper range compare we have: icmp x, n |
676 | RangeEnd = Cmp1->getOperand(i_nocapture: 1); |
677 | } else if (Cmp1->getOperand(i_nocapture: 1) == Input) { |
678 | // For the upper range compare we have: icmp n, x |
679 | RangeEnd = Cmp1->getOperand(i_nocapture: 0); |
680 | Pred1 = ICmpInst::getSwappedPredicate(pred: Pred1); |
681 | } else { |
682 | return nullptr; |
683 | } |
684 | |
685 | // Check the upper range comparison, e.g. x < n |
686 | ICmpInst::Predicate NewPred; |
687 | switch (Pred1) { |
688 | case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break; |
689 | case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break; |
690 | default: return nullptr; |
691 | } |
692 | |
693 | // This simplification is only valid if the upper range is not negative. |
694 | KnownBits Known = computeKnownBits(V: RangeEnd, /*Depth=*/0, CxtI: Cmp1); |
695 | if (!Known.isNonNegative()) |
696 | return nullptr; |
697 | |
698 | if (Inverted) |
699 | NewPred = ICmpInst::getInversePredicate(pred: NewPred); |
700 | |
701 | return Builder.CreateICmp(P: NewPred, LHS: Input, RHS: RangeEnd); |
702 | } |
703 | |
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
//
// K1 and K2 must be known powers of two, so each icmp tests a single bit of
// A. The combined form tests both bits with one mask compare. Also used for
// logical (select-based) and/or, hence the poison-freeze below.
Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
                                                       ICmpInst *RHS,
                                                       Instruction *CxtI,
                                                       bool IsAnd,
                                                       bool IsLogical) {
  // For 'and' both compares must be 'ne 0'; for 'or' both must be 'eq 0'.
  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
    return nullptr;

  // Both compares must be against zero.
  if (!match(V: LHS->getOperand(i_nocapture: 1), P: m_Zero()) ||
      !match(V: RHS->getOperand(i_nocapture: 1), P: m_Zero()))
    return nullptr;

  // Match (L1 & L2) and (R1 & R2), then canonicalize so that the common
  // operand A ends up as L1 == R1 and the bit masks as L2/R2.
  Value *L1, *L2, *R1, *R2;
  if (match(V: LHS->getOperand(i_nocapture: 0), P: m_And(L: m_Value(V&: L1), R: m_Value(V&: L2))) &&
      match(V: RHS->getOperand(i_nocapture: 0), P: m_And(L: m_Value(V&: R1), R: m_Value(V&: R2)))) {
    if (L1 == R2 || L2 == R2)
      std::swap(a&: R1, b&: R2);
    if (L2 == R1)
      std::swap(a&: L1, b&: L2);

    if (L1 == R1 &&
        isKnownToBeAPowerOfTwo(V: L2, OrZero: false, Depth: 0, CxtI) &&
        isKnownToBeAPowerOfTwo(V: R2, OrZero: false, Depth: 0, CxtI)) {
      // If this is a logical and/or, then we must prevent propagation of a
      // poison value from the RHS by inserting freeze.
      if (IsLogical)
        R2 = Builder.CreateFreeze(V: R2);
      Value *Mask = Builder.CreateOr(LHS: L2, RHS: R2);
      Value *Masked = Builder.CreateAnd(LHS: L1, RHS: Mask);
      // Note the predicate flips: "both bits set" becomes Masked == Mask.
      auto NewPred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
      return Builder.CreateICmp(P: NewPred, LHS: Masked, RHS: Mask);
    }
  }

  return nullptr;
}
743 | |
/// General pattern:
///   X & Y
///
/// Where Y is checking that all the high bits (covered by a mask 4294967168)
/// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
/// Pattern can be one of:
///   %t = add        i32 %arg,    128
///   %r = icmp   ult i32 %t,      256
/// Or
///   %t0 = shl       i32 %arg,    24
///   %t1 = ashr      i32 %t0,     24
///   %r  = icmp  eq  i32 %t1,     %arg
/// Or
///   %t0 = trunc     i32 %arg  to i8
///   %t1 = sext      i8  %t0   to i32
///   %r  = icmp  eq  i32 %t1,     %arg
/// This pattern is a signed truncation check.
///
/// And X is checking that some bit in that same mask is zero.
/// I.e. can be one of:
///   %r = icmp sgt i32   %arg,    -1
/// Or
///   %t = and      i32   %arg,    2147483648
///   %r = icmp eq  i32   %t,      0
///
/// Since we are checking that all the bits in that mask are the same,
/// and a particular bit is zero, what we are really checking is that all the
/// masked bits are zero.
/// So this should be transformed to:
///   %r = icmp ult i32 %arg, 128
static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
                                        Instruction &CxtI,
                                        InstCombiner::BuilderTy &Builder) {
  // This fold only applies when the two compares are joined by 'and'.
  assert(CxtI.getOpcode() == Instruction::And);

  // Match  icmp ult (add %arg, C01), C1   (C1 == C01 << 1; powers of two)
  // On success, X is %arg and SignBitMask is C01, the would-be sign bit of
  // the narrower type.
  auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X,
                                            APInt &SignBitMask) -> bool {
    CmpInst::Predicate Pred;
    const APInt *I01, *I1; // powers of two; I1 == I01 << 1
    if (!(match(V: ICmp,
                P: m_ICmp(Pred, L: m_Add(L: m_Value(V&: X), R: m_Power2(V&: I01)), R: m_Power2(V&: I1))) &&
          Pred == ICmpInst::ICMP_ULT && I1->ugt(RHS: *I01) && I01->shl(shiftAmt: 1) == *I1))
      return false;
    // Which bit is the new sign bit as per the 'signed truncation' pattern?
    SignBitMask = *I01;
    return true;
  };

  // One icmp needs to be 'signed truncation check'.
  // We need to match this first, else we will mismatch commutative cases.
  Value *X1;
  APInt HighestBit;
  ICmpInst *OtherICmp;
  if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit))
    OtherICmp = ICmp0;
  else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit))
    OtherICmp = ICmp1;
  else
    return nullptr;

  assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)" );

  // Try to match/decompose into:  icmp eq (X & Mask), 0
  // (either directly, or via decomposeBitTestICmp for equivalent forms).
  auto tryToDecompose = [](ICmpInst *ICmp, Value *&X,
                           APInt &UnsetBitsMask) -> bool {
    CmpInst::Predicate Pred = ICmp->getPredicate();
    // Can it be decomposed into  icmp eq (X & Mask), 0  ?
    if (llvm::decomposeBitTestICmp(LHS: ICmp->getOperand(i_nocapture: 0), RHS: ICmp->getOperand(i_nocapture: 1),
                                   Pred, X, Mask&: UnsetBitsMask,
                                   /*LookThroughTrunc=*/false) &&
        Pred == ICmpInst::ICMP_EQ)
      return true;
    // Is it  icmp eq (X & Mask), 0  already?
    const APInt *Mask;
    if (match(V: ICmp, P: m_ICmp(Pred, L: m_And(L: m_Value(V&: X), R: m_APInt(Res&: Mask)), R: m_Zero())) &&
        Pred == ICmpInst::ICMP_EQ) {
      UnsetBitsMask = *Mask;
      return true;
    }
    return false;
  };

  // And the other icmp needs to be decomposable into a bit test.
  Value *X0;
  APInt UnsetBitsMask;
  if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
    return nullptr;

  assert(!UnsetBitsMask.isZero() && "empty mask makes no sense." );

  // Are they working on the same value?
  Value *X;
  if (X1 == X0) {
    // Ok as is.
    X = X1;
  } else if (match(V: X0, P: m_Trunc(Op: m_Specific(V: X1)))) {
    // The bit test may be on a truncated copy of X1; widen its mask to match
    // X1's type (high bits become zero, which keeps the test equivalent).
    UnsetBitsMask = UnsetBitsMask.zext(width: X1->getType()->getScalarSizeInBits());
    X = X1;
  } else
    return nullptr;

  // So which bits should be uniform as per the 'signed truncation check'?
  // (all the bits starting with (i.e. including) HighestBit)
  APInt SignBitsMask = ~(HighestBit - 1U);

  // UnsetBitsMask must have some common bits with SignBitsMask,
  if (!UnsetBitsMask.intersects(RHS: SignBitsMask))
    return nullptr;

  // Does UnsetBitsMask contain any bits outside of SignBitsMask?
  if (!UnsetBitsMask.isSubsetOf(RHS: SignBitsMask)) {
    // The extra unset bits must themselves form a contiguous run of high
    // ones, i.e. ~UnsetBitsMask + 1 is a power of two; the smaller of the two
    // candidate sign bits then bounds the final unsigned range.
    APInt OtherHighestBit = (~UnsetBitsMask) + 1U;
    if (!OtherHighestBit.isPowerOf2())
      return nullptr;
    HighestBit = APIntOps::umin(A: HighestBit, B: OtherHighestBit);
  }
  // Else, if it does not, then all is ok as-is.

  // %r = icmp ult %X, SignBit
  return Builder.CreateICmpULT(LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: HighestBit),
                               Name: CxtI.getName() + ".simplified" );
}
867 | |
868 | /// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and |
869 | /// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1). |
870 | /// Also used for logical and/or, must be poison safe. |
871 | static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, |
872 | InstCombiner::BuilderTy &Builder) { |
873 | CmpInst::Predicate Pred0, Pred1; |
874 | Value *X; |
875 | if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)), |
876 | m_SpecificInt(1))) || |
877 | !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt()))) |
878 | return nullptr; |
879 | |
880 | Value *CtPop = Cmp0->getOperand(i_nocapture: 0); |
881 | if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_NE) |
882 | return Builder.CreateICmpUGT(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 1)); |
883 | if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ) |
884 | return Builder.CreateICmpULT(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 2)); |
885 | |
886 | return nullptr; |
887 | } |
888 | |
/// Reduce a pair of compares that check if a value has exactly 1 bit set.
/// Also used for logical and/or, must be poison safe.
/// ctpop(X) u< 2 means X has 0 or 1 bits set; combined with X != 0 that is
/// exactly "X is a power of two" (ctpop == 1), and dually for the 'or' form.
static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
                             InstCombiner::BuilderTy &Builder) {
  // Handle 'and' / 'or' commutation: make the equality check the first operand.
  if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(a&: Cmp0, b&: Cmp1);
  else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(a&: Cmp0, b&: Cmp1);

  // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1
  CmpInst::Predicate Pred0, Pred1;
  Value *X;
  if (JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                         m_SpecificInt(2))) &&
      Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) {
    Value *CtPop = Cmp1->getOperand(i_nocapture: 0);
    return Builder.CreateICmpEQ(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 1));
  }
  // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
  if (!JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                         m_SpecificInt(1))) &&
      Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) {
    Value *CtPop = Cmp1->getOperand(i_nocapture: 0);
    return Builder.CreateICmpNE(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 1));
  }
  return nullptr;
}
919 | |
/// Try to fold (icmp(A & B) == 0) & (icmp(A & D) != E) into (icmp A u< D) iff
/// B is a contiguous set of ones starting from the most significant bit
/// (negative power of 2), D and E are equal, and D is a contiguous set of ones
/// starting at the most significant zero bit in B. Parameter B supports masking
/// using undef/poison in either scalar or vector values.
static Value *foldNegativePower2AndShiftedMask(
    Value *A, Value *B, Value *D, Value *E, ICmpInst::Predicate PredL,
    ICmpInst::Predicate PredR, InstCombiner::BuilderTy &Builder) {
  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
         "Expected equality predicates for masked type of icmps." );
  // Only the (== 0, != E) orientation is handled here; the caller passes the
  // operands swapped for the commuted case.
  if (PredL != ICmpInst::ICMP_EQ || PredR != ICmpInst::ICMP_NE)
    return nullptr;

  // Structural prechecks on the constants before the per-element test below.
  if (!match(V: B, P: m_NegatedPower2()) || !match(V: D, P: m_ShiftedMask()) ||
      !match(V: E, P: m_ShiftedMask()))
    return nullptr;

  // Test scalar arguments for conversion. B has been validated earlier to be a
  // negative power of two and thus is guaranteed to have one or more contiguous
  // ones starting from the MSB followed by zero or more contiguous zeros. D has
  // been validated earlier to be a shifted set of one or more contiguous ones.
  // In order to match, B leading ones and D leading zeros should be equal. The
  // predicate that B be a negative power of 2 prevents the condition of there
  // ever being zero leading ones. Thus 0 == 0 cannot occur. The predicate that
  // D always be a shifted mask prevents the condition of D equaling 0. This
  // prevents matching the condition where B contains the maximum number of
  // leading one bits (-1) and D contains the maximum number of leading zero
  // bits (0).
  auto isReducible = [](const Value *B, const Value *D, const Value *E) {
    const APInt *BCst, *DCst, *ECst;
    return match(V: B, P: m_APIntAllowPoison(Res&: BCst)) && match(V: D, P: m_APInt(Res&: DCst)) &&
           match(V: E, P: m_APInt(Res&: ECst)) && *DCst == *ECst &&
           (isa<PoisonValue>(Val: B) ||
            (BCst->countLeadingOnes() == DCst->countLeadingZeros()));
  };

  // Test vector type arguments for conversion.
  if (const auto *BVTy = dyn_cast<VectorType>(Val: B->getType())) {
    const auto *BFVTy = dyn_cast<FixedVectorType>(Val: BVTy);
    const auto *BConst = dyn_cast<Constant>(Val: B);
    const auto *DConst = dyn_cast<Constant>(Val: D);
    const auto *EConst = dyn_cast<Constant>(Val: E);

    // Only fixed-width vectors of constants can be checked element-wise.
    if (!BFVTy || !BConst || !DConst || !EConst)
      return nullptr;

    // Every element triple must individually satisfy the scalar test
    // (poison elements of B are allowed by isReducible).
    for (unsigned I = 0; I != BFVTy->getNumElements(); ++I) {
      const auto *BElt = BConst->getAggregateElement(Elt: I);
      const auto *DElt = DConst->getAggregateElement(Elt: I);
      const auto *EElt = EConst->getAggregateElement(Elt: I);

      if (!BElt || !DElt || !EElt)
        return nullptr;
      if (!isReducible(BElt, DElt, EElt))
        return nullptr;
    }
  } else {
    // Test scalar type arguments for conversion.
    if (!isReducible(B, D, E))
      return nullptr;
  }
  return Builder.CreateICmp(P: ICmpInst::ICMP_ULT, LHS: A, RHS: D);
}
983 | |
/// Try to fold ((icmp X u< P) & (icmp(X & M) != M)) or ((icmp X s> -1) &
/// (icmp(X & M) != M)) into (icmp X u< M). Where P is a power of 2, M < P, and
/// M is a contiguous shifted mask starting at the right most significant zero
/// bit in P. SGT is supported as when P is the largest representable power of
/// 2, an earlier optimization converts the expression into (icmp X s> -1).
/// Parameter P supports masking using undef/poison in either scalar or vector
/// values.
static Value *foldPowerOf2AndShiftedMask(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                         bool JoinedByAnd,
                                         InstCombiner::BuilderTy &Builder) {
  // This fold is only valid for the 'and' combination.
  if (!JoinedByAnd)
    return nullptr;
  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
  ICmpInst::Predicate CmpPred0 = Cmp0->getPredicate(),
                      CmpPred1 = Cmp1->getPredicate();
  // Assuming P is a 2^n, getMaskedTypeForICmpPair will normalize (icmp X u<
  // 2^n) into (icmp (X & ~(2^n-1)) == 0) and (icmp X s> -1) into (icmp (X &
  // SignMask) == 0).
  std::optional<std::pair<unsigned, unsigned>> MaskPair =
      getMaskedTypeForICmpPair(A, B, C, D, E, LHS: Cmp0, RHS: Cmp1, PredL&: CmpPred0, PredR&: CmpPred1);
  if (!MaskPair)
    return nullptr;

  const auto compareBMask = BMask_NotMixed | BMask_NotAllOnes;
  unsigned CmpMask0 = MaskPair->first;
  unsigned CmpMask1 = MaskPair->second;
  // Try both commutations: the all-zeros mask check may come from either
  // compare; in the swapped case B/D and C/E trade roles, as do the
  // predicates.
  if ((CmpMask0 & Mask_AllZeros) && (CmpMask1 == compareBMask)) {
    if (Value *V = foldNegativePower2AndShiftedMask(A, B, D, E, PredL: CmpPred0,
                                                    PredR: CmpPred1, Builder))
      return V;
  } else if ((CmpMask0 == compareBMask) && (CmpMask1 & Mask_AllZeros)) {
    if (Value *V = foldNegativePower2AndShiftedMask(A, B: D, D: B, E: C, PredL: CmpPred1,
                                                    PredR: CmpPred0, Builder))
      return V;
  }
  return nullptr;
}
1021 | |
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
                                         ICmpInst *UnsignedICmp, bool IsAnd,
                                         const SimplifyQuery &Q,
                                         InstCombiner::BuilderTy &Builder) {
  // ZeroICmp must be an equality compare of some value against zero.
  Value *ZeroCmpOp;
  ICmpInst::Predicate EqPred;
  if (!match(V: ZeroICmp, P: m_ICmp(Pred&: EqPred, L: m_Value(V&: ZeroCmpOp), R: m_Zero())) ||
      !ICmpInst::isEquality(P: EqPred))
    return nullptr;

  ICmpInst::Predicate UnsignedPred;

  Value *A, *B;
  // UnsignedICmp compares ZeroCmpOp (= A + B) against one of the addends.
  // Require at least one compare to become dead so we don't grow the IR.
  if (match(V: UnsignedICmp,
            P: m_c_ICmp(Pred&: UnsignedPred, L: m_Specific(V: ZeroCmpOp), R: m_Value(V&: A))) &&
      match(V: ZeroCmpOp, P: m_c_Add(L: m_Specific(V: A), R: m_Value(V&: B))) &&
      (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) {
    // Swap the pair so that the first argument holds the known-non-zero
    // value (if either is provably non-zero); returns false otherwise.
    auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) {
      if (!isKnownNonZero(V: NonZero, Q))
        std::swap(a&: NonZero, b&: Other);
      return isKnownNonZero(V: NonZero, Q);
    };

    // Given ZeroCmpOp = (A + B)
    // ZeroCmpOp <  A && ZeroCmpOp != 0 --> (0-X) <  Y iff
    // ZeroCmpOp >= A || ZeroCmpOp == 0 --> (0-X) >= Y iff
    // with X being the value (A/B) that is known to be non-zero,
    // and Y being remaining value.
    if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE &&
        IsAnd && GetKnownNonZeroAndOther(B, A))
      return Builder.CreateICmpULT(LHS: Builder.CreateNeg(V: B), RHS: A);
    if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ &&
        !IsAnd && GetKnownNonZeroAndOther(B, A))
      return Builder.CreateICmpUGE(LHS: Builder.CreateNeg(V: B), RHS: A);
  }

  return nullptr;
}
1062 | |
/// A contiguous bit range extracted from a wider integer: bits
/// [StartBit, StartBit + NumBits) of From.
struct IntPart {
  Value *From;       // The wider integer value the bits are taken from.
  unsigned StartBit; // Index of the lowest extracted bit within From.
  unsigned NumBits;  // Number of contiguous bits extracted.
};
1068 | |
1069 | /// Match an extraction of bits from an integer. |
1070 | static std::optional<IntPart> matchIntPart(Value *V) { |
1071 | Value *X; |
1072 | if (!match(V, P: m_OneUse(SubPattern: m_Trunc(Op: m_Value(V&: X))))) |
1073 | return std::nullopt; |
1074 | |
1075 | unsigned NumOriginalBits = X->getType()->getScalarSizeInBits(); |
1076 | unsigned = V->getType()->getScalarSizeInBits(); |
1077 | Value *Y; |
1078 | const APInt *Shift; |
1079 | // For a trunc(lshr Y, Shift) pattern, make sure we're only extracting bits |
1080 | // from Y, not any shifted-in zeroes. |
1081 | if (match(V: X, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: Y), R: m_APInt(Res&: Shift)))) && |
1082 | Shift->ule(RHS: NumOriginalBits - NumExtractedBits)) |
1083 | return {{.From: Y, .StartBit: (unsigned)Shift->getZExtValue(), .NumBits: NumExtractedBits}}; |
1084 | return {{.From: X, .StartBit: 0, .NumBits: NumExtractedBits}}; |
1085 | } |
1086 | |
1087 | /// Materialize an extraction of bits from an integer in IR. |
1088 | static Value *(const IntPart &P, IRBuilderBase &Builder) { |
1089 | Value *V = P.From; |
1090 | if (P.StartBit) |
1091 | V = Builder.CreateLShr(LHS: V, RHS: P.StartBit); |
1092 | Type *TruncTy = V->getType()->getWithNewBitWidth(NewBitWidth: P.NumBits); |
1093 | if (TruncTy != V->getType()) |
1094 | V = Builder.CreateTrunc(V, DestTy: TruncTy); |
1095 | return V; |
1096 | } |
1097 | |
/// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
/// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
/// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                       bool IsAnd) {
  // Both compares are replaced by one; require single uses to avoid growth.
  if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
    return nullptr;

  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
  // Interpret operand OpNo of Cmp as an extracted bit range (IntPart),
  // also recognizing the xor-based forms that earlier folds produce.
  auto GetMatchPart = [&](ICmpInst *Cmp,
                          unsigned OpNo) -> std::optional<IntPart> {
    if (Pred == Cmp->getPredicate())
      return matchIntPart(V: Cmp->getOperand(i_nocapture: OpNo));

    const APInt *C;
    // (icmp eq (lshr x, C), (lshr y, C)) gets optimized to:
    // (icmp ult (xor x, y), 1 << C) so also look for that.
    if (Pred == CmpInst::ICMP_EQ && Cmp->getPredicate() == CmpInst::ICMP_ULT) {
      if (!match(V: Cmp->getOperand(i_nocapture: 1), P: m_Power2(V&: C)) ||
          !match(V: Cmp->getOperand(i_nocapture: 0), P: m_Xor(L: m_Value(), R: m_Value())))
        return std::nullopt;
    }

    // (icmp ne (lshr x, C), (lshr y, C)) gets optimized to:
    // (icmp ugt (xor x, y), (1 << C) - 1) so also look for that.
    else if (Pred == CmpInst::ICMP_NE &&
             Cmp->getPredicate() == CmpInst::ICMP_UGT) {
      if (!match(V: Cmp->getOperand(i_nocapture: 1), P: m_LowBitMask(V&: C)) ||
          !match(V: Cmp->getOperand(i_nocapture: 0), P: m_Xor(L: m_Value(), R: m_Value())))
        return std::nullopt;
    } else {
      return std::nullopt;
    }

    // For the low-bit-mask (ne) form the start bit is the mask's popcount;
    // for the power-of-2 (eq) form it is the constant's log2 (countr_zero).
    unsigned From = Pred == CmpInst::ICMP_NE ? C->popcount() : C->countr_zero();
    Instruction *I = cast<Instruction>(Val: Cmp->getOperand(i_nocapture: 0));
    return {{.From: I->getOperand(i: OpNo), .StartBit: From, .NumBits: C->getBitWidth() - From}};
  };

  std::optional<IntPart> L0 = GetMatchPart(Cmp0, 0);
  std::optional<IntPart> R0 = GetMatchPart(Cmp0, 1);
  std::optional<IntPart> L1 = GetMatchPart(Cmp1, 0);
  std::optional<IntPart> R1 = GetMatchPart(Cmp1, 1);
  if (!L0 || !R0 || !L1 || !R1)
    return nullptr;

  // Make sure the LHS/RHS compare a part of the same value, possibly after
  // an operand swap.
  if (L0->From != L1->From || R0->From != R1->From) {
    if (L0->From != R1->From || R0->From != L1->From)
      return nullptr;
    std::swap(lhs&: L1, rhs&: R1);
  }

  // Make sure the extracted parts are adjacent, canonicalizing to L0/R0 being
  // the low part and L1/R1 being the high part.
  if (L0->StartBit + L0->NumBits != L1->StartBit ||
      R0->StartBit + R0->NumBits != R1->StartBit) {
    if (L1->StartBit + L1->NumBits != L0->StartBit ||
        R1->StartBit + R1->NumBits != R0->StartBit)
      return nullptr;
    std::swap(lhs&: L0, rhs&: L1);
    std::swap(lhs&: R0, rhs&: R1);
  }

  // We can simplify to a comparison of these larger parts of the integers.
  IntPart L = {.From: L0->From, .StartBit: L0->StartBit, .NumBits: L0->NumBits + L1->NumBits};
  IntPart R = {.From: R0->From, .StartBit: R0->StartBit, .NumBits: R0->NumBits + R1->NumBits};
  Value *LValue = extractIntPart(P: L, Builder);
  Value *RValue = extractIntPart(P: R, Builder);
  return Builder.CreateICmp(P: Pred, LHS: LValue, RHS: RValue);
}
1170 | |
/// Reduce logic-of-compares with equality to a constant by substituting a
/// common operand with the constant. Callers are expected to call this with
/// Cmp0/Cmp1 switched to handle logic op commutativity.
static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                          bool IsAnd, bool IsLogical,
                                          InstCombiner::BuilderTy &Builder,
                                          const SimplifyQuery &Q) {
  // Match an equality compare with a non-poison constant as Cmp0.
  // Also, give up if the compare can be constant-folded to avoid looping.
  ICmpInst::Predicate Pred0;
  Value *X;
  Constant *C;
  if (!match(V: Cmp0, P: m_ICmp(Pred&: Pred0, L: m_Value(V&: X), R: m_Constant(C))) ||
      !isGuaranteedNotToBeUndefOrPoison(V: C) || isa<Constant>(Val: X))
    return nullptr;
  // Substitution is only sound when Cmp0 dominates the truth of the other
  // operand: X == C under 'and', X != C under 'or'.
  if ((IsAnd && Pred0 != ICmpInst::ICMP_EQ) ||
      (!IsAnd && Pred0 != ICmpInst::ICMP_NE))
    return nullptr;

  // The other compare must include a common operand (X). Canonicalize the
  // common operand as operand 1 (Pred1 is swapped if the common operand was
  // operand 0).
  Value *Y;
  ICmpInst::Predicate Pred1;
  if (!match(V: Cmp1, P: m_c_ICmp(Pred&: Pred1, L: m_Value(V&: Y), R: m_Deferred(V: X))))
    return nullptr;

  // Replace variable with constant value equivalence to remove a variable use:
  // (X == C) && (Y Pred1 X) --> (X == C) && (Y Pred1 C)
  // (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C)
  // Can think of the 'or' substitution with the 'and' bool equivalent:
  // A || B --> A || (!A && B)
  Value *SubstituteCmp = simplifyICmpInst(Predicate: Pred1, LHS: Y, RHS: C, Q);
  if (!SubstituteCmp) {
    // If we need to create a new instruction, require that the old compare can
    // be removed.
    if (!Cmp1->hasOneUse())
      return nullptr;
    SubstituteCmp = Builder.CreateICmp(P: Pred1, LHS: Y, RHS: C);
  }
  // For logical (select-based) and/or, keep the short-circuit form.
  if (IsLogical)
    return IsAnd ? Builder.CreateLogicalAnd(Cond1: Cmp0, Cond2: SubstituteCmp)
                 : Builder.CreateLogicalOr(Cond1: Cmp0, Cond2: SubstituteCmp);
  return Builder.CreateBinOp(Opc: IsAnd ? Instruction::And : Instruction::Or, LHS: Cmp0,
                             RHS: SubstituteCmp);
}
1217 | |
/// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2)
/// or   (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2)
/// into a single comparison using range-based reasoning.
/// NOTE: This is also used for logical and/or, must be poison-safe!
Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
                                                     ICmpInst *ICmp2,
                                                     bool IsAnd) {
  // Both compares must be against integer constants.
  ICmpInst::Predicate Pred1, Pred2;
  Value *V1, *V2;
  const APInt *C1, *C2;
  if (!match(V: ICmp1, P: m_ICmp(Pred&: Pred1, L: m_Value(V&: V1), R: m_APInt(Res&: C1))) ||
      !match(V: ICmp2, P: m_ICmp(Pred&: Pred2, L: m_Value(V&: V2), R: m_APInt(Res&: C2))))
    return nullptr;

  // Look through add of a constant offset on V1, V2, or both operands. This
  // allows us to interpret the V + C' < C'' range idiom into a proper range.
  const APInt *Offset1 = nullptr, *Offset2 = nullptr;
  if (V1 != V2) {
    Value *X;
    if (match(V: V1, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: Offset1))))
      V1 = X;
    if (match(V: V2, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: Offset2))))
      V2 = X;
  }

  // Both compares must ultimately constrain the same value.
  if (V1 != V2)
    return nullptr;

  // For 'and' we work with the inverted (complement) ranges and apply
  // De Morgan at the end, so both paths compute a union below.
  ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
      Pred: IsAnd ? ICmpInst::getInversePredicate(pred: Pred1) : Pred1, Other: *C1);
  if (Offset1)
    CR1 = CR1.subtract(CI: *Offset1);

  ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
      Pred: IsAnd ? ICmpInst::getInversePredicate(pred: Pred2) : Pred2, Other: *C2);
  if (Offset2)
    CR2 = CR2.subtract(CI: *Offset2);

  Type *Ty = V1->getType();
  Value *NewV = V1;
  std::optional<ConstantRange> CR = CR1.exactUnionWith(CR: CR2);
  if (!CR) {
    // No exact union exists. We may still fold by masking, but that creates
    // two instructions, so both original compares must become dead.
    if (!(ICmp1->hasOneUse() && ICmp2->hasOneUse()) || CR1.isWrappedSet() ||
        CR2.isWrappedSet())
      return nullptr;

    // Check whether we have equal-size ranges that only differ by one bit.
    // In that case we can apply a mask to map one range onto the other.
    APInt LowerDiff = CR1.getLower() ^ CR2.getLower();
    APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
    APInt CR1Size = CR1.getUpper() - CR1.getLower();
    if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
        CR1Size != CR2.getUpper() - CR2.getLower())
      return nullptr;

    // Clearing the differing bit maps the higher range onto the lower one.
    CR = CR1.getLower().ult(RHS: CR2.getLower()) ? CR1 : CR2;
    NewV = Builder.CreateAnd(LHS: NewV, RHS: ConstantInt::get(Ty, V: ~LowerDiff));
  }

  // Undo the inversion done above for the 'and' case (De Morgan).
  if (IsAnd)
    CR = CR->inverse();

  CmpInst::Predicate NewPred;
  APInt NewC, Offset;
  CR->getEquivalentICmp(Pred&: NewPred, RHS&: NewC, Offset);

  if (Offset != 0)
    NewV = Builder.CreateAdd(LHS: NewV, RHS: ConstantInt::get(Ty, V: Offset));
  return Builder.CreateICmp(P: NewPred, LHS: NewV, RHS: ConstantInt::get(Ty, V: NewC));
}
1288 | |
/// Ignore all operations which only change the sign of a value, returning the
/// underlying magnitude value.
static Value *stripSignOnlyFPOps(Value *Val) {
  // Each match, when it succeeds, rebinds Val to the operand carrying the
  // magnitude; a failed match leaves Val untouched. Exactly one layer each of
  // fneg, then fabs, then copysign is peeled, in that fixed order.
  match(V: Val, P: m_FNeg(X: m_Value(V&: Val)));
  match(V: Val, P: m_FAbs(Op0: m_Value(V&: Val)));
  match(V: Val, P: m_CopySign(Op0: m_Value(V&: Val), Op1: m_Value()));
  return Val;
}
1297 | |
1298 | /// Matches canonical form of isnan, fcmp ord x, 0 |
1299 | static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) { |
1300 | return P == FCmpInst::FCMP_ORD && match(V: RHS, P: m_AnyZeroFP()); |
1301 | } |
1302 | |
1303 | /// Matches fcmp u__ x, +/-inf |
1304 | static bool matchUnorderedInfCompare(FCmpInst::Predicate P, Value *LHS, |
1305 | Value *RHS) { |
1306 | return FCmpInst::isUnordered(predicate: P) && match(V: RHS, P: m_Inf()); |
1307 | } |
1308 | |
/// and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf
///
/// Clang emits this pattern for doing an isfinite check in __builtin_isnormal.
static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS,
                                FCmpInst *RHS) {
  Value *LHS0 = LHS->getOperand(i_nocapture: 0), *LHS1 = LHS->getOperand(i_nocapture: 1);
  Value *RHS0 = RHS->getOperand(i_nocapture: 0), *RHS1 = RHS->getOperand(i_nocapture: 1);
  FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();

  // LHS must be the not-NaN check (fcmp ord x, 0) and RHS the unordered
  // compare against +/-inf; the caller tries both argument orders.
  if (!matchIsNotNaN(P: PredL, LHS: LHS0, RHS: LHS1) ||
      !matchUnorderedInfCompare(P: PredR, LHS: RHS0, RHS: RHS1))
    return nullptr;

  // Intersect the fast-math flags of the two compares for the replacement.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF = LHS->getFastMathFlags();
  FMF &= RHS->getFastMathFlags();
  Builder.setFastMathFlags(FMF);

  // Since x is known not to be NaN, the unordered predicate can be
  // strengthened to its ordered counterpart.
  return Builder.CreateFCmp(P: FCmpInst::getOrderedPredicate(Pred: PredR), LHS: RHS0, RHS: RHS1);
}
1329 | |
1330 | Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, |
1331 | bool IsAnd, bool IsLogicalSelect) { |
1332 | Value *LHS0 = LHS->getOperand(i_nocapture: 0), *LHS1 = LHS->getOperand(i_nocapture: 1); |
1333 | Value *RHS0 = RHS->getOperand(i_nocapture: 0), *RHS1 = RHS->getOperand(i_nocapture: 1); |
1334 | FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
1335 | |
1336 | if (LHS0 == RHS1 && RHS0 == LHS1) { |
1337 | // Swap RHS operands to match LHS. |
1338 | PredR = FCmpInst::getSwappedPredicate(pred: PredR); |
1339 | std::swap(a&: RHS0, b&: RHS1); |
1340 | } |
1341 | |
1342 | // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). |
1343 | // Suppose the relation between x and y is R, where R is one of |
1344 | // U(1000), L(0100), G(0010) or E(0001), and CC0 and CC1 are the bitmasks for |
1345 | // testing the desired relations. |
1346 | // |
1347 | // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this: |
1348 | // bool(R & CC0) && bool(R & CC1) |
1349 | // = bool((R & CC0) & (R & CC1)) |
1350 | // = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency |
1351 | // |
1352 | // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this: |
1353 | // bool(R & CC0) || bool(R & CC1) |
1354 | // = bool((R & CC0) | (R & CC1)) |
1355 | // = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;) |
1356 | if (LHS0 == RHS0 && LHS1 == RHS1) { |
1357 | unsigned FCmpCodeL = getFCmpCode(CC: PredL); |
1358 | unsigned FCmpCodeR = getFCmpCode(CC: PredR); |
1359 | unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR; |
1360 | |
1361 | // Intersect the fast math flags. |
1362 | // TODO: We can union the fast math flags unless this is a logical select. |
1363 | IRBuilder<>::FastMathFlagGuard FMFG(Builder); |
1364 | FastMathFlags FMF = LHS->getFastMathFlags(); |
1365 | FMF &= RHS->getFastMathFlags(); |
1366 | Builder.setFastMathFlags(FMF); |
1367 | |
1368 | return getFCmpValue(Code: NewPred, LHS: LHS0, RHS: LHS1, Builder); |
1369 | } |
1370 | |
1371 | // This transform is not valid for a logical select. |
1372 | if (!IsLogicalSelect && |
1373 | ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) || |
1374 | (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && |
1375 | !IsAnd))) { |
1376 | if (LHS0->getType() != RHS0->getType()) |
1377 | return nullptr; |
1378 | |
1379 | // FCmp canonicalization ensures that (fcmp ord/uno X, X) and |
1380 | // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0). |
1381 | if (match(V: LHS1, P: m_PosZeroFP()) && match(V: RHS1, P: m_PosZeroFP())) |
1382 | // Ignore the constants because they are obviously not NANs: |
1383 | // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y) |
1384 | // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y) |
1385 | return Builder.CreateFCmp(P: PredL, LHS: LHS0, RHS: RHS0); |
1386 | } |
1387 | |
1388 | if (IsAnd && stripSignOnlyFPOps(Val: LHS0) == stripSignOnlyFPOps(Val: RHS0)) { |
1389 | // and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf |
1390 | // and (fcmp ord x, 0), (fcmp u* fabs(x), inf) -> fcmp o* x, inf |
1391 | if (Value *Left = matchIsFiniteTest(Builder, LHS, RHS)) |
1392 | return Left; |
1393 | if (Value *Right = matchIsFiniteTest(Builder, LHS: RHS, RHS: LHS)) |
1394 | return Right; |
1395 | } |
1396 | |
1397 | // Turn at least two fcmps with constants into llvm.is.fpclass. |
1398 | // |
1399 | // If we can represent a combined value test with one class call, we can |
1400 | // potentially eliminate 4-6 instructions. If we can represent a test with a |
1401 | // single fcmp with fneg and fabs, that's likely a better canonical form. |
1402 | if (LHS->hasOneUse() && RHS->hasOneUse()) { |
1403 | auto [ClassValRHS, ClassMaskRHS] = |
1404 | fcmpToClassTest(Pred: PredR, F: *RHS->getFunction(), LHS: RHS0, RHS: RHS1); |
1405 | if (ClassValRHS) { |
1406 | auto [ClassValLHS, ClassMaskLHS] = |
1407 | fcmpToClassTest(Pred: PredL, F: *LHS->getFunction(), LHS: LHS0, RHS: LHS1); |
1408 | if (ClassValLHS == ClassValRHS) { |
1409 | unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS) |
1410 | : (ClassMaskLHS | ClassMaskRHS); |
1411 | return Builder.CreateIntrinsic( |
1412 | Intrinsic::is_fpclass, {ClassValLHS->getType()}, |
1413 | {ClassValLHS, Builder.getInt32(C: CombinedMask)}); |
1414 | } |
1415 | } |
1416 | } |
1417 | |
1418 | // Canonicalize the range check idiom: |
1419 | // and (fcmp olt/ole/ult/ule x, C), (fcmp ogt/oge/ugt/uge x, -C) |
1420 | // --> fabs(x) olt/ole/ult/ule C |
1421 | // or (fcmp ogt/oge/ugt/uge x, C), (fcmp olt/ole/ult/ule x, -C) |
1422 | // --> fabs(x) ogt/oge/ugt/uge C |
1423 | // TODO: Generalize to handle a negated variable operand? |
1424 | const APFloat *LHSC, *RHSC; |
1425 | if (LHS0 == RHS0 && LHS->hasOneUse() && RHS->hasOneUse() && |
1426 | FCmpInst::getSwappedPredicate(pred: PredL) == PredR && |
1427 | match(V: LHS1, P: m_APFloatAllowPoison(Res&: LHSC)) && |
1428 | match(V: RHS1, P: m_APFloatAllowPoison(Res&: RHSC)) && |
1429 | LHSC->bitwiseIsEqual(RHS: neg(X: *RHSC))) { |
1430 | auto IsLessThanOrLessEqual = [](FCmpInst::Predicate Pred) { |
1431 | switch (Pred) { |
1432 | case FCmpInst::FCMP_OLT: |
1433 | case FCmpInst::FCMP_OLE: |
1434 | case FCmpInst::FCMP_ULT: |
1435 | case FCmpInst::FCMP_ULE: |
1436 | return true; |
1437 | default: |
1438 | return false; |
1439 | } |
1440 | }; |
1441 | if (IsLessThanOrLessEqual(IsAnd ? PredR : PredL)) { |
1442 | std::swap(a&: LHSC, b&: RHSC); |
1443 | std::swap(a&: PredL, b&: PredR); |
1444 | } |
1445 | if (IsLessThanOrLessEqual(IsAnd ? PredL : PredR)) { |
1446 | BuilderTy::FastMathFlagGuard Guard(Builder); |
1447 | Builder.setFastMathFlags(LHS->getFastMathFlags() | |
1448 | RHS->getFastMathFlags()); |
1449 | |
1450 | Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::ID: fabs, V: LHS0); |
1451 | return Builder.CreateFCmp(P: PredL, LHS: FAbs, |
1452 | RHS: ConstantFP::get(Ty: LHS0->getType(), V: *LHSC)); |
1453 | } |
1454 | } |
1455 | |
1456 | return nullptr; |
1457 | } |
1458 | |
/// Match an fcmp against a special value that performs a test possible by
/// llvm.is.fpclass.
///
/// On success, \p ClassVal is set to the floating-point value being tested
/// and \p ClassMask to the equivalent llvm.is.fpclass test mask. Only a
/// one-use fcmp is matched so the caller's fold does not duplicate work.
static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal,
                                   uint64_t &ClassMask) {
  auto *FCmp = dyn_cast<FCmpInst>(Val: Op);
  if (!FCmp || !FCmp->hasOneUse())
    return false;

  // fcmpToClassTest yields a null value when the compare is not expressible
  // as a class test; propagate that as failure.
  std::tie(args&: ClassVal, args&: ClassMask) =
      fcmpToClassTest(Pred: FCmp->getPredicate(), F: *FCmp->getParent()->getParent(),
                      LHS: FCmp->getOperand(i_nocapture: 0), RHS: FCmp->getOperand(i_nocapture: 1));
  return ClassVal != nullptr;
}
1472 | |
/// or (is_fpclass x, mask0), (is_fpclass x, mask1)
/// -> is_fpclass x, (mask0 | mask1)
/// and (is_fpclass x, mask0), (is_fpclass x, mask1)
/// -> is_fpclass x, (mask0 & mask1)
/// xor (is_fpclass x, mask0), (is_fpclass x, mask1)
/// -> is_fpclass x, (mask0 ^ mask1)
///
/// \p Op0 / \p Op1 are the operands of the logic op \p BO. Either operand may
/// also be an fcmp that is representable as a class test (see
/// matchIsFPClassLikeFCmp). Returns the replacement or null.
Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,
                                                    Value *Op0, Value *Op1) {
  Value *ClassVal0 = nullptr;
  Value *ClassVal1 = nullptr;
  uint64_t ClassMask0, ClassMask1;

  // Restrict to folding one fcmp into one is.fpclass for now, don't introduce a
  // new class.
  //
  // TODO: Support forming is.fpclass out of 2 separate fcmps when codegen is
  // better.

  bool IsLHSClass =
      match(Op0, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
                     m_Value(V&: ClassVal0), m_ConstantInt(V&: ClassMask0))));
  bool IsRHSClass =
      match(Op1, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
                     m_Value(V&: ClassVal1), m_ConstantInt(V&: ClassMask1))));
  // Each side must be a class-like test (is.fpclass call or equivalent fcmp),
  // and both must test the same value.
  if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op: Op0, ClassVal&: ClassVal0, ClassMask&: ClassMask0)) &&
        (IsRHSClass || matchIsFPClassLikeFCmp(Op: Op1, ClassVal&: ClassVal1, ClassMask&: ClassMask1)))) &&
      ClassVal0 == ClassVal1) {
    unsigned NewClassMask;
    switch (BO.getOpcode()) {
    case Instruction::And:
      NewClassMask = ClassMask0 & ClassMask1;
      break;
    case Instruction::Or:
      NewClassMask = ClassMask0 | ClassMask1;
      break;
    case Instruction::Xor:
      NewClassMask = ClassMask0 ^ ClassMask1;
      break;
    default:
      llvm_unreachable("not a binary logic operator" );
    }

    // Prefer reusing an existing one-use is.fpclass call by rewriting its
    // mask argument in place.
    if (IsLHSClass) {
      auto *II = cast<IntrinsicInst>(Val: Op0);
      II->setArgOperand(
          i: 1, v: ConstantInt::get(Ty: II->getArgOperand(i: 1)->getType(), V: NewClassMask));
      return replaceInstUsesWith(I&: BO, V: II);
    }

    if (IsRHSClass) {
      auto *II = cast<IntrinsicInst>(Val: Op1);
      II->setArgOperand(
          i: 1, v: ConstantInt::get(Ty: II->getArgOperand(i: 1)->getType(), V: NewClassMask));
      return replaceInstUsesWith(I&: BO, V: II);
    }

    // Both sides were fcmps: materialize a fresh is.fpclass call.
    CallInst *NewClass =
        Builder.CreateIntrinsic(Intrinsic::is_fpclass, {ClassVal0->getType()},
                                {ClassVal0, Builder.getInt32(C: NewClassMask)});
    return replaceInstUsesWith(I&: BO, V: NewClass);
  }

  return nullptr;
}
1537 | |
/// Look for the pattern that conditionally negates a value via math operations:
///   cond.splat = sext i1 cond
///   sub = add cond.splat, x
///   xor = xor sub, cond.splat
/// and rewrite it to do the same, but via logical operations:
///   value.neg = sub 0, value
///   res = select i1 cond, value.neg, value
Instruction *InstCombinerImpl::canonicalizeConditionalNegationViaMathToSelect(
    BinaryOperator &I) {
  assert(I.getOpcode() == BinaryOperator::Xor && "Only for xor!" );
  Value *Cond, *X;
  // As per complexity ordering, `xor` is not commutative here.
  // Require at least one one-use xor operand, then match the exact shape:
  // operand 1 is the splatted i1 condition, operand 0 is the add of the same
  // splat with X.
  if (!match(V: &I, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Value()), R: m_Value())) ||
      !match(V: I.getOperand(i_nocapture: 1), P: m_SExt(Op: m_Value(V&: Cond))) ||
      !Cond->getType()->isIntOrIntVectorTy(BitWidth: 1) ||
      !match(V: I.getOperand(i_nocapture: 0), P: m_c_Add(L: m_SExt(Op: m_Deferred(V: Cond)), R: m_Value(V&: X))))
    return nullptr;
  // (X + sext(Cond)) ^ sext(Cond) is -X when Cond is true (sext = -1,
  // so ~(X - 1) = -X) and X when Cond is false (sext = 0).
  return SelectInst::Create(C: Cond, S1: Builder.CreateNeg(V: X, Name: X->getName() + ".neg" ),
                            S2: X);
}
1558 | |
/// This a limited reassociation for a special case (see above) where we are
/// checking if two values are either both NAN (unordered) or not-NAN (ordered).
/// This could be handled more generally in '-reassociation', but it seems like
/// an unlikely pattern for a large number of logic ops and fcmps.
static Instruction *reassociateFCmps(BinaryOperator &BO,
                                     InstCombiner::BuilderTy &Builder) {
  Instruction::BinaryOps Opcode = BO.getOpcode();
  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
         "Expecting and/or op for fcmp transform" );

  // There are 4 commuted variants of the pattern. Canonicalize operands of this
  // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
  Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1), *X;
  FCmpInst::Predicate Pred;
  if (match(V: Op1, P: m_FCmp(Pred, L: m_Value(), R: m_AnyZeroFP())))
    std::swap(a&: Op0, b&: Op1);

  // Match inner binop and the predicate for combining 2 NAN checks into 1.
  // 'and' pairs with 'ord' (both not-NAN); 'or' pairs with 'uno' (either NAN).
  Value *BO10, *BO11;
  FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
                                                           : FCmpInst::FCMP_UNO;
  if (!match(V: Op0, P: m_FCmp(Pred, L: m_Value(V&: X), R: m_AnyZeroFP())) || Pred != NanPred ||
      !match(V: Op1, P: m_BinOp(Opcode, L: m_Value(V&: BO10), R: m_Value(V&: BO11))))
    return nullptr;

  // The inner logic op must have a matching fcmp operand.
  // Try BO10 first; on failure swap so the other operand is retried below.
  Value *Y;
  if (!match(V: BO10, P: m_FCmp(Pred, L: m_Value(V&: Y), R: m_AnyZeroFP())) ||
      Pred != NanPred || X->getType() != Y->getType())
    std::swap(a&: BO10, b&: BO11);

  if (!match(V: BO10, P: m_FCmp(Pred, L: m_Value(V&: Y), R: m_AnyZeroFP())) ||
      Pred != NanPred || X->getType() != Y->getType())
    return nullptr;

  // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
  // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
  Value *NewFCmp = Builder.CreateFCmp(P: Pred, LHS: X, RHS: Y);
  if (auto *NewFCmpInst = dyn_cast<FCmpInst>(Val: NewFCmp)) {
    // Intersect FMF from the 2 source fcmps.
    NewFCmpInst->copyIRFlags(V: Op0);
    NewFCmpInst->andIRFlags(V: BO10);
  }
  return BinaryOperator::Create(Op: Opcode, S1: NewFCmp, S2: BO11);
}
1604 | |
/// Match variations of De Morgan's Laws:
/// (~A & ~B) == (~(A | B))
/// (~A | ~B) == (~(A & B))
static Instruction *matchDeMorgansLaws(BinaryOperator &I,
                                       InstCombiner &IC) {
  const Instruction::BinaryOps Opcode = I.getOpcode();
  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
         "Trying to match De Morgan's Laws with something other than and/or" );

  // Flip the logic operation.
  const Instruction::BinaryOps FlippedOpcode =
      (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;

  Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1);
  Value *A, *B;
  // ~A op ~B --> ~(A flip-op B). Only do this when neither A nor B is free
  // to invert on its own; otherwise other folds remove the 'not's directly.
  if (match(V: Op0, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) &&
      match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: B)))) &&
      !IC.isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse()) &&
      !IC.isFreeToInvert(V: B, WillInvertAllUses: B->hasOneUse())) {
    Value *AndOr =
        IC.Builder.CreateBinOp(Opc: FlippedOpcode, LHS: A, RHS: B, Name: I.getName() + ".demorgan" );
    return BinaryOperator::CreateNot(Op: AndOr);
  }

  // The 'not' ops may require reassociation.
  // (A & ~B) & ~C --> A & ~(B | C)
  // (~B & A) & ~C --> A & ~(B | C)
  // (A | ~B) | ~C --> A | ~(B & C)
  // (~B | A) | ~C --> A | ~(B & C)
  Value *C;
  if (match(V: Op0, P: m_OneUse(SubPattern: m_c_BinOp(Opcode, L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))))) &&
      match(V: Op1, P: m_Not(V: m_Value(V&: C)))) {
    Value *FlippedBO = IC.Builder.CreateBinOp(Opc: FlippedOpcode, LHS: B, RHS: C);
    return BinaryOperator::Create(Op: Opcode, S1: A, S2: IC.Builder.CreateNot(V: FlippedBO));
  }

  return nullptr;
}
1643 | |
1644 | bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) { |
1645 | Value *CastSrc = CI->getOperand(i_nocapture: 0); |
1646 | |
1647 | // Noop casts and casts of constants should be eliminated trivially. |
1648 | if (CI->getSrcTy() == CI->getDestTy() || isa<Constant>(Val: CastSrc)) |
1649 | return false; |
1650 | |
1651 | // If this cast is paired with another cast that can be eliminated, we prefer |
1652 | // to have it eliminated. |
1653 | if (const auto *PrecedingCI = dyn_cast<CastInst>(Val: CastSrc)) |
1654 | if (isEliminableCastPair(CI1: PrecedingCI, CI2: CI)) |
1655 | return false; |
1656 | |
1657 | return true; |
1658 | } |
1659 | |
/// Fold {and,or,xor} (cast X), C.
///
/// \p Cast is an operand of \p Logic; the other operand (operand 1) must be
/// a constant for the fold to apply. Returns the replacement or null.
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
                                          InstCombinerImpl &IC) {
  Constant *C = dyn_cast<Constant>(Val: Logic.getOperand(i_nocapture: 1));
  if (!C)
    return nullptr;

  auto LogicOpc = Logic.getOpcode();
  Type *DestTy = Logic.getType();
  Type *SrcTy = Cast->getSrcTy();

  // Move the logic operation ahead of a zext or sext if the constant is
  // unchanged in the smaller source type. Performing the logic in a smaller
  // type may provide more information to later folds, and the smaller logic
  // instruction may be cheaper (particularly in the case of vectors).
  Value *X;
  if (match(V: Cast, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) {
    // The truncation is lossless iff zero-extending it back reproduces C.
    if (Constant *TruncC = IC.getLosslessUnsignedTrunc(C, TruncTy: SrcTy)) {
      // LogicOpc (zext X), C --> zext (LogicOpc X, C)
      Value *NewOp = IC.Builder.CreateBinOp(Opc: LogicOpc, LHS: X, RHS: TruncC);
      return new ZExtInst(NewOp, DestTy);
    }
  }

  if (match(V: Cast, P: m_OneUse(SubPattern: m_SExtLike(Op: m_Value(V&: X))))) {
    if (Constant *TruncC = IC.getLosslessSignedTrunc(C, TruncTy: SrcTy)) {
      // LogicOpc (sext X), C --> sext (LogicOpc X, C)
      Value *NewOp = IC.Builder.CreateBinOp(Opc: LogicOpc, LHS: X, RHS: TruncC);
      return new SExtInst(NewOp, DestTy);
    }
  }

  return nullptr;
}
1694 | |
/// Fold {and,or,xor} (cast X), Y.
///
/// Tries several folds in order: sign-bit lshr + zext(icmp) combination,
/// logic-with-constant through a single cast, narrowing through mismatched
/// extends, and finally sinking the logic below a matching pair of casts.
Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
  auto LogicOpc = I.getOpcode();
  assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding" );

  Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1);

  // fold bitwise(A >> BW - 1, zext(icmp)) (BW is the scalar bits of the
  // type of A)
  // -> bitwise(zext(A < 0), zext(icmp))
  // -> zext(bitwise(A < 0, icmp))
  auto FoldBitwiseICmpZeroWithICmp = [&](Value *Op0,
                                         Value *Op1) -> Instruction * {
    ICmpInst::Predicate Pred;
    Value *A;
    bool IsMatched =
        match(V: Op0,
              P: m_OneUse(SubPattern: m_LShr(
                  L: m_Value(V&: A),
                  R: m_SpecificInt(V: Op0->getType()->getScalarSizeInBits() - 1)))) &&
        match(V: Op1, P: m_OneUse(SubPattern: m_ZExt(Op: m_ICmp(Pred, L: m_Value(), R: m_Value()))));

    if (!IsMatched)
      return nullptr;

    // A >> (BW - 1) is 1 exactly when A is negative, i.e. (A < 0).
    auto *ICmpL =
        Builder.CreateICmpSLT(LHS: A, RHS: Constant::getNullValue(Ty: A->getType()));
    auto *ICmpR = cast<ZExtInst>(Val: Op1)->getOperand(i_nocapture: 0);
    auto *BitwiseOp = Builder.CreateBinOp(Opc: LogicOpc, LHS: ICmpL, RHS: ICmpR);

    return new ZExtInst(BitwiseOp, Op0->getType());
  };

  // Try the fold with both operand orders.
  if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op0, Op1))
    return Ret;

  if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op1, Op0))
    return Ret;

  CastInst *Cast0 = dyn_cast<CastInst>(Val: Op0);
  if (!Cast0)
    return nullptr;

  // This must be a cast from an integer or integer vector source type to allow
  // transformation of the logic operation to the source type.
  Type *DestTy = I.getType();
  Type *SrcTy = Cast0->getSrcTy();
  if (!SrcTy->isIntOrIntVectorTy())
    return nullptr;

  if (Instruction *Ret = foldLogicCastConstant(Logic&: I, Cast: Cast0, IC&: *this))
    return Ret;

  CastInst *Cast1 = dyn_cast<CastInst>(Val: Op1);
  if (!Cast1)
    return nullptr;

  // Both operands of the logic operation are casts. The casts must be the
  // same kind for reduction.
  Instruction::CastOps CastOpcode = Cast0->getOpcode();
  if (CastOpcode != Cast1->getOpcode())
    return nullptr;

  // If the source types do not match, but the casts are matching extends, we
  // can still narrow the logic op.
  if (SrcTy != Cast1->getSrcTy()) {
    Value *X, *Y;
    if (match(V: Cast0, P: m_OneUse(SubPattern: m_ZExtOrSExt(Op: m_Value(V&: X)))) &&
        match(V: Cast1, P: m_OneUse(SubPattern: m_ZExtOrSExt(Op: m_Value(V&: Y))))) {
      // Cast the narrower source to the wider source type.
      unsigned XNumBits = X->getType()->getScalarSizeInBits();
      unsigned YNumBits = Y->getType()->getScalarSizeInBits();
      if (XNumBits < YNumBits)
        X = Builder.CreateCast(Op: CastOpcode, V: X, DestTy: Y->getType());
      else
        Y = Builder.CreateCast(Op: CastOpcode, V: Y, DestTy: X->getType());
      // Do the logic op in the intermediate width, then widen more.
      Value *NarrowLogic = Builder.CreateBinOp(Opc: LogicOpc, LHS: X, RHS: Y);
      return CastInst::Create(CastOpcode, S: NarrowLogic, Ty: DestTy);
    }

    // Give up for other cast opcodes.
    return nullptr;
  }

  Value *Cast0Src = Cast0->getOperand(i_nocapture: 0);
  Value *Cast1Src = Cast1->getOperand(i_nocapture: 0);

  // fold logic(cast(A), cast(B)) -> cast(logic(A, B))
  // At least one cast must die for this to be profitable.
  if ((Cast0->hasOneUse() || Cast1->hasOneUse()) &&
      shouldOptimizeCast(CI: Cast0) && shouldOptimizeCast(CI: Cast1)) {
    Value *NewOp = Builder.CreateBinOp(Opc: LogicOpc, LHS: Cast0Src, RHS: Cast1Src,
                                       Name: I.getName());
    return CastInst::Create(CastOpcode, S: NewOp, Ty: DestTy);
  }

  return nullptr;
}
1793 | |
/// Fold 'and' of or-patterns into (possibly inverted) xor:
///   (A | B) & ~(A & B) --> A ^ B
///   (A | ~B) & (~A | B) --> ~(A ^ B)
static Instruction *foldAndToXor(BinaryOperator &I,
                                 InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::And);
  Value *Op0 = I.getOperand(i_nocapture: 0);
  Value *Op1 = I.getOperand(i_nocapture: 1);
  Value *A, *B;

  // Operand complexity canonicalization guarantees that the 'or' is Op0.
  // (A | B) & ~(A & B) --> A ^ B
  // (A | B) & ~(B & A) --> A ^ B
  if (match(V: &I, P: m_BinOp(L: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)),
                         R: m_Not(V: m_c_And(L: m_Deferred(V: A), R: m_Deferred(V: B))))))
    return BinaryOperator::CreateXor(V1: A, V2: B);

  // (A | ~B) & (~A | B) --> ~(A ^ B)
  // (A | ~B) & (B | ~A) --> ~(A ^ B)
  // (~B | A) & (~A | B) --> ~(A ^ B)
  // (~B | A) & (B | ~A) --> ~(A ^ B)
  // Require at least one one-use 'or' so the fold does not add instructions.
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(V: &I, P: m_BinOp(L: m_c_Or(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))),
                           R: m_c_Or(L: m_Not(V: m_Deferred(V: A)), R: m_Deferred(V: B)))))
      return BinaryOperator::CreateNot(Op: Builder.CreateXor(LHS: A, RHS: B));

  return nullptr;
}
1819 | |
/// Fold 'or' of and/xor patterns into (possibly inverted) xor/and:
///   (A & B) | ~(A | B) --> ~(A ^ B)
///   (A ^ B) | ~(A | B) --> ~(A & B)
///   (A & ~B) | (~A & B) --> A ^ B
static Instruction *foldOrToXor(BinaryOperator &I,
                                InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::Or);
  Value *Op0 = I.getOperand(i_nocapture: 0);
  Value *Op1 = I.getOperand(i_nocapture: 1);
  Value *A, *B;

  // Operand complexity canonicalization guarantees that the 'and' is Op0.
  // (A & B) | ~(A | B) --> ~(A ^ B)
  // (A & B) | ~(B | A) --> ~(A ^ B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) &&
        match(V: Op1, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))))
      return BinaryOperator::CreateNot(Op: Builder.CreateXor(LHS: A, RHS: B));

  // Operand complexity canonicalization guarantees that the 'xor' is Op0.
  // (A ^ B) | ~(A | B) --> ~(A & B)
  // (A ^ B) | ~(B | A) --> ~(A & B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))) &&
        match(V: Op1, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))))
      return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: A, RHS: B));

  // (A & ~B) | (~A & B) --> A ^ B
  // (A & ~B) | (B & ~A) --> A ^ B
  // (~B & A) | (~A & B) --> A ^ B
  // (~B & A) | (B & ~A) --> A ^ B
  if (match(V: Op0, P: m_c_And(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B)))) &&
      match(V: Op1, P: m_c_And(L: m_Not(V: m_Specific(V: A)), R: m_Specific(V: B))))
    return BinaryOperator::CreateXor(V1: A, V2: B);

  return nullptr;
}
1853 | |
1854 | /// Return true if a constant shift amount is always less than the specified |
1855 | /// bit-width. If not, the shift could create poison in the narrower type. |
1856 | static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) { |
1857 | APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth); |
1858 | return match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold)); |
1859 | } |
1860 | |
/// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
/// a common zext operand: and (binop (zext X), C), (zext X).
Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) {
  // This transform could also apply to {or, and, xor}, but there are better
  // folds for those cases, so we don't expect those patterns here. AShr is not
  // handled because it should always be transformed to LShr in this sequence.
  // The subtract transform is different because it has a constant on the left.
  // Add/mul commute the constant to RHS; sub with constant RHS becomes add.
  Value *Op0 = And.getOperand(i_nocapture: 0), *Op1 = And.getOperand(i_nocapture: 1);
  Constant *C;
  if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Specific(V: Op1), R: m_Constant(C)))) &&
      !match(V: Op0, P: m_OneUse(SubPattern: m_Mul(L: m_Specific(V: Op1), R: m_Constant(C)))) &&
      !match(V: Op0, P: m_OneUse(SubPattern: m_LShr(L: m_Specific(V: Op1), R: m_Constant(C)))) &&
      !match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_Specific(V: Op1), R: m_Constant(C)))) &&
      !match(V: Op0, P: m_OneUse(SubPattern: m_Sub(L: m_Constant(C), R: m_Specific(V: Op1)))))
    return nullptr;

  // The common operand must be a zext with at most 2 uses (the binop and this
  // 'and'); otherwise narrowing would duplicate the extension.
  Value *X;
  if (!match(V: Op1, P: m_ZExt(Op: m_Value(V&: X))) || Op1->hasNUsesOrMore(N: 3))
    return nullptr;

  Type *Ty = And.getType();
  if (!isa<VectorType>(Val: Ty) && !shouldChangeType(From: Ty, To: X->getType()))
    return nullptr;

  // If we're narrowing a shift, the shift amount must be safe (less than the
  // width) in the narrower type. If the shift amount is greater, instsimplify
  // usually handles that case, but we can't guarantee/assert it.
  Instruction::BinaryOps Opc = cast<BinaryOperator>(Val: Op0)->getOpcode();
  if (Opc == Instruction::LShr || Opc == Instruction::Shl)
    if (!canNarrowShiftAmt(C, BitWidth: X->getType()->getScalarSizeInBits()))
      return nullptr;

  // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X)
  // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X)
  Value *NewC = ConstantExpr::getTrunc(C, Ty: X->getType());
  Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, LHS: NewC, RHS: X)
                                         : Builder.CreateBinOp(Opc, LHS: X, RHS: NewC);
  return new ZExtInst(Builder.CreateAnd(LHS: NewBO, RHS: X), Ty);
}
1901 | |
/// Try folding relatively complex patterns for both And and Or operations
/// with all And and Or swapped.
///
/// Each transform comment shows the 'or' form first and the dual 'and' form
/// (all and/or swapped) second.
static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
                                             InstCombiner::BuilderTy &Builder) {
  const Instruction::BinaryOps Opcode = I.getOpcode();
  assert(Opcode == Instruction::And || Opcode == Instruction::Or);

  // Flip the logic operation.
  const Instruction::BinaryOps FlippedOpcode =
      (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;

  Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1);
  Value *A, *B, *C, *X, *Y, *Dummy;

  // Match following expressions:
  // (~(A | B) & C)
  // (~(A & B) | C)
  // Captures X = ~(A | B) or ~(A & B)
  // The m_A/m_B/m_C parameters are matchers (m_Value or m_Specific) supplied
  // by the caller; CountUses additionally requires Op and X to be one-use.
  const auto matchNotOrAnd =
      [Opcode, FlippedOpcode](Value *Op, auto m_A, auto m_B, auto m_C,
                              Value *&X, bool CountUses = false) -> bool {
    if (CountUses && !Op->hasOneUse())
      return false;

    if (match(Op, m_c_BinOp(FlippedOpcode,
                            m_CombineAnd(m_Value(V&: X),
                                         m_Not(m_c_BinOp(Opcode, m_A, m_B))),
                            m_C)))
      return !CountUses || X->hasOneUse();

    return false;
  };

  // (~(A | B) & C) | ... --> ...
  // (~(A & B) | C) & ... --> ...
  // TODO: One use checks are conservative. We just need to check that a total
  // number of multiple used values does not exceed reduction
  // in operations.
  if (matchNotOrAnd(Op0, m_Value(V&: A), m_Value(V&: B), m_Value(V&: C), X)) {
    // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A
    // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A)
    if (matchNotOrAnd(Op1, m_Specific(V: A), m_Specific(V: C), m_Specific(V: B), Dummy,
                      true)) {
      Value *Xor = Builder.CreateXor(LHS: B, RHS: C);
      return (Opcode == Instruction::Or)
                 ? BinaryOperator::CreateAnd(V1: Xor, V2: Builder.CreateNot(V: A))
                 : BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Xor, RHS: A));
    }

    // (~(A | B) & C) | (~(B | C) & A) --> (A ^ C) & ~B
    // (~(A & B) | C) & (~(B & C) | A) --> ~((A ^ C) & B)
    if (matchNotOrAnd(Op1, m_Specific(V: B), m_Specific(V: C), m_Specific(V: A), Dummy,
                      true)) {
      Value *Xor = Builder.CreateXor(LHS: A, RHS: C);
      return (Opcode == Instruction::Or)
                 ? BinaryOperator::CreateAnd(V1: Xor, V2: Builder.CreateNot(V: B))
                 : BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Xor, RHS: B));
    }

    // (~(A | B) & C) | ~(A | C) --> ~((B & C) | A)
    // (~(A & B) | C) & ~(A & C) --> ~((B | C) & A)
    if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse(
                        SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: C)))))))
      return BinaryOperator::CreateNot(Op: Builder.CreateBinOp(
          Opc: Opcode, LHS: Builder.CreateBinOp(Opc: FlippedOpcode, LHS: B, RHS: C), RHS: A));

    // (~(A | B) & C) | ~(B | C) --> ~((A & C) | B)
    // (~(A & B) | C) & ~(B & C) --> ~((A | C) & B)
    if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse(
                        SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: B), R: m_Specific(V: C)))))))
      return BinaryOperator::CreateNot(Op: Builder.CreateBinOp(
          Opc: Opcode, LHS: Builder.CreateBinOp(Opc: FlippedOpcode, LHS: A, RHS: C), RHS: B));

    // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B)))
    // Note, the pattern with swapped and/or is not handled because the
    // result is more undefined than a source:
    // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid.
    if (Opcode == Instruction::Or && Op0->hasOneUse() &&
        match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_CombineAnd(
                        L: m_Value(V&: Y),
                        R: m_c_BinOp(Opcode, L: m_Specific(V: C),
                                   R: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))))))) {
      // X = ~(A | B)
      // Y = (C | (A ^ B)
      Value *Or = cast<BinaryOperator>(Val: X)->getOperand(i_nocapture: 0);
      return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Or, RHS: Y));
    }
  }

  // (~A & B & C) | ... --> ...
  // (~A | B | C) & ... --> ...
  // TODO: One use checks are conservative. We just need to check that a total
  // number of multiple used values does not exceed reduction
  // in operations.
  if (match(V: Op0,
            P: m_OneUse(SubPattern: m_c_BinOp(Opcode: FlippedOpcode,
                                 L: m_BinOp(Opcode: FlippedOpcode, L: m_Value(V&: B), R: m_Value(V&: C)),
                                 R: m_CombineAnd(L: m_Value(V&: X), R: m_Not(V: m_Value(V&: A)))))) ||
      match(V: Op0, P: m_OneUse(SubPattern: m_c_BinOp(
                      Opcode: FlippedOpcode,
                      L: m_c_BinOp(Opcode: FlippedOpcode, L: m_Value(V&: C),
                                 R: m_CombineAnd(L: m_Value(V&: X), R: m_Not(V: m_Value(V&: A)))),
                      R: m_Value(V&: B))))) {
    // X = ~A
    // (~A & B & C) | ~(A | B | C) --> ~(A | (B ^ C))
    // (~A | B | C) & ~(A & B & C) --> (~A | (B ^ C))
    if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_c_BinOp(
                        Opcode, L: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: B)),
                        R: m_Specific(V: C))))) ||
        match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_c_BinOp(
                        Opcode, L: m_c_BinOp(Opcode, L: m_Specific(V: B), R: m_Specific(V: C)),
                        R: m_Specific(V: A))))) ||
        match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_c_BinOp(
                        Opcode, L: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: C)),
                        R: m_Specific(V: B)))))) {
      Value *Xor = Builder.CreateXor(LHS: B, RHS: C);
      return (Opcode == Instruction::Or)
                 ? BinaryOperator::CreateNot(Op: Builder.CreateOr(LHS: Xor, RHS: A))
                 : BinaryOperator::CreateOr(V1: Xor, V2: X);
    }

    // (~A & B & C) | ~(A | B) --> (C | ~B) & ~A
    // (~A | B | C) & ~(A & B) --> (C & ~B) | ~A
    if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse(
                        SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: B)))))))
      return BinaryOperator::Create(
          Op: FlippedOpcode, S1: Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: Builder.CreateNot(V: B)),
          S2: X);

    // (~A & B & C) | ~(A | C) --> (B | ~C) & ~A
    // (~A | B | C) & ~(A & C) --> (B & ~C) | ~A
    if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse(
                        SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: C)))))))
      return BinaryOperator::Create(
          Op: FlippedOpcode, S1: Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: Builder.CreateNot(V: C)),
          S2: X);
  }

  return nullptr;
}
2042 | |
2043 | /// Try to reassociate a pair of binops so that values with one use only are |
2044 | /// part of the same instruction. This may enable folds that are limited with |
2045 | /// multi-use restrictions and makes it more likely to match other patterns that |
2046 | /// are looking for a common operand. |
2047 | static Instruction *reassociateForUses(BinaryOperator &BO, |
2048 | InstCombinerImpl::BuilderTy &Builder) { |
2049 | Instruction::BinaryOps Opcode = BO.getOpcode(); |
2050 | Value *X, *Y, *Z; |
2051 | if (match(V: &BO, |
2052 | P: m_c_BinOp(Opcode, L: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: X), R: m_Value(V&: Y))), |
2053 | R: m_OneUse(SubPattern: m_Value(V&: Z))))) { |
2054 | if (!isa<Constant>(Val: X) && !isa<Constant>(Val: Y) && !isa<Constant>(Val: Z)) { |
2055 | // (X op Y) op Z --> (Y op Z) op X |
2056 | if (!X->hasOneUse()) { |
2057 | Value *YZ = Builder.CreateBinOp(Opc: Opcode, LHS: Y, RHS: Z); |
2058 | return BinaryOperator::Create(Op: Opcode, S1: YZ, S2: X); |
2059 | } |
2060 | // (X op Y) op Z --> (X op Z) op Y |
2061 | if (!Y->hasOneUse()) { |
2062 | Value *XZ = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Z); |
2063 | return BinaryOperator::Create(Op: Opcode, S1: XZ, S2: Y); |
2064 | } |
2065 | } |
2066 | } |
2067 | |
2068 | return nullptr; |
2069 | } |
2070 | |
2071 | // Match |
2072 | // (X + C2) | C |
2073 | // (X + C2) ^ C |
2074 | // (X + C2) & C |
2075 | // and convert to do the bitwise logic first: |
2076 | // (X | C) + C2 |
2077 | // (X ^ C) + C2 |
2078 | // (X & C) + C2 |
2079 | // iff bits affected by logic op are lower than last bit affected by math op |
2080 | static Instruction *canonicalizeLogicFirst(BinaryOperator &I, |
2081 | InstCombiner::BuilderTy &Builder) { |
2082 | Type *Ty = I.getType(); |
2083 | Instruction::BinaryOps OpC = I.getOpcode(); |
2084 | Value *Op0 = I.getOperand(i_nocapture: 0); |
2085 | Value *Op1 = I.getOperand(i_nocapture: 1); |
2086 | Value *X; |
2087 | const APInt *C, *C2; |
2088 | |
2089 | if (!(match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C2)))) && |
2090 | match(V: Op1, P: m_APInt(Res&: C)))) |
2091 | return nullptr; |
2092 | |
2093 | unsigned Width = Ty->getScalarSizeInBits(); |
2094 | unsigned LastOneMath = Width - C2->countr_zero(); |
2095 | |
2096 | switch (OpC) { |
2097 | case Instruction::And: |
2098 | if (C->countl_one() < LastOneMath) |
2099 | return nullptr; |
2100 | break; |
2101 | case Instruction::Xor: |
2102 | case Instruction::Or: |
2103 | if (C->countl_zero() < LastOneMath) |
2104 | return nullptr; |
2105 | break; |
2106 | default: |
2107 | llvm_unreachable("Unexpected BinaryOp!" ); |
2108 | } |
2109 | |
2110 | Value *NewBinOp = Builder.CreateBinOp(Opc: OpC, LHS: X, RHS: ConstantInt::get(Ty, V: *C)); |
2111 | return BinaryOperator::CreateWithCopiedFlags(Opc: Instruction::Add, V1: NewBinOp, |
2112 | V2: ConstantInt::get(Ty, V: *C2), CopyO: Op0); |
2113 | } |
2114 | |
2115 | // binop(shift(ShiftedC1, ShAmt), shift(ShiftedC2, add(ShAmt, AddC))) -> |
2116 | // shift(binop(ShiftedC1, shift(ShiftedC2, AddC)), ShAmt) |
2117 | // where both shifts are the same and AddC is a valid shift amount. |
2118 | Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) { |
2119 | assert((I.isBitwiseLogicOp() || I.getOpcode() == Instruction::Add) && |
2120 | "Unexpected opcode" ); |
2121 | |
2122 | Value *ShAmt; |
2123 | Constant *ShiftedC1, *ShiftedC2, *AddC; |
2124 | Type *Ty = I.getType(); |
2125 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2126 | if (!match(V: &I, P: m_c_BinOp(L: m_Shift(L: m_ImmConstant(C&: ShiftedC1), R: m_Value(V&: ShAmt)), |
2127 | R: m_Shift(L: m_ImmConstant(C&: ShiftedC2), |
2128 | R: m_AddLike(L: m_Deferred(V: ShAmt), |
2129 | R: m_ImmConstant(C&: AddC)))))) |
2130 | return nullptr; |
2131 | |
2132 | // Make sure the add constant is a valid shift amount. |
2133 | if (!match(V: AddC, |
2134 | P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold: APInt(BitWidth, BitWidth)))) |
2135 | return nullptr; |
2136 | |
2137 | // Avoid constant expressions. |
2138 | auto *Op0Inst = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 0)); |
2139 | auto *Op1Inst = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1)); |
2140 | if (!Op0Inst || !Op1Inst) |
2141 | return nullptr; |
2142 | |
2143 | // Both shifts must be the same. |
2144 | Instruction::BinaryOps ShiftOp = |
2145 | static_cast<Instruction::BinaryOps>(Op0Inst->getOpcode()); |
2146 | if (ShiftOp != Op1Inst->getOpcode()) |
2147 | return nullptr; |
2148 | |
2149 | // For adds, only left shifts are supported. |
2150 | if (I.getOpcode() == Instruction::Add && ShiftOp != Instruction::Shl) |
2151 | return nullptr; |
2152 | |
2153 | Value *NewC = Builder.CreateBinOp( |
2154 | Opc: I.getOpcode(), LHS: ShiftedC1, RHS: Builder.CreateBinOp(Opc: ShiftOp, LHS: ShiftedC2, RHS: AddC)); |
2155 | return BinaryOperator::Create(Op: ShiftOp, S1: NewC, S2: ShAmt); |
2156 | } |
2157 | |
2158 | // Fold and/or/xor with two equal intrinsic IDs: |
2159 | // bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt)) |
2160 | // -> fshl(bitwise(A, C), bitwise(B, D), ShAmt) |
2161 | // bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt)) |
2162 | // -> fshr(bitwise(A, C), bitwise(B, D), ShAmt) |
2163 | // bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B)) |
2164 | // bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C))) |
2165 | // bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B)) |
2166 | // bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C))) |
2167 | static Instruction * |
2168 | foldBitwiseLogicWithIntrinsics(BinaryOperator &I, |
2169 | InstCombiner::BuilderTy &Builder) { |
2170 | assert(I.isBitwiseLogicOp() && "Should and/or/xor" ); |
2171 | if (!I.getOperand(i_nocapture: 0)->hasOneUse()) |
2172 | return nullptr; |
2173 | IntrinsicInst *X = dyn_cast<IntrinsicInst>(Val: I.getOperand(i_nocapture: 0)); |
2174 | if (!X) |
2175 | return nullptr; |
2176 | |
2177 | IntrinsicInst *Y = dyn_cast<IntrinsicInst>(Val: I.getOperand(i_nocapture: 1)); |
2178 | if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID())) |
2179 | return nullptr; |
2180 | |
2181 | Intrinsic::ID IID = X->getIntrinsicID(); |
2182 | const APInt *RHSC; |
2183 | // Try to match constant RHS. |
2184 | if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) || |
2185 | !match(V: I.getOperand(i_nocapture: 1), P: m_APInt(Res&: RHSC)))) |
2186 | return nullptr; |
2187 | |
2188 | switch (IID) { |
2189 | case Intrinsic::fshl: |
2190 | case Intrinsic::fshr: { |
2191 | if (X->getOperand(i_nocapture: 2) != Y->getOperand(i_nocapture: 2)) |
2192 | return nullptr; |
2193 | Value *NewOp0 = |
2194 | Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X->getOperand(i_nocapture: 0), RHS: Y->getOperand(i_nocapture: 0)); |
2195 | Value *NewOp1 = |
2196 | Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X->getOperand(i_nocapture: 1), RHS: Y->getOperand(i_nocapture: 1)); |
2197 | Function *F = Intrinsic::getDeclaration(M: I.getModule(), id: IID, Tys: I.getType()); |
2198 | return CallInst::Create(Func: F, Args: {NewOp0, NewOp1, X->getOperand(i_nocapture: 2)}); |
2199 | } |
2200 | case Intrinsic::bswap: |
2201 | case Intrinsic::bitreverse: { |
2202 | Value *NewOp0 = Builder.CreateBinOp( |
2203 | Opc: I.getOpcode(), LHS: X->getOperand(i_nocapture: 0), |
2204 | RHS: Y ? Y->getOperand(i_nocapture: 0) |
2205 | : ConstantInt::get(I.getType(), IID == Intrinsic::bswap |
2206 | ? RHSC->byteSwap() |
2207 | : RHSC->reverseBits())); |
2208 | Function *F = Intrinsic::getDeclaration(M: I.getModule(), id: IID, Tys: I.getType()); |
2209 | return CallInst::Create(Func: F, Args: {NewOp0}); |
2210 | } |
2211 | default: |
2212 | return nullptr; |
2213 | } |
2214 | } |
2215 | |
2216 | // Try to simplify V by replacing occurrences of Op with RepOp, but only look |
2217 | // through bitwise operations. In particular, for X | Y we try to replace Y with |
2218 | // 0 inside X and for X & Y we try to replace Y with -1 inside X. |
2219 | // Return the simplified result of X if successful, and nullptr otherwise. |
2220 | // If SimplifyOnly is true, no new instructions will be created. |
2221 | static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp, |
2222 | bool SimplifyOnly, |
2223 | InstCombinerImpl &IC, |
2224 | unsigned Depth = 0) { |
2225 | if (Op == RepOp) |
2226 | return nullptr; |
2227 | |
2228 | if (V == Op) |
2229 | return RepOp; |
2230 | |
2231 | auto *I = dyn_cast<BinaryOperator>(Val: V); |
2232 | if (!I || !I->isBitwiseLogicOp() || Depth >= 3) |
2233 | return nullptr; |
2234 | |
2235 | if (!I->hasOneUse()) |
2236 | SimplifyOnly = true; |
2237 | |
2238 | Value *NewOp0 = simplifyAndOrWithOpReplaced(V: I->getOperand(i_nocapture: 0), Op, RepOp, |
2239 | SimplifyOnly, IC, Depth: Depth + 1); |
2240 | Value *NewOp1 = simplifyAndOrWithOpReplaced(V: I->getOperand(i_nocapture: 1), Op, RepOp, |
2241 | SimplifyOnly, IC, Depth: Depth + 1); |
2242 | if (!NewOp0 && !NewOp1) |
2243 | return nullptr; |
2244 | |
2245 | if (!NewOp0) |
2246 | NewOp0 = I->getOperand(i_nocapture: 0); |
2247 | if (!NewOp1) |
2248 | NewOp1 = I->getOperand(i_nocapture: 1); |
2249 | |
2250 | if (Value *Res = simplifyBinOp(Opcode: I->getOpcode(), LHS: NewOp0, RHS: NewOp1, |
2251 | Q: IC.getSimplifyQuery().getWithInstruction(I))) |
2252 | return Res; |
2253 | |
2254 | if (SimplifyOnly) |
2255 | return nullptr; |
2256 | return IC.Builder.CreateBinOp(Opc: I->getOpcode(), LHS: NewOp0, RHS: NewOp1); |
2257 | } |
2258 | |
2259 | // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches |
2260 | // here. We should standardize that construct where it is needed or choose some |
2261 | // other way to ensure that commutated variants of patterns are not missed. |
2262 | Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { |
2263 | Type *Ty = I.getType(); |
2264 | |
2265 | if (Value *V = simplifyAndInst(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1), |
2266 | Q: SQ.getWithInstruction(I: &I))) |
2267 | return replaceInstUsesWith(I, V); |
2268 | |
2269 | if (SimplifyAssociativeOrCommutative(I)) |
2270 | return &I; |
2271 | |
2272 | if (Instruction *X = foldVectorBinop(Inst&: I)) |
2273 | return X; |
2274 | |
2275 | if (Instruction *Phi = foldBinopWithPhiOperands(BO&: I)) |
2276 | return Phi; |
2277 | |
2278 | // See if we can simplify any instructions used by the instruction whose sole |
2279 | // purpose is to compute bits we don't care about. |
2280 | if (SimplifyDemandedInstructionBits(Inst&: I)) |
2281 | return &I; |
2282 | |
2283 | // Do this before using distributive laws to catch simple and/or/not patterns. |
2284 | if (Instruction *Xor = foldAndToXor(I, Builder)) |
2285 | return Xor; |
2286 | |
2287 | if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) |
2288 | return X; |
2289 | |
2290 | // (A|B)&(A|C) -> A|(B&C) etc |
2291 | if (Value *V = foldUsingDistributiveLaws(I)) |
2292 | return replaceInstUsesWith(I, V); |
2293 | |
2294 | if (Instruction *R = foldBinOpShiftWithShift(I)) |
2295 | return R; |
2296 | |
2297 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
2298 | |
2299 | Value *X, *Y; |
2300 | const APInt *C; |
2301 | if ((match(V: Op0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_One(), R: m_Value(V&: X)))) || |
2302 | (match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_APInt(Res&: C), R: m_Value(V&: X)))) && (*C)[0])) && |
2303 | match(V: Op1, P: m_One())) { |
2304 | // (1 >> X) & 1 --> zext(X == 0) |
2305 | // (C << X) & 1 --> zext(X == 0), when C is odd |
2306 | Value *IsZero = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty, V: 0)); |
2307 | return new ZExtInst(IsZero, Ty); |
2308 | } |
2309 | |
2310 | // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y |
2311 | Value *Neg; |
2312 | if (match(V: &I, |
2313 | P: m_c_And(L: m_CombineAnd(L: m_Value(V&: Neg), |
2314 | R: m_OneUse(SubPattern: m_Neg(V: m_And(L: m_Value(), R: m_One())))), |
2315 | R: m_Value(V&: Y)))) { |
2316 | Value *Cmp = Builder.CreateIsNull(Arg: Neg); |
2317 | return SelectInst::Create(C: Cmp, S1: ConstantInt::getNullValue(Ty), S2: Y); |
2318 | } |
2319 | |
2320 | // Canonicalize: |
2321 | // (X +/- Y) & Y --> ~X & Y when Y is a power of 2. |
2322 | if (match(V: &I, P: m_c_And(L: m_Value(V&: Y), R: m_OneUse(SubPattern: m_CombineOr( |
2323 | L: m_c_Add(L: m_Value(V&: X), R: m_Deferred(V: Y)), |
2324 | R: m_Sub(L: m_Value(V&: X), R: m_Deferred(V: Y)))))) && |
2325 | isKnownToBeAPowerOfTwo(V: Y, /*OrZero*/ true, /*Depth*/ 0, CxtI: &I)) |
2326 | return BinaryOperator::CreateAnd(V1: Builder.CreateNot(V: X), V2: Y); |
2327 | |
2328 | if (match(V: Op1, P: m_APInt(Res&: C))) { |
2329 | const APInt *XorC; |
2330 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_APInt(Res&: XorC))))) { |
2331 | // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) |
2332 | Constant *NewC = ConstantInt::get(Ty, V: *C & *XorC); |
2333 | Value *And = Builder.CreateAnd(LHS: X, RHS: Op1); |
2334 | And->takeName(V: Op0); |
2335 | return BinaryOperator::CreateXor(V1: And, V2: NewC); |
2336 | } |
2337 | |
2338 | const APInt *OrC; |
2339 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: X), R: m_APInt(Res&: OrC))))) { |
2340 | // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2) |
2341 | // NOTE: This reduces the number of bits set in the & mask, which |
2342 | // can expose opportunities for store narrowing for scalars. |
2343 | // NOTE: SimplifyDemandedBits should have already removed bits from C1 |
2344 | // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in |
2345 | // above, but this feels safer. |
2346 | APInt Together = *C & *OrC; |
2347 | Value *And = Builder.CreateAnd(LHS: X, RHS: ConstantInt::get(Ty, V: Together ^ *C)); |
2348 | And->takeName(V: Op0); |
2349 | return BinaryOperator::CreateOr(V1: And, V2: ConstantInt::get(Ty, V: Together)); |
2350 | } |
2351 | |
2352 | unsigned Width = Ty->getScalarSizeInBits(); |
2353 | const APInt *ShiftC; |
2354 | if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_AShr(L: m_Value(V&: X), R: m_APInt(Res&: ShiftC))))) && |
2355 | ShiftC->ult(RHS: Width)) { |
2356 | if (*C == APInt::getLowBitsSet(numBits: Width, loBitsSet: Width - ShiftC->getZExtValue())) { |
2357 | // We are clearing high bits that were potentially set by sext+ashr: |
2358 | // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC |
2359 | Value *Sext = Builder.CreateSExt(V: X, DestTy: Ty); |
2360 | Constant *ShAmtC = ConstantInt::get(Ty, V: ShiftC->zext(width: Width)); |
2361 | return BinaryOperator::CreateLShr(V1: Sext, V2: ShAmtC); |
2362 | } |
2363 | } |
2364 | |
2365 | // If this 'and' clears the sign-bits added by ashr, replace with lshr: |
2366 | // and (ashr X, ShiftC), C --> lshr X, ShiftC |
2367 | if (match(V: Op0, P: m_AShr(L: m_Value(V&: X), R: m_APInt(Res&: ShiftC))) && ShiftC->ult(RHS: Width) && |
2368 | C->isMask(numBits: Width - ShiftC->getZExtValue())) |
2369 | return BinaryOperator::CreateLShr(V1: X, V2: ConstantInt::get(Ty, V: *ShiftC)); |
2370 | |
2371 | const APInt *AddC; |
2372 | if (match(V: Op0, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: AddC)))) { |
2373 | // If we are masking the result of the add down to exactly one bit and |
2374 | // the constant we are adding has no bits set below that bit, then the |
2375 | // add is flipping a single bit. Example: |
2376 | // (X + 4) & 4 --> (X & 4) ^ 4 |
2377 | if (Op0->hasOneUse() && C->isPowerOf2() && (*AddC & (*C - 1)) == 0) { |
2378 | assert((*C & *AddC) != 0 && "Expected common bit" ); |
2379 | Value *NewAnd = Builder.CreateAnd(LHS: X, RHS: Op1); |
2380 | return BinaryOperator::CreateXor(V1: NewAnd, V2: Op1); |
2381 | } |
2382 | } |
2383 | |
2384 | // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the |
2385 | // bitwidth of X and OP behaves well when given trunc(C1) and X. |
2386 | auto isNarrowableBinOpcode = [](BinaryOperator *B) { |
2387 | switch (B->getOpcode()) { |
2388 | case Instruction::Xor: |
2389 | case Instruction::Or: |
2390 | case Instruction::Mul: |
2391 | case Instruction::Add: |
2392 | case Instruction::Sub: |
2393 | return true; |
2394 | default: |
2395 | return false; |
2396 | } |
2397 | }; |
2398 | BinaryOperator *BO; |
2399 | if (match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(I&: BO))) && isNarrowableBinOpcode(BO)) { |
2400 | Instruction::BinaryOps BOpcode = BO->getOpcode(); |
2401 | Value *X; |
2402 | const APInt *C1; |
2403 | // TODO: The one-use restrictions could be relaxed a little if the AND |
2404 | // is going to be removed. |
2405 | // Try to narrow the 'and' and a binop with constant operand: |
2406 | // and (bo (zext X), C1), C --> zext (and (bo X, TruncC1), TruncC) |
2407 | if (match(V: BO, P: m_c_BinOp(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))), R: m_APInt(Res&: C1))) && |
2408 | C->isIntN(N: X->getType()->getScalarSizeInBits())) { |
2409 | unsigned XWidth = X->getType()->getScalarSizeInBits(); |
2410 | Constant *TruncC1 = ConstantInt::get(Ty: X->getType(), V: C1->trunc(width: XWidth)); |
2411 | Value *BinOp = isa<ZExtInst>(Val: BO->getOperand(i_nocapture: 0)) |
2412 | ? Builder.CreateBinOp(Opc: BOpcode, LHS: X, RHS: TruncC1) |
2413 | : Builder.CreateBinOp(Opc: BOpcode, LHS: TruncC1, RHS: X); |
2414 | Constant *TruncC = ConstantInt::get(Ty: X->getType(), V: C->trunc(width: XWidth)); |
2415 | Value *And = Builder.CreateAnd(LHS: BinOp, RHS: TruncC); |
2416 | return new ZExtInst(And, Ty); |
2417 | } |
2418 | |
2419 | // Similar to above: if the mask matches the zext input width, then the |
2420 | // 'and' can be eliminated, so we can truncate the other variable op: |
2421 | // and (bo (zext X), Y), C --> zext (bo X, (trunc Y)) |
2422 | if (isa<Instruction>(Val: BO->getOperand(i_nocapture: 0)) && |
2423 | match(V: BO->getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && |
2424 | C->isMask(numBits: X->getType()->getScalarSizeInBits())) { |
2425 | Y = BO->getOperand(i_nocapture: 1); |
2426 | Value *TrY = Builder.CreateTrunc(V: Y, DestTy: X->getType(), Name: Y->getName() + ".tr" ); |
2427 | Value *NewBO = |
2428 | Builder.CreateBinOp(Opc: BOpcode, LHS: X, RHS: TrY, Name: BO->getName() + ".narrow" ); |
2429 | return new ZExtInst(NewBO, Ty); |
2430 | } |
2431 | // and (bo Y, (zext X)), C --> zext (bo (trunc Y), X) |
2432 | if (isa<Instruction>(Val: BO->getOperand(i_nocapture: 1)) && |
2433 | match(V: BO->getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && |
2434 | C->isMask(numBits: X->getType()->getScalarSizeInBits())) { |
2435 | Y = BO->getOperand(i_nocapture: 0); |
2436 | Value *TrY = Builder.CreateTrunc(V: Y, DestTy: X->getType(), Name: Y->getName() + ".tr" ); |
2437 | Value *NewBO = |
2438 | Builder.CreateBinOp(Opc: BOpcode, LHS: TrY, RHS: X, Name: BO->getName() + ".narrow" ); |
2439 | return new ZExtInst(NewBO, Ty); |
2440 | } |
2441 | } |
2442 | |
2443 | // This is intentionally placed after the narrowing transforms for |
2444 | // efficiency (transform directly to the narrow logic op if possible). |
2445 | // If the mask is only needed on one incoming arm, push the 'and' op up. |
2446 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y)))) || |
2447 | match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
2448 | APInt NotAndMask(~(*C)); |
2449 | BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Val: Op0)->getOpcode(); |
2450 | if (MaskedValueIsZero(V: X, Mask: NotAndMask, Depth: 0, CxtI: &I)) { |
2451 | // Not masking anything out for the LHS, move mask to RHS. |
2452 | // and ({x}or X, Y), C --> {x}or X, (and Y, C) |
2453 | Value *NewRHS = Builder.CreateAnd(LHS: Y, RHS: Op1, Name: Y->getName() + ".masked" ); |
2454 | return BinaryOperator::Create(Op: BinOp, S1: X, S2: NewRHS); |
2455 | } |
2456 | if (!isa<Constant>(Val: Y) && MaskedValueIsZero(V: Y, Mask: NotAndMask, Depth: 0, CxtI: &I)) { |
2457 | // Not masking anything out for the RHS, move mask to LHS. |
2458 | // and ({x}or X, Y), C --> {x}or (and X, C), Y |
2459 | Value *NewLHS = Builder.CreateAnd(LHS: X, RHS: Op1, Name: X->getName() + ".masked" ); |
2460 | return BinaryOperator::Create(Op: BinOp, S1: NewLHS, S2: Y); |
2461 | } |
2462 | } |
2463 | |
2464 | // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 |
2465 | // constant, test if the shift amount equals the offset bit index: |
2466 | // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0 |
2467 | // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 |
2468 | if (C->isPowerOf2() && |
2469 | match(V: Op0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Power2(V&: ShiftC), R: m_Value(V&: X))))) { |
2470 | int Log2ShiftC = ShiftC->exactLogBase2(); |
2471 | int Log2C = C->exactLogBase2(); |
2472 | bool IsShiftLeft = |
2473 | cast<BinaryOperator>(Val: Op0)->getOpcode() == Instruction::Shl; |
2474 | int BitNum = IsShiftLeft ? Log2C - Log2ShiftC : Log2ShiftC - Log2C; |
2475 | assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask" ); |
2476 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty, V: BitNum)); |
2477 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty, V: *C), |
2478 | S2: ConstantInt::getNullValue(Ty)); |
2479 | } |
2480 | |
2481 | Constant *C1, *C2; |
2482 | const APInt *C3 = C; |
2483 | Value *X; |
2484 | if (C3->isPowerOf2()) { |
2485 | Constant *Log2C3 = ConstantInt::get(Ty, V: C3->countr_zero()); |
2486 | if (match(V: Op0, P: m_OneUse(SubPattern: m_LShr(L: m_Shl(L: m_ImmConstant(C&: C1), R: m_Value(V&: X)), |
2487 | R: m_ImmConstant(C&: C2)))) && |
2488 | match(V: C1, P: m_Power2())) { |
2489 | Constant *Log2C1 = ConstantExpr::getExactLogBase2(C: C1); |
2490 | Constant *LshrC = ConstantExpr::getAdd(C1: C2, C2: Log2C3); |
2491 | KnownBits KnownLShrc = computeKnownBits(V: LshrC, Depth: 0, CxtI: nullptr); |
2492 | if (KnownLShrc.getMaxValue().ult(RHS: Width)) { |
2493 | // iff C1,C3 is pow2 and C2 + cttz(C3) < BitWidth: |
2494 | // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0 |
2495 | Constant *CmpC = ConstantExpr::getSub(C1: LshrC, C2: Log2C1); |
2496 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: CmpC); |
2497 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty, V: *C3), |
2498 | S2: ConstantInt::getNullValue(Ty)); |
2499 | } |
2500 | } |
2501 | |
2502 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_LShr(L: m_ImmConstant(C&: C1), R: m_Value(V&: X)), |
2503 | R: m_ImmConstant(C&: C2)))) && |
2504 | match(V: C1, P: m_Power2())) { |
2505 | Constant *Log2C1 = ConstantExpr::getExactLogBase2(C: C1); |
2506 | Constant *Cmp = |
2507 | ConstantExpr::getCompare(pred: ICmpInst::ICMP_ULT, C1: Log2C3, C2); |
2508 | if (Cmp->isZeroValue()) { |
2509 | // iff C1,C3 is pow2 and Log2(C3) >= C2: |
2510 | // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0 |
2511 | Constant *ShlC = ConstantExpr::getAdd(C1: C2, C2: Log2C1); |
2512 | Constant *CmpC = ConstantExpr::getSub(C1: ShlC, C2: Log2C3); |
2513 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: CmpC); |
2514 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty, V: *C3), |
2515 | S2: ConstantInt::getNullValue(Ty)); |
2516 | } |
2517 | } |
2518 | } |
2519 | } |
2520 | |
2521 | // If we are clearing the sign bit of a floating-point value, convert this to |
2522 | // fabs, then cast back to integer. |
2523 | // |
2524 | // This is a generous interpretation for noimplicitfloat, this is not a true |
2525 | // floating-point operation. |
2526 | // |
2527 | // Assumes any IEEE-represented type has the sign bit in the high bit. |
2528 | // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt |
2529 | Value *CastOp; |
2530 | if (match(V: Op0, P: m_ElementWiseBitCast(Op: m_Value(V&: CastOp))) && |
2531 | match(V: Op1, P: m_MaxSignedValue()) && |
2532 | !Builder.GetInsertBlock()->getParent()->hasFnAttribute( |
2533 | Attribute::NoImplicitFloat)) { |
2534 | Type *EltTy = CastOp->getType()->getScalarType(); |
2535 | if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { |
2536 | Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::ID: fabs, V: CastOp); |
2537 | return new BitCastInst(FAbs, I.getType()); |
2538 | } |
2539 | } |
2540 | |
2541 | // and(shl(zext(X), Y), SignMask) -> and(sext(X), SignMask) |
2542 | // where Y is a valid shift amount. |
2543 | if (match(V: &I, P: m_And(L: m_OneUse(SubPattern: m_Shl(L: m_ZExt(Op: m_Value(V&: X)), R: m_Value(V&: Y))), |
2544 | R: m_SignMask())) && |
2545 | match(V: Y, P: m_SpecificInt_ICMP( |
2546 | Predicate: ICmpInst::Predicate::ICMP_EQ, |
2547 | Threshold: APInt(Ty->getScalarSizeInBits(), |
2548 | Ty->getScalarSizeInBits() - |
2549 | X->getType()->getScalarSizeInBits())))) { |
2550 | auto *SExt = Builder.CreateSExt(V: X, DestTy: Ty, Name: X->getName() + ".signext" ); |
2551 | return BinaryOperator::CreateAnd(V1: SExt, V2: Op1); |
2552 | } |
2553 | |
2554 | if (Instruction *Z = narrowMaskedBinOp(And&: I)) |
2555 | return Z; |
2556 | |
2557 | if (I.getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
2558 | if (auto *SI0 = dyn_cast<SelectInst>(Val: Op0)) { |
2559 | if (auto *R = |
2560 | foldAndOrOfSelectUsingImpliedCond(Op: Op1, SI&: *SI0, /* IsAnd */ true)) |
2561 | return R; |
2562 | } |
2563 | if (auto *SI1 = dyn_cast<SelectInst>(Val: Op1)) { |
2564 | if (auto *R = |
2565 | foldAndOrOfSelectUsingImpliedCond(Op: Op0, SI&: *SI1, /* IsAnd */ true)) |
2566 | return R; |
2567 | } |
2568 | } |
2569 | |
2570 | if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) |
2571 | return FoldedLogic; |
2572 | |
2573 | if (Instruction *DeMorgan = matchDeMorgansLaws(I, IC&: *this)) |
2574 | return DeMorgan; |
2575 | |
2576 | { |
2577 | Value *A, *B, *C; |
2578 | // A & ~(A ^ B) --> A & B |
2579 | if (match(V: Op1, P: m_Not(V: m_c_Xor(L: m_Specific(V: Op0), R: m_Value(V&: B))))) |
2580 | return BinaryOperator::CreateAnd(V1: Op0, V2: B); |
2581 | // ~(A ^ B) & A --> A & B |
2582 | if (match(V: Op0, P: m_Not(V: m_c_Xor(L: m_Specific(V: Op1), R: m_Value(V&: B))))) |
2583 | return BinaryOperator::CreateAnd(V1: Op1, V2: B); |
2584 | |
2585 | // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C |
2586 | if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))) && |
2587 | match(V: Op1, P: m_Xor(L: m_Xor(L: m_Specific(V: B), R: m_Value(V&: C)), R: m_Specific(V: A)))) { |
2588 | Value *NotC = Op1->hasOneUse() |
2589 | ? Builder.CreateNot(V: C) |
2590 | : getFreelyInverted(V: C, WillInvertAllUses: C->hasOneUse(), Builder: &Builder); |
2591 | if (NotC != nullptr) |
2592 | return BinaryOperator::CreateAnd(V1: Op0, V2: NotC); |
2593 | } |
2594 | |
2595 | // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C |
2596 | if (match(V: Op0, P: m_Xor(L: m_Xor(L: m_Value(V&: A), R: m_Value(V&: C)), R: m_Value(V&: B))) && |
2597 | match(V: Op1, P: m_Xor(L: m_Specific(V: B), R: m_Specific(V: A)))) { |
2598 | Value *NotC = Op0->hasOneUse() |
2599 | ? Builder.CreateNot(V: C) |
2600 | : getFreelyInverted(V: C, WillInvertAllUses: C->hasOneUse(), Builder: &Builder); |
2601 | if (NotC != nullptr) |
2602 | return BinaryOperator::CreateAnd(V1: Op1, V2: Builder.CreateNot(V: C)); |
2603 | } |
2604 | |
2605 | // (A | B) & (~A ^ B) -> A & B |
2606 | // (A | B) & (B ^ ~A) -> A & B |
2607 | // (B | A) & (~A ^ B) -> A & B |
2608 | // (B | A) & (B ^ ~A) -> A & B |
2609 | if (match(V: Op1, P: m_c_Xor(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2610 | match(V: Op0, P: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2611 | return BinaryOperator::CreateAnd(V1: A, V2: B); |
2612 | |
2613 | // (~A ^ B) & (A | B) -> A & B |
2614 | // (~A ^ B) & (B | A) -> A & B |
2615 | // (B ^ ~A) & (A | B) -> A & B |
2616 | // (B ^ ~A) & (B | A) -> A & B |
2617 | if (match(V: Op0, P: m_c_Xor(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2618 | match(V: Op1, P: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2619 | return BinaryOperator::CreateAnd(V1: A, V2: B); |
2620 | |
2621 | // (~A | B) & (A ^ B) -> ~A & B |
2622 | // (~A | B) & (B ^ A) -> ~A & B |
2623 | // (B | ~A) & (A ^ B) -> ~A & B |
2624 | // (B | ~A) & (B ^ A) -> ~A & B |
2625 | if (match(V: Op0, P: m_c_Or(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2626 | match(V: Op1, P: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2627 | return BinaryOperator::CreateAnd(V1: Builder.CreateNot(V: A), V2: B); |
2628 | |
2629 | // (A ^ B) & (~A | B) -> ~A & B |
2630 | // (B ^ A) & (~A | B) -> ~A & B |
2631 | // (A ^ B) & (B | ~A) -> ~A & B |
2632 | // (B ^ A) & (B | ~A) -> ~A & B |
2633 | if (match(V: Op1, P: m_c_Or(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2634 | match(V: Op0, P: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2635 | return BinaryOperator::CreateAnd(V1: Builder.CreateNot(V: A), V2: B); |
2636 | } |
2637 | |
2638 | { |
2639 | ICmpInst *LHS = dyn_cast<ICmpInst>(Val: Op0); |
2640 | ICmpInst *RHS = dyn_cast<ICmpInst>(Val: Op1); |
2641 | if (LHS && RHS) |
2642 | if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ true)) |
2643 | return replaceInstUsesWith(I, V: Res); |
2644 | |
2645 | // TODO: Make this recursive; it's a little tricky because an arbitrary |
2646 | // number of 'and' instructions might have to be created. |
2647 | if (LHS && match(V: Op1, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
2648 | bool IsLogical = isa<SelectInst>(Val: Op1); |
2649 | // LHS & (X && Y) --> (LHS && X) && Y |
2650 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: X)) |
2651 | if (Value *Res = |
2652 | foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ true, IsLogical)) |
2653 | return replaceInstUsesWith(I, V: IsLogical |
2654 | ? Builder.CreateLogicalAnd(Cond1: Res, Cond2: Y) |
2655 | : Builder.CreateAnd(LHS: Res, RHS: Y)); |
2656 | // LHS & (X && Y) --> X && (LHS & Y) |
2657 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y)) |
2658 | if (Value *Res = foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ true, |
2659 | /* IsLogical */ false)) |
2660 | return replaceInstUsesWith(I, V: IsLogical |
2661 | ? Builder.CreateLogicalAnd(Cond1: X, Cond2: Res) |
2662 | : Builder.CreateAnd(LHS: X, RHS: Res)); |
2663 | } |
2664 | if (RHS && match(V: Op0, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
2665 | bool IsLogical = isa<SelectInst>(Val: Op0); |
2666 | // (X && Y) & RHS --> (X && RHS) && Y |
2667 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: X)) |
2668 | if (Value *Res = |
2669 | foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ true, IsLogical)) |
2670 | return replaceInstUsesWith(I, V: IsLogical |
2671 | ? Builder.CreateLogicalAnd(Cond1: Res, Cond2: Y) |
2672 | : Builder.CreateAnd(LHS: Res, RHS: Y)); |
2673 | // (X && Y) & RHS --> X && (Y & RHS) |
2674 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y)) |
2675 | if (Value *Res = foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ true, |
2676 | /* IsLogical */ false)) |
2677 | return replaceInstUsesWith(I, V: IsLogical |
2678 | ? Builder.CreateLogicalAnd(Cond1: X, Cond2: Res) |
2679 | : Builder.CreateAnd(LHS: X, RHS: Res)); |
2680 | } |
2681 | } |
2682 | |
2683 | if (FCmpInst *LHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 0))) |
2684 | if (FCmpInst *RHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 1))) |
2685 | if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true)) |
2686 | return replaceInstUsesWith(I, V: Res); |
2687 | |
2688 | if (Instruction *FoldedFCmps = reassociateFCmps(BO&: I, Builder)) |
2689 | return FoldedFCmps; |
2690 | |
2691 | if (Instruction *CastedAnd = foldCastedBitwiseLogic(I)) |
2692 | return CastedAnd; |
2693 | |
2694 | if (Instruction *Sel = foldBinopOfSextBoolToSelect(I)) |
2695 | return Sel; |
2696 | |
2697 | // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>. |
2698 | // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold |
2699 | // with binop identity constant. But creating a select with non-constant |
2700 | // arm may not be reversible due to poison semantics. Is that a good |
2701 | // canonicalization? |
2702 | Value *A, *B; |
2703 | if (match(V: &I, P: m_c_And(L: m_SExt(Op: m_Value(V&: A)), R: m_Value(V&: B))) && |
2704 | A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2705 | return SelectInst::Create(C: A, S1: B, S2: Constant::getNullValue(Ty)); |
2706 | |
2707 | // Similarly, a 'not' of the bool translates to a swap of the select arms: |
2708 | // ~sext(A) & B / B & ~sext(A) --> A ? 0 : B |
2709 | if (match(V: &I, P: m_c_And(L: m_Not(V: m_SExt(Op: m_Value(V&: A))), R: m_Value(V&: B))) && |
2710 | A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2711 | return SelectInst::Create(C: A, S1: Constant::getNullValue(Ty), S2: B); |
2712 | |
2713 | // and(zext(A), B) -> A ? (B & 1) : 0 |
2714 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: A))), R: m_Value(V&: B))) && |
2715 | A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2716 | return SelectInst::Create(C: A, S1: Builder.CreateAnd(LHS: B, RHS: ConstantInt::get(Ty, V: 1)), |
2717 | S2: Constant::getNullValue(Ty)); |
2718 | |
2719 | // (-1 + A) & B --> A ? 0 : B where A is 0/1. |
2720 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_Add(L: m_ZExtOrSelf(Op: m_Value(V&: A)), R: m_AllOnes())), |
2721 | R: m_Value(V&: B)))) { |
2722 | if (A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2723 | return SelectInst::Create(C: A, S1: Constant::getNullValue(Ty), S2: B); |
2724 | if (computeKnownBits(V: A, /* Depth */ 0, CxtI: &I).countMaxActiveBits() <= 1) { |
2725 | return SelectInst::Create( |
2726 | C: Builder.CreateICmpEQ(LHS: A, RHS: Constant::getNullValue(Ty: A->getType())), S1: B, |
2727 | S2: Constant::getNullValue(Ty)); |
2728 | } |
2729 | } |
2730 | |
2731 | // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0 -- with optional sext |
2732 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_SExtOrSelf( |
2733 | Op: m_AShr(L: m_Value(V&: X), R: m_APIntAllowPoison(Res&: C)))), |
2734 | R: m_Value(V&: Y))) && |
2735 | *C == X->getType()->getScalarSizeInBits() - 1) { |
2736 | Value *IsNeg = Builder.CreateIsNeg(Arg: X, Name: "isneg" ); |
2737 | return SelectInst::Create(C: IsNeg, S1: Y, S2: ConstantInt::getNullValue(Ty)); |
2738 | } |
2739 | // If there's a 'not' of the shifted value, swap the select operands: |
2740 | // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y -- with optional sext |
2741 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_SExtOrSelf( |
2742 | Op: m_Not(V: m_AShr(L: m_Value(V&: X), R: m_APIntAllowPoison(Res&: C))))), |
2743 | R: m_Value(V&: Y))) && |
2744 | *C == X->getType()->getScalarSizeInBits() - 1) { |
2745 | Value *IsNeg = Builder.CreateIsNeg(Arg: X, Name: "isneg" ); |
2746 | return SelectInst::Create(C: IsNeg, S1: ConstantInt::getNullValue(Ty), S2: Y); |
2747 | } |
2748 | |
2749 | // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions |
2750 | if (sinkNotIntoOtherHandOfLogicalOp(I)) |
2751 | return &I; |
2752 | |
2753 | // An and recurrence w/loop invariant step is equivelent to (and start, step) |
2754 | PHINode *PN = nullptr; |
2755 | Value *Start = nullptr, *Step = nullptr; |
2756 | if (matchSimpleRecurrence(I: &I, P&: PN, Start, Step) && DT.dominates(Def: Step, User: PN)) |
2757 | return replaceInstUsesWith(I, V: Builder.CreateAnd(LHS: Start, RHS: Step)); |
2758 | |
2759 | if (Instruction *R = reassociateForUses(BO&: I, Builder)) |
2760 | return R; |
2761 | |
2762 | if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder)) |
2763 | return Canonicalized; |
2764 | |
2765 | if (Instruction *Folded = foldLogicOfIsFPClass(BO&: I, Op0, Op1)) |
2766 | return Folded; |
2767 | |
2768 | if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) |
2769 | return Res; |
2770 | |
2771 | if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) |
2772 | return Res; |
2773 | |
2774 | if (Value *V = |
2775 | simplifyAndOrWithOpReplaced(V: Op0, Op: Op1, RepOp: Constant::getAllOnesValue(Ty), |
2776 | /*SimplifyOnly*/ false, IC&: *this)) |
2777 | return BinaryOperator::CreateAnd(V1: V, V2: Op1); |
2778 | if (Value *V = |
2779 | simplifyAndOrWithOpReplaced(V: Op1, Op: Op0, RepOp: Constant::getAllOnesValue(Ty), |
2780 | /*SimplifyOnly*/ false, IC&: *this)) |
2781 | return BinaryOperator::CreateAnd(V1: Op0, V2: V); |
2782 | |
2783 | return nullptr; |
2784 | } |
2785 | |
/// Try to recognize a bswap and/or bitreverse idiom rooted at \p I.
///
/// \param I               The root instruction of the candidate idiom.
/// \param MatchBSwaps     Allow matching a byte-swap idiom.
/// \param MatchBitReversals Allow matching a bit-reverse idiom.
/// \returns the final replacement instruction (detached from its parent so
/// the caller can re-insert it in place of \p I), or nullptr if no idiom was
/// recognized.
Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I,
                                                      bool MatchBSwaps,
                                                      bool MatchBitReversals) {
  SmallVector<Instruction *, 4> Insts;
  // The helper populates Insts with the instructions it created for the
  // replacement sequence; bail out if no idiom was found.
  if (!recognizeBSwapOrBitReverseIdiom(I: &I, MatchBSwaps, MatchBitReversals,
                                       InsertedInsts&: Insts))
    return nullptr;
  // The last inserted instruction is the value that replaces I. Detach it so
  // the caller (InstCombine's driver) can insert it at I's position.
  Instruction *LastInst = Insts.pop_back_val();
  LastInst->removeFromParent();

  // Queue the remaining newly created instructions for further combining.
  for (auto *Inst : Insts)
    Worklist.push(I: Inst);
  return LastInst;
}
2800 | |
/// Try to view this 'or' as a funnel shift.
///
/// Two forms are recognized: (1) an 'or' of a shl and a lshr whose shift
/// amounts provably sum to the bitwidth (including masked-rotate variants),
/// and (2) an 'or' that re-concatenates two zext'd values in the opposite
/// order of an existing dominating concat, which is a rotate of that concat.
///
/// \returns the intrinsic ID (fshl or fshr) plus its three arguments
/// {ShVal0, ShVal1, ShAmt} on success, std::nullopt otherwise. No IR is
/// created except possibly via Builder for constant shift amounts.
std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>>
InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) {
  // TODO: Can we reduce the code duplication between this and the related
  // rotate matching code under visitSelect and visitTrunc?
  assert(Or.getOpcode() == BinaryOperator::Or && "Expecting or instruction" );

  unsigned Width = Or.getType()->getScalarSizeInBits();

  // Both operands must be instructions for any of the patterns below.
  Instruction *Or0, *Or1;
  if (!match(V: Or.getOperand(i: 0), P: m_Instruction(I&: Or0)) ||
      !match(V: Or.getOperand(i: 1), P: m_Instruction(I&: Or1)))
    return std::nullopt;

  bool IsFshl = true; // Sub on LSHR.
  SmallVector<Value *, 3> FShiftArgs;

  // First, find an or'd pair of opposite shifts:
  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
  if (isa<BinaryOperator>(Val: Or0) && isa<BinaryOperator>(Val: Or1)) {
    Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
    if (!match(V: Or0,
               P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal0), R: m_Value(V&: ShAmt0)))) ||
        !match(V: Or1,
               P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal1), R: m_Value(V&: ShAmt1)))) ||
        Or0->getOpcode() == Or1->getOpcode())
      return std::nullopt;

    // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
    if (Or0->getOpcode() == BinaryOperator::LShr) {
      std::swap(a&: Or0, b&: Or1);
      std::swap(a&: ShVal0, b&: ShVal1);
      std::swap(a&: ShAmt0, b&: ShAmt1);
    }
    assert(Or0->getOpcode() == BinaryOperator::Shl &&
           Or1->getOpcode() == BinaryOperator::LShr &&
           "Illegal or(shift,shift) pair" );

    // Match the shift amount operands for a funnel shift pattern. This always
    // matches a subtraction on the R operand.
    auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
      // Check for constant shift amounts that sum to the bitwidth.
      const APInt *LI, *RI;
      if (match(V: L, P: m_APIntAllowPoison(Res&: LI)) && match(V: R, P: m_APIntAllowPoison(Res&: RI)))
        if (LI->ult(RHS: Width) && RI->ult(RHS: Width) && (*LI + *RI) == Width)
          return ConstantInt::get(Ty: L->getType(), V: *LI);

      // Same check for vector constants with potentially-undef lanes: both
      // must be in-range and sum (lane-wise) to the bitwidth.
      Constant *LC, *RC;
      if (match(V: L, P: m_Constant(C&: LC)) && match(V: R, P: m_Constant(C&: RC)) &&
          match(V: L,
                P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold: APInt(Width, Width))) &&
          match(V: R,
                P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold: APInt(Width, Width))) &&
          match(V: ConstantExpr::getAdd(C1: LC, C2: RC), P: m_SpecificIntAllowPoison(V: Width)))
        return ConstantExpr::mergeUndefsWith(C: LC, Other: RC);

      // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
      // We limit this to X < Width in case the backend re-expands the
      // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
      // might remove it after this fold). This still doesn't guarantee that the
      // final codegen will match this original pattern.
      if (match(V: R, P: m_OneUse(SubPattern: m_Sub(L: m_SpecificInt(V: Width), R: m_Specific(V: L))))) {
        KnownBits KnownL = computeKnownBits(V: L, /*Depth*/ 0, CxtI: &Or);
        return KnownL.getMaxValue().ult(RHS: Width) ? L : nullptr;
      }

      // For non-constant cases, the following patterns currently only work for
      // rotation patterns.
      // TODO: Add general funnel-shift compatible patterns.
      if (ShVal0 != ShVal1)
        return nullptr;

      // For non-constant cases we don't support non-pow2 shift masks.
      // TODO: Is it worth matching urem as well?
      if (!isPowerOf2_32(Value: Width))
        return nullptr;

      // The shift amount may be masked with negation:
      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
      Value *X;
      unsigned Mask = Width - 1;
      if (match(V: L, P: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask))) &&
          match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask))))
        return X;

      // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
      if (match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: L)), R: m_SpecificInt(V: Mask))))
        return L;

      // Similar to above, but the shift amount may be extended after masking,
      // so return the extended value as the parameter for the intrinsic.
      if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) &&
          match(V: R,
                P: m_And(L: m_Neg(V: m_ZExt(Op: m_And(L: m_Specific(V: X), R: m_SpecificInt(V: Mask)))),
                      R: m_SpecificInt(V: Mask))))
        return L;

      if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) &&
          match(V: R, P: m_ZExt(Op: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask)))))
        return L;

      return nullptr;
    };

    // Try the fshl orientation first; if that fails, retry with the operands
    // swapped, which corresponds to fshr (the subtraction is on the shl side).
    Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
    if (!ShAmt) {
      ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
      IsFshl = false; // Sub on SHL.
    }
    if (!ShAmt)
      return std::nullopt;

    FShiftArgs = {ShVal0, ShVal1, ShAmt};
  } else if (isa<ZExtInst>(Val: Or0) || isa<ZExtInst>(Val: Or1)) {
    // If there are two 'or' instructions concat variables in opposite order:
    //
    // Slot1 and Slot2 are all zero bits.
    // | Slot1 | Low | Slot2 | High |
    // LowHigh = or (shl (zext Low), ZextLowShlAmt), (zext High)
    // | Slot2 | High | Slot1 | Low |
    // HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
    //
    // the latter 'or' can be safely convert to
    // -> HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
    // if ZextLowShlAmt + ZextHighShlAmt == Width.
    if (!isa<ZExtInst>(Val: Or1))
      std::swap(a&: Or0, b&: Or1);

    Value *High, *ZextHigh, *Low;
    const APInt *ZextHighShlAmt;
    if (!match(V: Or0,
               P: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: ZextHigh), R: m_APInt(Res&: ZextHighShlAmt)))))
      return std::nullopt;

    if (!match(V: Or1, P: m_ZExt(Op: m_Value(V&: Low))) ||
        !match(V: ZextHigh, P: m_ZExt(Op: m_Value(V&: High))))
      return std::nullopt;

    unsigned HighSize = High->getType()->getScalarSizeInBits();
    unsigned LowSize = Low->getType()->getScalarSizeInBits();
    // Make sure High does not overlap with Low and most significant bits of
    // High aren't shifted out.
    if (ZextHighShlAmt->ult(RHS: LowSize) || ZextHighShlAmt->ugt(RHS: Width - HighSize))
      return std::nullopt;

    // Search ZextHigh's users for the opposite-order concat (LowHigh). It
    // must dominate this 'or' so it can be used as the fshl operand.
    for (User *U : ZextHigh->users()) {
      Value *X, *Y;
      if (!match(V: U, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))))
        continue;

      if (!isa<ZExtInst>(Val: Y))
        std::swap(a&: X, b&: Y);

      const APInt *ZextLowShlAmt;
      if (!match(V: X, P: m_Shl(L: m_Specific(V: Or1), R: m_APInt(Res&: ZextLowShlAmt))) ||
          !match(V: Y, P: m_Specific(V: ZextHigh)) || !DT.dominates(Def: U, User: &Or))
        continue;

      // HighLow is good concat. If sum of two shifts amount equals to Width,
      // LowHigh must also be a good concat.
      if (*ZextLowShlAmt + *ZextHighShlAmt != Width)
        continue;

      // Low must not overlap with High and most significant bits of Low must
      // not be shifted out.
      assert(ZextLowShlAmt->uge(HighSize) &&
             ZextLowShlAmt->ule(Width - LowSize) && "Invalid concat" );

      FShiftArgs = {U, U, ConstantInt::get(Ty: Or0->getType(), V: *ZextHighShlAmt)};
      break;
    }
  }

  if (FShiftArgs.empty())
    return std::nullopt;

  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  return std::make_pair(x&: IID, y&: FShiftArgs);
}
2979 | |
2980 | /// Match UB-safe variants of the funnel shift intrinsic. |
2981 | static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) { |
2982 | if (auto Opt = IC.convertOrOfShiftsToFunnelShift(Or)) { |
2983 | auto [IID, FShiftArgs] = *Opt; |
2984 | Function *F = Intrinsic::getDeclaration(M: Or.getModule(), id: IID, Tys: Or.getType()); |
2985 | return CallInst::Create(Func: F, Args: FShiftArgs); |
2986 | } |
2987 | |
2988 | return nullptr; |
2989 | } |
2990 | |
/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
/// If both halves of the concat are themselves bswap or bitreverse results,
/// push the concat below the intrinsic (swapping the halves):
///   concat(bswap(x), bswap(y))           --> bswap(concat(x, y))
///   concat(bitreverse(x), bitreverse(y)) --> bitreverse(concat(x, y))
/// Returns the new intrinsic call, or nullptr if the pattern doesn't match.
static Instruction *matchOrConcat(Instruction &Or,
                                  InstCombiner::BuilderTy &Builder) {
  assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'" );
  Value *Op0 = Or.getOperand(i: 0), *Op1 = Or.getOperand(i: 1);
  Type *Ty = Or.getType();

  // Only an even bit width can be split into two equal halves.
  unsigned Width = Ty->getScalarSizeInBits();
  if ((Width & 1) != 0)
    return nullptr;
  unsigned HalfWidth = Width / 2;

  // Canonicalize zext (lower half) to LHS.
  if (!isa<ZExtInst>(Val: Op0))
    std::swap(a&: Op0, b&: Op1);

  // Find lower/upper half.
  Value *LowerSrc, *ShlVal, *UpperSrc;
  const APInt *C;
  if (!match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: LowerSrc)))) ||
      !match(V: Op1, P: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: ShlVal), R: m_APInt(Res&: C)))) ||
      !match(V: ShlVal, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: UpperSrc)))))
    return nullptr;
  // Both sources must be exactly half-width and of the same type, and the
  // shift must place the upper source in the upper half.
  if (*C != HalfWidth || LowerSrc->getType() != UpperSrc->getType() ||
      LowerSrc->getType()->getScalarSizeInBits() != HalfWidth)
    return nullptr;

  // Build "intrinsic(concat(Lo, Hi))": zext both halves, shift Hi into the
  // upper half, or them together, then call the given intrinsic on it.
  auto ConcatIntrinsicCalls = [&](Intrinsic::ID id, Value *Lo, Value *Hi) {
    Value *NewLower = Builder.CreateZExt(V: Lo, DestTy: Ty);
    Value *NewUpper = Builder.CreateZExt(V: Hi, DestTy: Ty);
    NewUpper = Builder.CreateShl(LHS: NewUpper, RHS: HalfWidth);
    Value *BinOp = Builder.CreateOr(LHS: NewLower, RHS: NewUpper);
    Function *F = Intrinsic::getDeclaration(M: Or.getModule(), id, Tys: Ty);
    return Builder.CreateCall(Callee: F, Args: BinOp);
  };

  // BSWAP: Push the concat down, swapping the lower/upper sources.
  // concat(bswap(x),bswap(y)) -> bswap(concat(x,y))
  Value *LowerBSwap, *UpperBSwap;
  if (match(V: LowerSrc, P: m_BSwap(Op0: m_Value(V&: LowerBSwap))) &&
      match(V: UpperSrc, P: m_BSwap(Op0: m_Value(V&: UpperBSwap))))
    return ConcatIntrinsicCalls(Intrinsic::bswap, UpperBSwap, LowerBSwap);

  // BITREVERSE: Push the concat down, swapping the lower/upper sources.
  // concat(bitreverse(x),bitreverse(y)) -> bitreverse(concat(x,y))
  Value *LowerBRev, *UpperBRev;
  if (match(V: LowerSrc, P: m_BitReverse(Op0: m_Value(V&: LowerBRev))) &&
      match(V: UpperSrc, P: m_BitReverse(Op0: m_Value(V&: UpperBRev))))
    return ConcatIntrinsicCalls(Intrinsic::bitreverse, UpperBRev, LowerBRev);

  return nullptr;
}
3043 | |
3044 | /// If all elements of two constant vectors are 0/-1 and inverses, return true. |
3045 | static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { |
3046 | unsigned NumElts = cast<FixedVectorType>(Val: C1->getType())->getNumElements(); |
3047 | for (unsigned i = 0; i != NumElts; ++i) { |
3048 | Constant *EltC1 = C1->getAggregateElement(Elt: i); |
3049 | Constant *EltC2 = C2->getAggregateElement(Elt: i); |
3050 | if (!EltC1 || !EltC2) |
3051 | return false; |
3052 | |
3053 | // One element must be all ones, and the other must be all zeros. |
3054 | if (!((match(V: EltC1, P: m_Zero()) && match(V: EltC2, P: m_AllOnes())) || |
3055 | (match(V: EltC2, P: m_Zero()) && match(V: EltC1, P: m_AllOnes())))) |
3056 | return false; |
3057 | } |
3058 | return true; |
3059 | } |
3060 | |
/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
/// B, it can be used as the condition operand of a select instruction.
/// We will detect (A & C) | ~(B | D) when the flag ABIsTheSame enabled.
///
/// \returns an i1 (or <N x i1>) condition value, or nullptr if A/B do not
/// form a usable inverse-bitmask pair.
Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B,
                                            bool ABIsTheSame) {
  // We may have peeked through bitcasts in the caller.
  // Exit immediately if we don't have (vector) integer types.
  Type *Ty = A->getType();
  if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy())
    return nullptr;

  // If A is the 'not' operand of B and has enough signbits, we have our answer.
  // (When ABIsTheSame is set the caller already peeked through a 'not', so
  // A == B is the matching condition instead.)
  if (ABIsTheSame ? (A == B) : match(V: B, P: m_Not(V: m_Specific(V: A)))) {
    // If these are scalars or vectors of i1, A can be used directly.
    if (Ty->isIntOrIntVectorTy(BitWidth: 1))
      return A;

    // If we look through a vector bitcast, the caller will bitcast the operands
    // to match the condition's number of bits (N x i1).
    // To make this poison-safe, disallow bitcast from wide element to narrow
    // element. That could allow poison in lanes where it was not present in the
    // original code.
    A = peekThroughBitcast(V: A);
    if (A->getType()->isIntOrIntVectorTy()) {
      // A must be all-sign-bits (each element 0 or -1) so the trunc to i1 is
      // lossless, and its elements must not be narrower than Ty's.
      unsigned NumSignBits = ComputeNumSignBits(Op: A);
      if (NumSignBits == A->getType()->getScalarSizeInBits() &&
          NumSignBits <= Ty->getScalarSizeInBits())
        return Builder.CreateTrunc(V: A, DestTy: CmpInst::makeCmpResultType(opnd_type: A->getType()));
    }
    return nullptr;
  }

  // TODO: add support for sext and constant case
  if (ABIsTheSame)
    return nullptr;

  // If both operands are constants, see if the constants are inverse bitmasks.
  Constant *AConst, *BConst;
  if (match(V: A, P: m_Constant(C&: AConst)) && match(V: B, P: m_Constant(C&: BConst)))
    if (AConst == ConstantExpr::getNot(C: BConst) &&
        ComputeNumSignBits(Op: A) == Ty->getScalarSizeInBits())
      return Builder.CreateZExtOrTrunc(V: A, DestTy: CmpInst::makeCmpResultType(opnd_type: Ty));

  // Look for more complex patterns. The 'not' op may be hidden behind various
  // casts. Look through sexts and bitcasts to find the booleans.
  Value *Cond;
  Value *NotB;
  if (match(V: A, P: m_SExt(Op: m_Value(V&: Cond))) &&
      Cond->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
    // A = sext i1 Cond; B = sext (not (i1 Cond))
    if (match(V: B, P: m_SExt(Op: m_Not(V: m_Specific(V: Cond)))))
      return Cond;

    // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond)))
    // TODO: The one-use checks are unnecessary or misplaced. If the caller
    //       checked for uses on logic ops/casts, that should be enough to
    //       make this transform worthwhile.
    if (match(V: B, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: NotB))))) {
      NotB = peekThroughBitcast(V: NotB, OneUseOnly: true);
      if (match(V: NotB, P: m_SExt(Op: m_Specific(V: Cond))))
        return Cond;
    }
  }

  // All scalar (and most vector) possibilities should be handled now.
  // Try more matches that only apply to non-splat constant vectors.
  if (!Ty->isVectorTy())
    return nullptr;

  // If both operands are xor'd with constants using the same sexted boolean
  // operand, see if the constants are inverse bitmasks.
  // TODO: Use ConstantExpr::getNot()?
  if (match(V: A, P: (m_Xor(L: m_SExt(Op: m_Value(V&: Cond)), R: m_Constant(C&: AConst)))) &&
      match(V: B, P: (m_Xor(L: m_SExt(Op: m_Specific(V: Cond)), R: m_Constant(C&: BConst)))) &&
      Cond->getType()->isIntOrIntVectorTy(BitWidth: 1) &&
      areInverseVectorBitmasks(C1: AConst, C2: BConst)) {
    // The truncated xor constant selects which lanes invert the condition.
    AConst = ConstantExpr::getTrunc(C: AConst, Ty: CmpInst::makeCmpResultType(opnd_type: Ty));
    return Builder.CreateXor(LHS: Cond, RHS: AConst);
  }
  return nullptr;
}
3143 | |
/// We have an expression of the form (A & C) | (B & D). Try to simplify this
/// to "A' ? C : D", where A' is a boolean or vector of booleans.
/// When InvertFalseVal is set to true, we try to match the pattern
/// where we have peeked through a 'not' op and A and B are the same:
/// (A & C) | ~(A | D) --> (A & C) | (~A & ~D) --> A' ? C : ~D
///
/// \returns the select (bitcast back to the original type), or nullptr if no
/// usable condition is found by getSelectCondition.
Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
                                              Value *D, bool InvertFalseVal) {
  // The potential condition of the select may be bitcasted. In that case, look
  // through its bitcast and the corresponding bitcast of the 'not' condition.
  Type *OrigType = A->getType();
  A = peekThroughBitcast(V: A, OneUseOnly: true);
  B = peekThroughBitcast(V: B, OneUseOnly: true);
  if (Value *Cond = getSelectCondition(A, B, ABIsTheSame: InvertFalseVal)) {
    // ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
    // If this is a vector, we may need to cast to match the condition's length.
    // The bitcasts will either all exist or all not exist. The builder will
    // not create unnecessary casts if the types already match.
    Type *SelTy = A->getType();
    if (auto *VecTy = dyn_cast<VectorType>(Val: Cond->getType())) {
      // For a fixed or scalable vector get N from <{vscale x} N x iM>
      unsigned Elts = VecTy->getElementCount().getKnownMinValue();
      // For a fixed or scalable vector, get the size in bits of N x iM; for a
      // scalar this is just M.
      unsigned SelEltSize = SelTy->getPrimitiveSizeInBits().getKnownMinValue();
      // Re-divide the total bits evenly across the condition's lane count.
      Type *EltTy = Builder.getIntNTy(N: SelEltSize / Elts);
      SelTy = VectorType::get(ElementType: EltTy, EC: VecTy->getElementCount());
    }
    Value *BitcastC = Builder.CreateBitCast(V: C, DestTy: SelTy);
    // In the InvertFalseVal form, D appeared under a 'not', so invert it here.
    if (InvertFalseVal)
      D = Builder.CreateNot(V: D);
    Value *BitcastD = Builder.CreateBitCast(V: D, DestTy: SelTy);
    Value *Select = Builder.CreateSelect(C: Cond, True: BitcastC, False: BitcastD);
    return Builder.CreateBitCast(V: Select, DestTy: OrigType);
  }

  return nullptr;
}
3181 | |
// (icmp eq X, C) | (icmp ult Other, (X - C)) -> (icmp ule Other, (X - (C + 1)))
// (icmp ne X, C) & (icmp uge Other, (X - C)) -> (icmp ugt Other, (X - (C + 1)))
//
// The 'and' form is handled by inverting both predicates up front, so both
// cases reduce to the 'or' pattern above. When IsLogical is set (select form),
// Other is frozen to keep the transform poison-safe. Returns the combined
// icmp, or nullptr if the pattern doesn't match.
static Value *foldAndOrOfICmpEqConstantAndICmp(ICmpInst *LHS, ICmpInst *RHS,
                                               bool IsAnd, bool IsLogical,
                                               IRBuilderBase &Builder) {
  Value *LHS0 = LHS->getOperand(i_nocapture: 0);
  Value *RHS0 = RHS->getOperand(i_nocapture: 0);
  Value *RHS1 = RHS->getOperand(i_nocapture: 1);

  // Normalize: for 'and' we work with the inverted predicates so the rest of
  // the function only has to reason about the 'or' form.
  ICmpInst::Predicate LPred =
      IsAnd ? LHS->getInversePredicate() : LHS->getPredicate();
  ICmpInst::Predicate RPred =
      IsAnd ? RHS->getInversePredicate() : RHS->getPredicate();

  // LHS must be an equality compare against a constant, and at least one of
  // the compares must be removable (one-use) for this to be profitable.
  const APInt *CInt;
  if (LPred != ICmpInst::ICMP_EQ ||
      !match(V: LHS->getOperand(i_nocapture: 1), P: m_APIntAllowPoison(Res&: CInt)) ||
      !LHS0->getType()->isIntOrIntVectorTy() ||
      !(LHS->hasOneUse() || RHS->hasOneUse()))
    return nullptr;

  // Matches (X - C), written as (X + (-C)); when C is 0 it is X itself.
  auto MatchRHSOp = [LHS0, CInt](const Value *RHSOp) {
    return match(V: RHSOp,
                 P: m_Add(L: m_Specific(V: LHS0), R: m_SpecificIntAllowPoison(V: -*CInt))) ||
           (CInt->isZero() && RHSOp == LHS0);
  };

  // Identify 'Other' as the non-(X - C) operand of the unsigned compare,
  // accepting either operand order (ult vs ugt).
  Value *Other;
  if (RPred == ICmpInst::ICMP_ULT && MatchRHSOp(RHS1))
    Other = RHS0;
  else if (RPred == ICmpInst::ICMP_UGT && MatchRHSOp(RHS0))
    Other = RHS1;
  else
    return nullptr;

  // In select (logical) form, Other may be poison when the LHS short-circuits;
  // freeze it so the merged compare is poison-safe.
  if (IsLogical)
    Other = Builder.CreateFreeze(V: Other);

  // Emit the merged compare: Other <u (X - (C + 1)) for the inverted ('and')
  // form, Other >=u (X - (C + 1)) for the 'or' form.
  return Builder.CreateICmp(
      P: IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE,
      LHS: Builder.CreateSub(LHS: LHS0, RHS: ConstantInt::get(Ty: LHS0->getType(), V: *CInt + 1)),
      RHS: Other);
}
3225 | |
3226 | /// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible. |
3227 | /// If IsLogical is true, then the and/or is in select form and the transform |
3228 | /// must be poison-safe. |
3229 | Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, |
3230 | Instruction &I, bool IsAnd, |
3231 | bool IsLogical) { |
3232 | const SimplifyQuery Q = SQ.getWithInstruction(I: &I); |
3233 | |
3234 | // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) |
3235 | // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) |
3236 | // if K1 and K2 are a one-bit mask. |
3237 | if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, CxtI: &I, IsAnd, IsLogical)) |
3238 | return V; |
3239 | |
3240 | ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
3241 | Value *LHS0 = LHS->getOperand(i_nocapture: 0), *RHS0 = RHS->getOperand(i_nocapture: 0); |
3242 | Value *LHS1 = LHS->getOperand(i_nocapture: 1), *RHS1 = RHS->getOperand(i_nocapture: 1); |
3243 | const APInt *LHSC = nullptr, *RHSC = nullptr; |
3244 | match(V: LHS1, P: m_APInt(Res&: LHSC)); |
3245 | match(V: RHS1, P: m_APInt(Res&: RHSC)); |
3246 | |
3247 | // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) |
3248 | // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) |
3249 | if (predicatesFoldable(P1: PredL, P2: PredR)) { |
3250 | if (LHS0 == RHS1 && LHS1 == RHS0) { |
3251 | PredL = ICmpInst::getSwappedPredicate(pred: PredL); |
3252 | std::swap(a&: LHS0, b&: LHS1); |
3253 | } |
3254 | if (LHS0 == RHS0 && LHS1 == RHS1) { |
3255 | unsigned Code = IsAnd ? getICmpCode(Pred: PredL) & getICmpCode(Pred: PredR) |
3256 | : getICmpCode(Pred: PredL) | getICmpCode(Pred: PredR); |
3257 | bool IsSigned = LHS->isSigned() || RHS->isSigned(); |
3258 | return getNewICmpValue(Code, Sign: IsSigned, LHS: LHS0, RHS: LHS1, Builder); |
3259 | } |
3260 | } |
3261 | |
3262 | // handle (roughly): |
3263 | // (icmp ne (A & B), C) | (icmp ne (A & D), E) |
3264 | // (icmp eq (A & B), C) & (icmp eq (A & D), E) |
3265 | if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder)) |
3266 | return V; |
3267 | |
3268 | if (Value *V = |
3269 | foldAndOrOfICmpEqConstantAndICmp(LHS, RHS, IsAnd, IsLogical, Builder)) |
3270 | return V; |
3271 | // We can treat logical like bitwise here, because both operands are used on |
3272 | // the LHS, and as such poison from both will propagate. |
3273 | if (Value *V = foldAndOrOfICmpEqConstantAndICmp(LHS: RHS, RHS: LHS, IsAnd, |
3274 | /*IsLogical*/ false, Builder)) |
3275 | return V; |
3276 | |
3277 | if (Value *V = |
3278 | foldAndOrOfICmpsWithConstEq(Cmp0: LHS, Cmp1: RHS, IsAnd, IsLogical, Builder, Q)) |
3279 | return V; |
3280 | // We can convert this case to bitwise and, because both operands are used |
3281 | // on the LHS, and as such poison from both will propagate. |
3282 | if (Value *V = foldAndOrOfICmpsWithConstEq(Cmp0: RHS, Cmp1: LHS, IsAnd, |
3283 | /*IsLogical*/ false, Builder, Q)) |
3284 | return V; |
3285 | |
3286 | if (Value *V = foldIsPowerOf2OrZero(Cmp0: LHS, Cmp1: RHS, IsAnd, Builder)) |
3287 | return V; |
3288 | if (Value *V = foldIsPowerOf2OrZero(Cmp0: RHS, Cmp1: LHS, IsAnd, Builder)) |
3289 | return V; |
3290 | |
3291 | // TODO: One of these directions is fine with logical and/or, the other could |
3292 | // be supported by inserting freeze. |
3293 | if (!IsLogical) { |
3294 | // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n |
3295 | // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n |
3296 | if (Value *V = simplifyRangeCheck(Cmp0: LHS, Cmp1: RHS, /*Inverted=*/!IsAnd)) |
3297 | return V; |
3298 | |
3299 | // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n |
3300 | // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n |
3301 | if (Value *V = simplifyRangeCheck(Cmp0: RHS, Cmp1: LHS, /*Inverted=*/!IsAnd)) |
3302 | return V; |
3303 | } |
3304 | |
3305 | // TODO: Add conjugated or fold, check whether it is safe for logical and/or. |
3306 | if (IsAnd && !IsLogical) |
3307 | if (Value *V = foldSignedTruncationCheck(ICmp0: LHS, ICmp1: RHS, CxtI&: I, Builder)) |
3308 | return V; |
3309 | |
3310 | if (Value *V = foldIsPowerOf2(Cmp0: LHS, Cmp1: RHS, JoinedByAnd: IsAnd, Builder)) |
3311 | return V; |
3312 | |
3313 | if (Value *V = foldPowerOf2AndShiftedMask(Cmp0: LHS, Cmp1: RHS, JoinedByAnd: IsAnd, Builder)) |
3314 | return V; |
3315 | |
3316 | // TODO: Verify whether this is safe for logical and/or. |
3317 | if (!IsLogical) { |
3318 | if (Value *X = foldUnsignedUnderflowCheck(ZeroICmp: LHS, UnsignedICmp: RHS, IsAnd, Q, Builder)) |
3319 | return X; |
3320 | if (Value *X = foldUnsignedUnderflowCheck(ZeroICmp: RHS, UnsignedICmp: LHS, IsAnd, Q, Builder)) |
3321 | return X; |
3322 | } |
3323 | |
3324 | if (Value *X = foldEqOfParts(Cmp0: LHS, Cmp1: RHS, IsAnd)) |
3325 | return X; |
3326 | |
3327 | // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) |
3328 | // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) |
3329 | // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs. |
3330 | if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
3331 | PredL == PredR && match(V: LHS1, P: m_ZeroInt()) && match(V: RHS1, P: m_ZeroInt()) && |
3332 | LHS0->getType() == RHS0->getType()) { |
3333 | Value *NewOr = Builder.CreateOr(LHS: LHS0, RHS: RHS0); |
3334 | return Builder.CreateICmp(P: PredL, LHS: NewOr, |
3335 | RHS: Constant::getNullValue(Ty: NewOr->getType())); |
3336 | } |
3337 | |
3338 | // (icmp ne A, -1) | (icmp ne B, -1) --> (icmp ne (A&B), -1) |
3339 | // (icmp eq A, -1) & (icmp eq B, -1) --> (icmp eq (A&B), -1) |
3340 | if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
3341 | PredL == PredR && match(V: LHS1, P: m_AllOnes()) && match(V: RHS1, P: m_AllOnes()) && |
3342 | LHS0->getType() == RHS0->getType()) { |
3343 | Value *NewAnd = Builder.CreateAnd(LHS: LHS0, RHS: RHS0); |
3344 | return Builder.CreateICmp(P: PredL, LHS: NewAnd, |
3345 | RHS: Constant::getAllOnesValue(Ty: LHS0->getType())); |
3346 | } |
3347 | |
3348 | // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). |
3349 | if (!LHSC || !RHSC) |
3350 | return nullptr; |
3351 | |
3352 | // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 |
3353 | // (trunc x) != C1 | (and x, CA) != C2 -> (and x, CA|CMAX) != C1|C2 |
3354 | // where CMAX is the all ones value for the truncated type, |
3355 | // iff the lower bits of C2 and CA are zero. |
3356 | if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
3357 | PredL == PredR && LHS->hasOneUse() && RHS->hasOneUse()) { |
3358 | Value *V; |
3359 | const APInt *AndC, *SmallC = nullptr, *BigC = nullptr; |
3360 | |
3361 | // (trunc x) == C1 & (and x, CA) == C2 |
3362 | // (and x, CA) == C2 & (trunc x) == C1 |
3363 | if (match(V: RHS0, P: m_Trunc(Op: m_Value(V))) && |
3364 | match(V: LHS0, P: m_And(L: m_Specific(V), R: m_APInt(Res&: AndC)))) { |
3365 | SmallC = RHSC; |
3366 | BigC = LHSC; |
3367 | } else if (match(V: LHS0, P: m_Trunc(Op: m_Value(V))) && |
3368 | match(V: RHS0, P: m_And(L: m_Specific(V), R: m_APInt(Res&: AndC)))) { |
3369 | SmallC = LHSC; |
3370 | BigC = RHSC; |
3371 | } |
3372 | |
3373 | if (SmallC && BigC) { |
3374 | unsigned BigBitSize = BigC->getBitWidth(); |
3375 | unsigned SmallBitSize = SmallC->getBitWidth(); |
3376 | |
3377 | // Check that the low bits are zero. |
3378 | APInt Low = APInt::getLowBitsSet(numBits: BigBitSize, loBitsSet: SmallBitSize); |
3379 | if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) { |
3380 | Value *NewAnd = Builder.CreateAnd(LHS: V, RHS: Low | *AndC); |
3381 | APInt N = SmallC->zext(width: BigBitSize) | *BigC; |
3382 | Value *NewVal = ConstantInt::get(Ty: NewAnd->getType(), V: N); |
3383 | return Builder.CreateICmp(P: PredL, LHS: NewAnd, RHS: NewVal); |
3384 | } |
3385 | } |
3386 | } |
3387 | |
3388 | // Match naive pattern (and its inverted form) for checking if two values |
3389 | // share same sign. An example of the pattern: |
3390 | // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1) -> (icmp sgt (X ^ Y), -1) |
3391 | // Inverted form (example): |
3392 | // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0) |
3393 | bool TrueIfSignedL, TrueIfSignedR; |
3394 | if (isSignBitCheck(Pred: PredL, RHS: *LHSC, TrueIfSigned&: TrueIfSignedL) && |
3395 | isSignBitCheck(Pred: PredR, RHS: *RHSC, TrueIfSigned&: TrueIfSignedR) && |
3396 | (RHS->hasOneUse() || LHS->hasOneUse())) { |
3397 | Value *X, *Y; |
3398 | if (IsAnd) { |
3399 | if ((TrueIfSignedL && !TrueIfSignedR && |
3400 | match(V: LHS0, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3401 | match(V: RHS0, P: m_c_And(L: m_Specific(V: X), R: m_Specific(V: Y)))) || |
3402 | (!TrueIfSignedL && TrueIfSignedR && |
3403 | match(V: LHS0, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3404 | match(V: RHS0, P: m_c_Or(L: m_Specific(V: X), R: m_Specific(V: Y))))) { |
3405 | Value *NewXor = Builder.CreateXor(LHS: X, RHS: Y); |
3406 | return Builder.CreateIsNeg(Arg: NewXor); |
3407 | } |
3408 | } else { |
3409 | if ((TrueIfSignedL && !TrueIfSignedR && |
3410 | match(V: LHS0, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3411 | match(V: RHS0, P: m_c_Or(L: m_Specific(V: X), R: m_Specific(V: Y)))) || |
3412 | (!TrueIfSignedL && TrueIfSignedR && |
3413 | match(V: LHS0, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3414 | match(V: RHS0, P: m_c_And(L: m_Specific(V: X), R: m_Specific(V: Y))))) { |
3415 | Value *NewXor = Builder.CreateXor(LHS: X, RHS: Y); |
3416 | return Builder.CreateIsNotNeg(Arg: NewXor); |
3417 | } |
3418 | } |
3419 | } |
3420 | |
3421 | return foldAndOrOfICmpsUsingRanges(ICmp1: LHS, ICmp2: RHS, IsAnd); |
3422 | } |
3423 | |
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
//
// Top-level combiner for 'or'. Tries a long, strictly ordered sequence of
// folds: generic simplification and canonicalization first, then or-of-icmp /
// or-of-fcmp folds, select formation, and assorted bit-pattern rewrites.
// Returns the replacement instruction (or &I when I was mutated in place), or
// nullptr if no transform applied. NOTE(review): the order of these folds is
// load-bearing — earlier folds canonicalize forms that later folds rely on.
Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
  // If the whole 'or' already folds to an existing value, we are done.
  if (Value *V = simplifyOrInst(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1),
                                Q: SQ.getWithInstruction(I: &I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(Inst&: I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(BO&: I))
    return Phi;

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Inst&: I))
    return &I;

  // Do this before using distributive laws to catch simple and/or/not patterns.
  if (Instruction *Xor = foldOrToXor(I, Builder))
    return Xor;

  if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
    return X;

  // (A&B)|(A&C) -> A&(B|C) etc
  if (Value *V = foldUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  // Cache the operands; later folds re-read these (and may swap them below).
  Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1);
  Type *Ty = I.getType();
  // For i1 (scalar or vector), 'or' of a select can fold using a condition
  // implied by the other operand.
  if (Ty->isIntOrIntVectorTy(BitWidth: 1)) {
    if (auto *SI0 = dyn_cast<SelectInst>(Val: Op0)) {
      if (auto *R =
              foldAndOrOfSelectUsingImpliedCond(Op: Op1, SI&: *SI0, /* IsAnd */ false))
        return R;
    }
    if (auto *SI1 = dyn_cast<SelectInst>(Val: Op1)) {
      if (auto *R =
              foldAndOrOfSelectUsingImpliedCond(Op: Op0, SI&: *SI1, /* IsAnd */ false))
        return R;
    }
  }

  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
    return FoldedLogic;

  // 'or' is the glue of byte-swap / bit-reverse / funnel-shift / concat
  // idioms, so try to recognize those whole-value patterns here.
  if (Instruction *BitOp = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true,
                                                  /*MatchBitReversals*/ true))
    return BitOp;

  if (Instruction *Funnel = matchFunnelShift(Or&: I, IC&: *this))
    return Funnel;

  if (Instruction *Concat = matchOrConcat(Or&: I, Builder))
    return replaceInstUsesWith(I, V: Concat);

  if (Instruction *R = foldBinOpShiftWithShift(I))
    return R;

  if (Instruction *R = tryFoldInstWithCtpopWithNot(I: &I))
    return R;

  Value *X, *Y;
  const APInt *CV;
  if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_APInt(Res&: CV))), R: m_Value(V&: Y))) &&
      !CV->isAllOnes() && MaskedValueIsZero(V: Y, Mask: *CV, Depth: 0, CxtI: &I)) {
    // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
    // The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
    Value *Or = Builder.CreateOr(LHS: X, RHS: Y);
    return BinaryOperator::CreateXor(V1: Or, V2: ConstantInt::get(Ty, V: *CV));
  }

  // If the operands have no common bits set:
  // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1)
  if (match(V: &I, P: m_c_DisjointOr(L: m_OneUse(SubPattern: m_Mul(L: m_Value(V&: X), R: m_Value(V&: Y))),
                                 R: m_Deferred(V: X)))) {
    Value *IncrementY = Builder.CreateAdd(LHS: Y, RHS: ConstantInt::get(Ty, V: 1));
    return BinaryOperator::CreateMul(V1: X, V2: IncrementY);
  }

  // (A & C) | (B & D) — bitfield-merge and select-formation folds.
  Value *A, *B, *C, *D;
  if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: C))) &&
      match(V: Op1, P: m_And(L: m_Value(V&: B), R: m_Value(V&: D)))) {

    // (A & C0) | (B & C1) with constant masks.
    const APInt *C0, *C1;
    if (match(V: C, P: m_APInt(Res&: C0)) && match(V: D, P: m_APInt(Res&: C1))) {
      Value *X;
      // Complementary masks select disjoint bit ranges from each side.
      if (*C0 == ~*C1) {
        // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B
        if (match(V: A, P: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: B))))
          return BinaryOperator::CreateOr(V1: Builder.CreateAnd(LHS: X, RHS: *C0), V2: B);
        // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A
        if (match(V: B, P: m_c_Or(L: m_Specific(V: A), R: m_Value(V&: X))))
          return BinaryOperator::CreateOr(V1: Builder.CreateAnd(LHS: X, RHS: *C1), V2: A);

        // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B
        if (match(V: A, P: m_c_Xor(L: m_Value(V&: X), R: m_Specific(V: B))))
          return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: *C0), V2: B);
        // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A
        if (match(V: B, P: m_c_Xor(L: m_Specific(V: A), R: m_Value(V&: X))))
          return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: *C1), V2: A);
      }

      // Non-overlapping masks: the two sides can share one 'and'.
      if ((*C0 & *C1).isZero()) {
        // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1)
        // iff (C0 & C1) == 0 and (X & ~C0) == 0
        if (match(V: A, P: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: B))) &&
            MaskedValueIsZero(V: X, Mask: ~*C0, Depth: 0, CxtI: &I)) {
          Constant *C01 = ConstantInt::get(Ty, V: *C0 | *C1);
          return BinaryOperator::CreateAnd(V1: A, V2: C01);
        }
        // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1)
        // iff (C0 & C1) == 0 and (X & ~C1) == 0
        if (match(V: B, P: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: A))) &&
            MaskedValueIsZero(V: X, Mask: ~*C1, Depth: 0, CxtI: &I)) {
          Constant *C01 = ConstantInt::get(Ty, V: *C0 | *C1);
          return BinaryOperator::CreateAnd(V1: B, V2: C01);
        }
        // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1)
        // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0.
        const APInt *C2, *C3;
        if (match(V: A, P: m_Or(L: m_Value(V&: X), R: m_APInt(Res&: C2))) &&
            match(V: B, P: m_Or(L: m_Specific(V: X), R: m_APInt(Res&: C3))) &&
            (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) {
          Value *Or = Builder.CreateOr(LHS: X, RHS: *C2 | *C3, Name: "bitfield" );
          Constant *C01 = ConstantInt::get(Ty, V: *C0 | *C1);
          return BinaryOperator::CreateAnd(V1: Or, V2: C01);
        }
      }
    }

    // Don't try to form a select if it's unlikely that we'll get rid of at
    // least one of the operands. A select is generally more expensive than the
    // 'or' that it is replacing.
    if (Op0->hasOneUse() || Op1->hasOneUse()) {
      // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
      // The 8 calls below enumerate all operand-order permutations.
      if (Value *V = matchSelectFromAndOr(A, C, B, D))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A, C, B: D, D: B))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A: C, C: A, B, D))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A: C, C: A, B: D, D: B))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A: B, C: D, B: A, D: C))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A: B, C: D, B: C, D: A))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A: D, C: B, B: A, D: C))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A: D, C: B, B: C, D: A))
        return replaceInstUsesWith(I, V);
    }
  }

  if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: C))) &&
      match(V: Op1, P: m_Not(V: m_Or(L: m_Value(V&: B), R: m_Value(V&: D)))) &&
      (Op0->hasOneUse() || Op1->hasOneUse())) {
    // (Cond & C) | ~(Cond | D) -> Cond ? C : ~D
    if (Value *V = matchSelectFromAndOr(A, C, B, D, InvertFalseVal: true))
      return replaceInstUsesWith(I, V);
    if (Value *V = matchSelectFromAndOr(A, C, B: D, D: B, InvertFalseVal: true))
      return replaceInstUsesWith(I, V);
    if (Value *V = matchSelectFromAndOr(A: C, C: A, B, D, InvertFalseVal: true))
      return replaceInstUsesWith(I, V);
    if (Value *V = matchSelectFromAndOr(A: C, C: A, B: D, D: B, InvertFalseVal: true))
      return replaceInstUsesWith(I, V);
  }

  // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
  if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))))
    if (match(V: Op1, P: m_Xor(L: m_Xor(L: m_Specific(V: B), R: m_Value(V&: C)), R: m_Specific(V: A))))
      return BinaryOperator::CreateOr(V1: Op0, V2: C);

  // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
  if (match(V: Op0, P: m_Xor(L: m_Xor(L: m_Value(V&: A), R: m_Value(V&: C)), R: m_Value(V&: B))))
    if (match(V: Op1, P: m_Xor(L: m_Specific(V: B), R: m_Specific(V: A))))
      return BinaryOperator::CreateOr(V1: Op1, V2: C);

  if (Instruction *DeMorgan = matchDeMorgansLaws(I, IC&: *this))
    return DeMorgan;

  // Canonicalize xor to the RHS. The swap is undone after the xor folds below
  // so that later folds see the original operand order.
  bool SwappedForXor = false;
  if (match(V: Op0, P: m_Xor(L: m_Value(), R: m_Value()))) {
    std::swap(a&: Op0, b&: Op1);
    SwappedForXor = true;
  }

  if (match(V: Op1, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
    // (A | ?) | (A ^ B) --> (A | ?) | B
    // (B | ?) | (A ^ B) --> (B | ?) | A
    if (match(V: Op0, P: m_c_Or(L: m_Specific(V: A), R: m_Value())))
      return BinaryOperator::CreateOr(V1: Op0, V2: B);
    if (match(V: Op0, P: m_c_Or(L: m_Specific(V: B), R: m_Value())))
      return BinaryOperator::CreateOr(V1: Op0, V2: A);

    // (A & B) | (A ^ B) --> A | B
    // (B & A) | (A ^ B) --> A | B
    if (match(V: Op0, P: m_And(L: m_Specific(V: A), R: m_Specific(V: B))) ||
        match(V: Op0, P: m_And(L: m_Specific(V: B), R: m_Specific(V: A))))
      return BinaryOperator::CreateOr(V1: A, V2: B);

    // ~A | (A ^ B) --> ~(A & B)
    // ~B | (A ^ B) --> ~(A & B)
    // The swap above should always make Op0 the 'not'.
    if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
        (match(V: Op0, P: m_Not(V: m_Specific(V: A))) || match(V: Op0, P: m_Not(V: m_Specific(V: B)))))
      return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: A, RHS: B));

    // Same as above, but peek through an 'and' to the common operand:
    // ~(A & ?) | (A ^ B) --> ~((A & ?) & B)
    // ~(B & ?) | (A ^ B) --> ~((B & ?) & A)
    Instruction *And;
    if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
        match(V: Op0, P: m_Not(V: m_CombineAnd(L: m_Instruction(I&: And),
                                           R: m_c_And(L: m_Specific(V: A), R: m_Value())))))
      return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: And, RHS: B));
    if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
        match(V: Op0, P: m_Not(V: m_CombineAnd(L: m_Instruction(I&: And),
                                           R: m_c_And(L: m_Specific(V: B), R: m_Value())))))
      return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: And, RHS: A));

    // (~A | C) | (A ^ B) --> ~(A & B) | C
    // (~B | C) | (A ^ B) --> ~(A & B) | C
    if (Op0->hasOneUse() && Op1->hasOneUse() &&
        (match(V: Op0, P: m_c_Or(L: m_Not(V: m_Specific(V: A)), R: m_Value(V&: C))) ||
         match(V: Op0, P: m_c_Or(L: m_Not(V: m_Specific(V: B)), R: m_Value(V&: C))))) {
      Value *Nand = Builder.CreateNot(V: Builder.CreateAnd(LHS: A, RHS: B), Name: "nand" );
      return BinaryOperator::CreateOr(V1: Nand, V2: C);
    }
  }

  if (SwappedForXor)
    std::swap(a&: Op0, b&: Op1);

  // or-of-icmp folds, including reassociation into a logical-or operand.
  {
    ICmpInst *LHS = dyn_cast<ICmpInst>(Val: Op0);
    ICmpInst *RHS = dyn_cast<ICmpInst>(Val: Op1);
    if (LHS && RHS)
      if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ false))
        return replaceInstUsesWith(I, V: Res);

    // TODO: Make this recursive; it's a little tricky because an arbitrary
    // number of 'or' instructions might have to be created.
    Value *X, *Y;
    if (LHS && match(V: Op1, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
      bool IsLogical = isa<SelectInst>(Val: Op1);
      // LHS | (X || Y) --> (LHS || X) || Y
      if (auto *Cmp = dyn_cast<ICmpInst>(Val: X))
        if (Value *Res =
                foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ false, IsLogical))
          return replaceInstUsesWith(I, V: IsLogical
                                            ? Builder.CreateLogicalOr(Cond1: Res, Cond2: Y)
                                            : Builder.CreateOr(LHS: Res, RHS: Y));
      // LHS | (X || Y) --> X || (LHS | Y)
      // NOTE: the inner fold is non-logical because Y is only conditionally
      // evaluated in the original logical-or.
      if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y))
        if (Value *Res = foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ false,
                                          /* IsLogical */ false))
          return replaceInstUsesWith(I, V: IsLogical
                                            ? Builder.CreateLogicalOr(Cond1: X, Cond2: Res)
                                            : Builder.CreateOr(LHS: X, RHS: Res));
    }
    if (RHS && match(V: Op0, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
      bool IsLogical = isa<SelectInst>(Val: Op0);
      // (X || Y) | RHS --> (X || RHS) || Y
      if (auto *Cmp = dyn_cast<ICmpInst>(Val: X))
        if (Value *Res =
                foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ false, IsLogical))
          return replaceInstUsesWith(I, V: IsLogical
                                            ? Builder.CreateLogicalOr(Cond1: Res, Cond2: Y)
                                            : Builder.CreateOr(LHS: Res, RHS: Y));
      // (X || Y) | RHS --> X || (Y | RHS)
      if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y))
        if (Value *Res = foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ false,
                                          /* IsLogical */ false))
          return replaceInstUsesWith(I, V: IsLogical
                                            ? Builder.CreateLogicalOr(Cond1: X, Cond2: Res)
                                            : Builder.CreateOr(LHS: X, RHS: Res));
    }
  }

  if (FCmpInst *LHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 0)))
    if (FCmpInst *RHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 1)))
      if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false))
        return replaceInstUsesWith(I, V: Res);

  if (Instruction *FoldedFCmps = reassociateFCmps(BO&: I, Builder))
    return FoldedFCmps;

  if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
    return CastedOr;

  if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
    return Sel;

  // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
  // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
  //       with binop identity constant. But creating a select with non-constant
  //       arm may not be reversible due to poison semantics. Is that a good
  //       canonicalization?
  if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: A))), R: m_Value(V&: B))) &&
      A->getType()->isIntOrIntVectorTy(BitWidth: 1))
    return SelectInst::Create(C: A, S1: ConstantInt::getAllOnesValue(Ty), S2: B);

  // Note: If we've gotten to the point of visiting the outer OR, then the
  // inner one couldn't be simplified.  If it was a constant, then it won't
  // be simplified by a later pass either, so we try swapping the inner/outer
  // ORs in the hopes that we'll be able to simplify it this way.
  // (X|C) | V --> (X|V) | C
  ConstantInt *CI;
  if (Op0->hasOneUse() && !match(V: Op1, P: m_ConstantInt()) &&
      match(V: Op0, P: m_Or(L: m_Value(V&: A), R: m_ConstantInt(CI)))) {
    Value *Inner = Builder.CreateOr(LHS: A, RHS: Op1);
    Inner->takeName(V: Op0);
    return BinaryOperator::CreateOr(V1: Inner, V2: CI);
  }

  // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
  // Since this OR statement hasn't been optimized further yet, we hope
  // that this transformation will allow the new ORs to be optimized.
  {
    Value *X = nullptr, *Y = nullptr;
    if (Op0->hasOneUse() && Op1->hasOneUse() &&
        match(V: Op0, P: m_Select(C: m_Value(V&: X), L: m_Value(V&: A), R: m_Value(V&: B))) &&
        match(V: Op1, P: m_Select(C: m_Value(V&: Y), L: m_Value(V&: C), R: m_Value(V&: D))) && X == Y) {
      Value *orTrue = Builder.CreateOr(LHS: A, RHS: C);
      Value *orFalse = Builder.CreateOr(LHS: B, RHS: D);
      return SelectInst::Create(C: X, S1: orTrue, S2: orFalse);
    }
  }

  // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X.
  // The ashr spreads the sign bit of (Y - X), i.e. the result of X s> Y,
  // across the whole value.
  {
    Value *X, *Y;
    if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_AShr(
                               L: m_NSWSub(L: m_Value(V&: Y), R: m_Value(V&: X)),
                               R: m_SpecificInt(V: Ty->getScalarSizeInBits() - 1))),
                           R: m_Deferred(V: X)))) {
      Value *NewICmpInst = Builder.CreateICmpSGT(LHS: X, RHS: Y);
      Value *AllOnes = ConstantInt::getAllOnesValue(Ty);
      return SelectInst::Create(C: NewICmpInst, S1: AllOnes, S2: X);
    }
  }

  {
    // ((A & B) ^ A) | ((A & B) ^ B) -> A ^ B
    // (A ^ (A & B)) | (B ^ (A & B)) -> A ^ B
    // ((A & B) ^ B) | ((A & B) ^ A) -> A ^ B
    // (B ^ (A & B)) | (A ^ (A & B)) -> A ^ B
    // The commutative matchers cover the xor-operand order; TryXorOpt is
    // invoked both ways to cover the 'or' operand order.
    const auto TryXorOpt = [&](Value *Lhs, Value *Rhs) -> Instruction * {
      if (match(V: Lhs, P: m_c_Xor(L: m_And(L: m_Value(V&: A), R: m_Value(V&: B)), R: m_Deferred(V: A))) &&
          match(V: Rhs,
                P: m_c_Xor(L: m_And(L: m_Specific(V: A), R: m_Specific(V: B)), R: m_Deferred(V: B)))) {
        return BinaryOperator::CreateXor(V1: A, V2: B);
      }
      return nullptr;
    };

    if (Instruction *Result = TryXorOpt(Op0, Op1))
      return Result;
    if (Instruction *Result = TryXorOpt(Op1, Op0))
      return Result;
  }

  if (Instruction *V =
          canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
    return V;

  CmpInst::Predicate Pred;
  Value *Mul, *Ov, *MulIsNotZero, *UMulWithOv;
  // Check if the OR weakens the overflow condition for umul.with.overflow by
  // treating any non-zero result as overflow. In that case, we overflow if both
  // umul.with.overflow operands are != 0, as in that case the result can only
  // be 0, iff the multiplication overflows.
  if (match(V: &I,
            P: m_c_Or(L: m_CombineAnd(L: m_ExtractValue<1>(V: m_Value(V&: UMulWithOv)),
                                   R: m_Value(V&: Ov)),
                     R: m_CombineAnd(L: m_ICmp(Pred,
                                            L: m_CombineAnd(L: m_ExtractValue<0>(
                                                               V: m_Deferred(V: UMulWithOv)),
                                                           R: m_Value(V&: Mul)),
                                            R: m_ZeroInt()),
                                   R: m_Value(V&: MulIsNotZero)))) &&
      (Ov->hasOneUse() || (MulIsNotZero->hasOneUse() && Mul->hasOneUse())) &&
      Pred == CmpInst::ICMP_NE) {
    Value *A, *B;
    if (match(UMulWithOv, m_Intrinsic<Intrinsic::umul_with_overflow>(
                              m_Value(A), m_Value(B)))) {
      Value *NotNullA = Builder.CreateIsNotNull(Arg: A);
      Value *NotNullB = Builder.CreateIsNotNull(Arg: B);
      return BinaryOperator::CreateAnd(V1: NotNullA, V2: NotNullB);
    }
  }

  /// Res, Overflow = xxx_with_overflow X, C1
  /// Try to canonicalize the pattern "Overflow | icmp pred Res, C2" into
  /// "Overflow | icmp pred X, C2 +/- C1".
  const WithOverflowInst *WO;
  const Value *WOV;
  const APInt *C1, *C2;
  if (match(V: &I, P: m_c_Or(L: m_CombineAnd(L: m_ExtractValue<1>(V: m_CombineAnd(
                                              L: m_WithOverflowInst(I&: WO), R: m_Value(V&: WOV))),
                                        R: m_Value(V&: Ov)),
                          R: m_OneUse(SubPattern: m_ICmp(Pred, L: m_ExtractValue<0>(V: m_Deferred(V: WOV)),
                                                  R: m_APInt(Res&: C2))))) &&
      (WO->getBinaryOp() == Instruction::Add ||
       WO->getBinaryOp() == Instruction::Sub) &&
      (ICmpInst::isEquality(P: Pred) ||
       WO->isSigned() == ICmpInst::isSigned(predicate: Pred)) &&
      match(V: WO->getRHS(), P: m_APInt(Res&: C1))) {
    bool Overflow;
    // Undo the add/sub on the constant side; the fold is only valid for a
    // non-equality predicate if computing the adjusted constant didn't wrap.
    APInt NewC = WO->getBinaryOp() == Instruction::Add
                     ? (ICmpInst::isSigned(predicate: Pred) ? C2->ssub_ov(RHS: *C1, Overflow)
                                                         : C2->usub_ov(RHS: *C1, Overflow))
                     : (ICmpInst::isSigned(predicate: Pred) ? C2->sadd_ov(RHS: *C1, Overflow)
                                                         : C2->uadd_ov(RHS: *C1, Overflow));
    if (!Overflow || ICmpInst::isEquality(P: Pred)) {
      Value *NewCmp = Builder.CreateICmp(
          P: Pred, LHS: WO->getLHS(), RHS: ConstantInt::get(Ty: WO->getLHS()->getType(), V: NewC));
      return BinaryOperator::CreateOr(V1: Ov, V2: NewCmp);
    }
  }

  // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
  if (sinkNotIntoOtherHandOfLogicalOp(I))
    return &I;

  // Improve "get low bit mask up to and including bit X" pattern:
  // (1 << X) | ((1 << X) + -1) --> -1 l>> (bitwidth(x) - 1 - X)
  if (match(V: &I, P: m_c_Or(L: m_Add(L: m_Shl(L: m_One(), R: m_Value(V&: X)), R: m_AllOnes()),
                          R: m_Shl(L: m_One(), R: m_Deferred(V: X)))) &&
      match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_Value()), R: m_Value()))) {
    Value *Sub = Builder.CreateSub(
        LHS: ConstantInt::get(Ty, V: Ty->getScalarSizeInBits() - 1), RHS: X);
    return BinaryOperator::CreateLShr(V1: Constant::getAllOnesValue(Ty), V2: Sub);
  }

  // An or recurrence w/loop invariant step is equivalent to (or start, step)
  PHINode *PN = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (matchSimpleRecurrence(I: &I, P&: PN, Start, Step) && DT.dominates(Def: Step, User: PN))
    return replaceInstUsesWith(I, V: Builder.CreateOr(LHS: Start, RHS: Step));

  // (A & B) | (C | D) or (C | D) | (A & B)
  // Can be combined if C or D is of type (A/B & X)
  if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_And(L: m_Value(V&: A), R: m_Value(V&: B))),
                          R: m_OneUse(SubPattern: m_Or(L: m_Value(V&: C), R: m_Value(V&: D)))))) {
    // (A & B) | (C | ?) -> C | (? | (A & B))
    // (C | ?) | (A & B) -> C | (? | (A & B))
    // ... where ? is (A & X) or (B & X) in any operand order. Reassociating
    // lets a later visit fold the two 'and's that share an operand.
    if (match(V: D, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: A), R: m_Value()))) ||
        match(V: D, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: B), R: m_Value()))))
      return BinaryOperator::CreateOr(
          V1: C, V2: Builder.CreateOr(LHS: D, RHS: Builder.CreateAnd(LHS: A, RHS: B)));
    // (A & B) | (? | D) -> (? | (A & B)) | D
    // (? | D) | (A & B) -> (? | (A & B)) | D
    // ... same as above with C playing the role of the shared-'and' operand.
    if (match(V: C, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: A), R: m_Value()))) ||
        match(V: C, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: B), R: m_Value()))))
      return BinaryOperator::CreateOr(
          V1: Builder.CreateOr(LHS: C, RHS: Builder.CreateAnd(LHS: A, RHS: B)), V2: D);
  }

  if (Instruction *R = reassociateForUses(BO&: I, Builder))
    return R;

  if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
    return Canonicalized;

  if (Instruction *Folded = foldLogicOfIsFPClass(BO&: I, Op0, Op1))
    return Folded;

  if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
    return Res;

  // If we are setting the sign bit of a floating-point value, convert
  // this to fneg(fabs), then cast back to integer.
  //
  // If the result isn't immediately cast back to a float, this will increase
  // the number of instructions. This is still probably a better canonical form
  // as it enables FP value tracking.
  //
  // Assumes any IEEE-represented type has the sign bit in the high bit.
  //
  // This is generous interpretation of noimplicitfloat, this is not a true
  // floating-point operation.
  Value *CastOp;
  if (match(V: Op0, P: m_ElementWiseBitCast(Op: m_Value(V&: CastOp))) &&
      match(V: Op1, P: m_SignMask()) &&
      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
          Attribute::NoImplicitFloat)) {
    Type *EltTy = CastOp->getType()->getScalarType();
    if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::ID: fabs, V: CastOp);
      Value *FNegFAbs = Builder.CreateFNeg(V: FAbs);
      return new BitCastInst(FNegFAbs, I.getType());
    }
  }

  // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2
  if (match(V: Op0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_APInt(Res&: C1)))) &&
      match(V: Op1, P: m_APInt(Res&: C2))) {
    KnownBits KnownX = computeKnownBits(V: X, /*Depth*/ 0, CxtI: &I);
    if ((KnownX.One & *C2) == *C2)
      return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty, V: *C1 | *C2));
  }

  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
    return Res;

  // Try replacing one operand with zero inside the other (valid because
  // 'or' with a known-zero operand is the identity).
  if (Value *V =
          simplifyAndOrWithOpReplaced(V: Op0, Op: Op1, RepOp: Constant::getNullValue(Ty),
                                      /*SimplifyOnly*/ false, IC&: *this))
    return BinaryOperator::CreateOr(V1: V, V2: Op1);
  if (Value *V =
          simplifyAndOrWithOpReplaced(V: Op1, Op: Op0, RepOp: Constant::getNullValue(Ty),
                                      /*SimplifyOnly*/ false, IC&: *this))
    return BinaryOperator::CreateOr(V1: Op0, V2: V);

  // A disjoint 'or' is an 'add', so add-with-remainder folds apply.
  if (cast<PossiblyDisjointInst>(Val&: I).isDisjoint())
    if (Value *V = SimplifyAddWithRemainder(I))
      return replaceInstUsesWith(I, V);

  return nullptr;
}
3967 | |
3968 | /// A ^ B can be specified using other logic ops in a variety of patterns. We |
3969 | /// can fold these early and efficiently by morphing an existing instruction. |
3970 | static Instruction *foldXorToXor(BinaryOperator &I, |
3971 | InstCombiner::BuilderTy &Builder) { |
3972 | assert(I.getOpcode() == Instruction::Xor); |
3973 | Value *Op0 = I.getOperand(i_nocapture: 0); |
3974 | Value *Op1 = I.getOperand(i_nocapture: 1); |
3975 | Value *A, *B; |
3976 | |
3977 | // There are 4 commuted variants for each of the basic patterns. |
3978 | |
3979 | // (A & B) ^ (A | B) -> A ^ B |
3980 | // (A & B) ^ (B | A) -> A ^ B |
3981 | // (A | B) ^ (A & B) -> A ^ B |
3982 | // (A | B) ^ (B & A) -> A ^ B |
3983 | if (match(V: &I, P: m_c_Xor(L: m_And(L: m_Value(V&: A), R: m_Value(V&: B)), |
3984 | R: m_c_Or(L: m_Deferred(V: A), R: m_Deferred(V: B))))) |
3985 | return BinaryOperator::CreateXor(V1: A, V2: B); |
3986 | |
3987 | // (A | ~B) ^ (~A | B) -> A ^ B |
3988 | // (~B | A) ^ (~A | B) -> A ^ B |
3989 | // (~A | B) ^ (A | ~B) -> A ^ B |
3990 | // (B | ~A) ^ (A | ~B) -> A ^ B |
3991 | if (match(V: &I, P: m_Xor(L: m_c_Or(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))), |
3992 | R: m_c_Or(L: m_Not(V: m_Deferred(V: A)), R: m_Deferred(V: B))))) |
3993 | return BinaryOperator::CreateXor(V1: A, V2: B); |
3994 | |
3995 | // (A & ~B) ^ (~A & B) -> A ^ B |
3996 | // (~B & A) ^ (~A & B) -> A ^ B |
3997 | // (~A & B) ^ (A & ~B) -> A ^ B |
3998 | // (B & ~A) ^ (A & ~B) -> A ^ B |
3999 | if (match(V: &I, P: m_Xor(L: m_c_And(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))), |
4000 | R: m_c_And(L: m_Not(V: m_Deferred(V: A)), R: m_Deferred(V: B))))) |
4001 | return BinaryOperator::CreateXor(V1: A, V2: B); |
4002 | |
4003 | // For the remaining cases we need to get rid of one of the operands. |
4004 | if (!Op0->hasOneUse() && !Op1->hasOneUse()) |
4005 | return nullptr; |
4006 | |
4007 | // (A | B) ^ ~(A & B) -> ~(A ^ B) |
4008 | // (A | B) ^ ~(B & A) -> ~(A ^ B) |
4009 | // (A & B) ^ ~(A | B) -> ~(A ^ B) |
4010 | // (A & B) ^ ~(B | A) -> ~(A ^ B) |
4011 | // Complexity sorting ensures the not will be on the right side. |
4012 | if ((match(V: Op0, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4013 | match(V: Op1, P: m_Not(V: m_c_And(L: m_Specific(V: A), R: m_Specific(V: B))))) || |
4014 | (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4015 | match(V: Op1, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))))) |
4016 | return BinaryOperator::CreateNot(Op: Builder.CreateXor(LHS: A, RHS: B)); |
4017 | |
4018 | return nullptr; |
4019 | } |
4020 | |
/// Fold 'xor' of two icmp instructions (the operands of \p I) into a single
/// compare or constant where possible. Strategies, tried in order:
///  1. If both predicates are foldable (same signedness class) and the
///     compares share operands, combine the predicate bit-codes with xor.
///  2. If both compares are sign-bit tests, turn the xor of tests into a
///     single sign-bit test of the xor'd values.
///  3. If both compare the same LHS against constants, merge via
///     ConstantRange reasoning (symmetric difference of the two ranges).
///  4. Otherwise, decompose the xor into and-of-icmps form, which has many
///     more folds available.
/// Returns the replacement value, or nullptr if no fold applies. Note that
/// strategy 4 may mutate RHS/LHS in place (predicate inversion) and rewrite
/// other users of the inverted compare.
Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                        BinaryOperator &I) {
  assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
         I.getOperand(1) == RHS && "Should be 'xor' with these operands" );

  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  Value *LHS0 = LHS->getOperand(i_nocapture: 0), *LHS1 = LHS->getOperand(i_nocapture: 1);
  Value *RHS0 = RHS->getOperand(i_nocapture: 0), *RHS1 = RHS->getOperand(i_nocapture: 1);

  if (predicatesFoldable(P1: PredL, P2: PredR)) {
    // Normalize operand order (swap LHS's operands and predicate) so both
    // compares are of the form (pred A, B) before combining codes.
    if (LHS0 == RHS1 && LHS1 == RHS0) {
      std::swap(a&: LHS0, b&: LHS1);
      PredL = ICmpInst::getSwappedPredicate(pred: PredL);
    }
    if (LHS0 == RHS0 && LHS1 == RHS1) {
      // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
      unsigned Code = getICmpCode(Pred: PredL) ^ getICmpCode(Pred: PredR);
      bool IsSigned = LHS->isSigned() || RHS->isSigned();
      return getNewICmpValue(Code, Sign: IsSigned, LHS: LHS0, RHS: LHS1, Builder);
    }
  }

  // TODO: This can be generalized to compares of non-signbits using
  // decomposeBitTestICmp(). It could be enhanced more by using (something like)
  // foldLogOpOfMaskedICmps().
  const APInt *LC, *RC;
  if (match(V: LHS1, P: m_APInt(Res&: LC)) && match(V: RHS1, P: m_APInt(Res&: RC)) &&
      LHS0->getType() == RHS0->getType() &&
      LHS0->getType()->isIntOrIntVectorTy()) {
    // Convert xor of signbit tests to signbit test of xor'd values:
    // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
    // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
    // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
    // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
    bool TrueIfSignedL, TrueIfSignedR;
    // One-use check keeps the instruction count from growing: at least one
    // of the original compares must die for the new xor+compare to pay off.
    if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
        isSignBitCheck(Pred: PredL, RHS: *LC, TrueIfSigned&: TrueIfSignedL) &&
        isSignBitCheck(Pred: PredR, RHS: *RC, TrueIfSigned&: TrueIfSignedR)) {
      Value *XorLR = Builder.CreateXor(LHS: LHS0, RHS: RHS0);
      return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(Arg: XorLR) :
                                              Builder.CreateIsNotNeg(Arg: XorLR);
    }

    // Fold (icmp pred1 X, C1) ^ (icmp pred2 X, C2)
    // into a single comparison using range-based reasoning.
    if (LHS0 == RHS0) {
      // The xor is true exactly on the symmetric difference of the two
      // regions: (CR1 u CR2) \ (CR1 n CR2). All set operations must be
      // exactly representable as a single range or we bail out.
      ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred: PredL, Other: *LC);
      ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred: PredR, Other: *RC);
      auto CRUnion = CR1.exactUnionWith(CR: CR2);
      auto CRIntersect = CR1.exactIntersectWith(CR: CR2);
      if (CRUnion && CRIntersect)
        if (auto CR = CRUnion->exactIntersectWith(CR: CRIntersect->inverse())) {
          if (CR->isFullSet())
            return ConstantInt::getTrue(Ty: I.getType());
          if (CR->isEmptySet())
            return ConstantInt::getFalse(Ty: I.getType());

          CmpInst::Predicate NewPred;
          APInt NewC, Offset;
          CR->getEquivalentICmp(Pred&: NewPred, RHS&: NewC, Offset);

          // Profitability: with no offset we add one compare, so one of the
          // originals must die; with an offset we also add an 'add', so both
          // originals must die.
          if ((Offset.isZero() && (LHS->hasOneUse() || RHS->hasOneUse())) ||
              (LHS->hasOneUse() && RHS->hasOneUse())) {
            Value *NewV = LHS0;
            Type *Ty = LHS0->getType();
            if (!Offset.isZero())
              NewV = Builder.CreateAdd(LHS: NewV, RHS: ConstantInt::get(Ty, V: Offset));
            return Builder.CreateICmp(P: NewPred, LHS: NewV,
                                      RHS: ConstantInt::get(Ty, V: NewC));
          }
        }
    }
  }

  // Instead of trying to imitate the folds for and/or, decompose this 'xor'
  // into those logic ops. That is, try to turn this into an and-of-icmps
  // because we have many folds for that pattern.
  //
  // This is based on a truth table definition of xor:
  // X ^ Y --> (X | Y) & !(X & Y)
  if (Value *OrICmp = simplifyBinOp(Opcode: Instruction::Or, LHS, RHS, Q: SQ)) {
    // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
    // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
    if (Value *AndICmp = simplifyBinOp(Opcode: Instruction::And, LHS, RHS, Q: SQ)) {
      // TODO: Independently handle cases where the 'and' side is a constant.
      ICmpInst *X = nullptr, *Y = nullptr;
      if (OrICmp == LHS && AndICmp == RHS) {
        // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y
        X = LHS;
        Y = RHS;
      }
      if (OrICmp == RHS && AndICmp == LHS) {
        // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X
        X = RHS;
        Y = LHS;
      }
      if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(V: Y, IgnoredUser: &I))) {
        // Invert the predicate of 'Y', thus inverting its output.
        Y->setPredicate(Y->getInversePredicate());
        // So, are there other uses of Y?
        if (!Y->hasOneUse()) {
          // We need to adapt other uses of Y though. Get a value that matches
          // the original value of Y before inversion. While this increases
          // immediate instruction count, we have just ensured that all the
          // users are freely-invertible, so that 'not' *will* get folded away.
          BuilderTy::InsertPointGuard Guard(Builder);
          // Set insertion point to right after the Y.
          Builder.SetInsertPoint(TheBB: Y->getParent(), IP: ++(Y->getIterator()));
          Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" );
          // Replace all uses of Y (excluding the one in NotY!) with NotY.
          Worklist.pushUsersToWorkList(I&: *Y);
          Y->replaceUsesWithIf(New: NotY,
                               ShouldReplace: [NotY](Use &U) { return U.getUser() != NotY; });
        }
        // All done.
        return Builder.CreateAnd(LHS, RHS);
      }
    }
  }

  return nullptr;
}
4143 | |
4144 | /// If we have a masked merge, in the canonical form of: |
4145 | /// (assuming that A only has one use.) |
4146 | /// | A | |B| |
4147 | /// ((x ^ y) & M) ^ y |
4148 | /// | D | |
4149 | /// * If M is inverted: |
4150 | /// | D | |
4151 | /// ((x ^ y) & ~M) ^ y |
4152 | /// We can canonicalize by swapping the final xor operand |
4153 | /// to eliminate the 'not' of the mask. |
4154 | /// ((x ^ y) & M) ^ x |
4155 | /// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops |
4156 | /// because that shortens the dependency chain and improves analysis: |
4157 | /// (x & M) | (y & ~M) |
4158 | static Instruction *visitMaskedMerge(BinaryOperator &I, |
4159 | InstCombiner::BuilderTy &Builder) { |
4160 | Value *B, *X, *D; |
4161 | Value *M; |
4162 | if (!match(V: &I, P: m_c_Xor(L: m_Value(V&: B), |
4163 | R: m_OneUse(SubPattern: m_c_And( |
4164 | L: m_CombineAnd(L: m_c_Xor(L: m_Deferred(V: B), R: m_Value(V&: X)), |
4165 | R: m_Value(V&: D)), |
4166 | R: m_Value(V&: M)))))) |
4167 | return nullptr; |
4168 | |
4169 | Value *NotM; |
4170 | if (match(V: M, P: m_Not(V: m_Value(V&: NotM)))) { |
4171 | // De-invert the mask and swap the value in B part. |
4172 | Value *NewA = Builder.CreateAnd(LHS: D, RHS: NotM); |
4173 | return BinaryOperator::CreateXor(V1: NewA, V2: X); |
4174 | } |
4175 | |
4176 | Constant *C; |
4177 | if (D->hasOneUse() && match(V: M, P: m_Constant(C))) { |
4178 | // Propagating undef is unsafe. Clamp undef elements to -1. |
4179 | Type *EltTy = C->getType()->getScalarType(); |
4180 | C = Constant::replaceUndefsWith(C, Replacement: ConstantInt::getAllOnesValue(Ty: EltTy)); |
4181 | // Unfold. |
4182 | Value *LHS = Builder.CreateAnd(LHS: X, RHS: C); |
4183 | Value *NotC = Builder.CreateNot(V: C); |
4184 | Value *RHS = Builder.CreateAnd(LHS: B, RHS: NotC); |
4185 | return BinaryOperator::CreateOr(V1: LHS, V2: RHS); |
4186 | } |
4187 | |
4188 | return nullptr; |
4189 | } |
4190 | |
4191 | static Instruction *foldNotXor(BinaryOperator &I, |
4192 | InstCombiner::BuilderTy &Builder) { |
4193 | Value *X, *Y; |
4194 | // FIXME: one-use check is not needed in general, but currently we are unable |
4195 | // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182) |
4196 | if (!match(V: &I, P: m_Not(V: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y)))))) |
4197 | return nullptr; |
4198 | |
4199 | auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) { |
4200 | return A == C || A == D || B == C || B == D; |
4201 | }; |
4202 | |
4203 | Value *A, *B, *C, *D; |
4204 | // Canonicalize ~((A & B) ^ (A | ?)) -> (A & B) | ~(A | ?) |
4205 | // 4 commuted variants |
4206 | if (match(V: X, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4207 | match(V: Y, P: m_Or(L: m_Value(V&: C), R: m_Value(V&: D))) && hasCommonOperand(A, B, C, D)) { |
4208 | Value *NotY = Builder.CreateNot(V: Y); |
4209 | return BinaryOperator::CreateOr(V1: X, V2: NotY); |
4210 | }; |
4211 | |
4212 | // Canonicalize ~((A | ?) ^ (A & B)) -> (A & B) | ~(A | ?) |
4213 | // 4 commuted variants |
4214 | if (match(V: Y, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4215 | match(V: X, P: m_Or(L: m_Value(V&: C), R: m_Value(V&: D))) && hasCommonOperand(A, B, C, D)) { |
4216 | Value *NotX = Builder.CreateNot(V: X); |
4217 | return BinaryOperator::CreateOr(V1: Y, V2: NotX); |
4218 | }; |
4219 | |
4220 | return nullptr; |
4221 | } |
4222 | |
/// Canonicalize a shifty way to code absolute value to the more common pattern
/// that uses negation and select:
///   xor (add A, (ashr A, BW-1)), (ashr A, BW-1)  -->  (A < 0) ? -A : A
static Instruction *canonicalizeAbs(BinaryOperator &Xor,
                                    InstCombiner::BuilderTy &Builder) {
  assert(Xor.getOpcode() == Instruction::Xor && "Expected an xor instruction." );

  // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
  // We're relying on the fact that we only do this transform when the shift has
  // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
  // instructions).
  Value *Op0 = Xor.getOperand(i_nocapture: 0), *Op1 = Xor.getOperand(i_nocapture: 1);
  if (Op0->hasNUses(N: 2))
    std::swap(a&: Op0, b&: Op1);

  Type *Ty = Xor.getType();
  Value *A;
  const APInt *ShAmt;
  // Require: Op1 is an arithmetic shift by bitwidth-1 (a sign-bit smear) with
  // exactly its 2 expected uses (the add and the xor), and Op0 is the one-use
  // add of A and that smear.
  if (match(V: Op1, P: m_AShr(L: m_Value(V&: A), R: m_APInt(Res&: ShAmt))) &&
      Op1->hasNUses(N: 2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
      match(V: Op0, P: m_OneUse(SubPattern: m_c_Add(L: m_Specific(V: A), R: m_Specific(V: Op1))))) {
    // Op1 = ashr i32 A, 31 ; smear the sign bit
    // xor (add A, Op1), Op1 ; add -1 and flip bits if negative
    // --> (A < 0) ? -A : A
    Value *IsNeg = Builder.CreateIsNeg(Arg: A);
    // Copy the nsw flags from the add to the negate.
    auto *Add = cast<BinaryOperator>(Val: Op0);
    // NOTE(review): if the add is 'nuw', a negative A would make the add
    // (A + -1) wrap unsigned, so that path appears to be poison and the
    // negative select arm is immaterial -- zero is used instead of building
    // a negate. Presumably this avoids creating a dead instruction; confirm
    // against upstream poison reasoning.
    Value *NegA = Add->hasNoUnsignedWrap()
                      ? Constant::getNullValue(Ty: A->getType())
                      : Builder.CreateNeg(V: A, Name: "" , HasNSW: Add->hasNoSignedWrap());
    return SelectInst::Create(C: IsNeg, S1: NegA, S2: A);
  }
  return nullptr;
}
4256 | |
4257 | static bool canFreelyInvert(InstCombiner &IC, Value *Op, |
4258 | Instruction *IgnoredUser) { |
4259 | auto *I = dyn_cast<Instruction>(Val: Op); |
4260 | return I && IC.isFreeToInvert(V: I, /*WillInvertAllUses=*/true) && |
4261 | IC.canFreelyInvertAllUsersOf(V: I, IgnoredUser); |
4262 | } |
4263 | |
4264 | static Value *freelyInvert(InstCombinerImpl &IC, Value *Op, |
4265 | Instruction *IgnoredUser) { |
4266 | auto *I = cast<Instruction>(Val: Op); |
4267 | IC.Builder.SetInsertPoint(*I->getInsertionPointAfterDef()); |
4268 | Value *NotOp = IC.Builder.CreateNot(V: Op, Name: Op->getName() + ".not" ); |
4269 | Op->replaceUsesWithIf(New: NotOp, |
4270 | ShouldReplace: [NotOp](Use &U) { return U.getUser() != NotOp; }); |
4271 | IC.freelyInvertAllUsersOf(V: NotOp, IgnoredUser); |
4272 | return NotOp; |
4273 | } |
4274 | |
4275 | // Transform |
4276 | // z = ~(x &/| y) |
4277 | // into: |
4278 | // z = ((~x) |/& (~y)) |
4279 | // iff both x and y are free to invert and all uses of z can be freely updated. |
4280 | bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) { |
4281 | Value *Op0, *Op1; |
4282 | if (!match(V: &I, P: m_LogicalOp(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) |
4283 | return false; |
4284 | |
4285 | // If this logic op has not been simplified yet, just bail out and let that |
4286 | // happen first. Otherwise, the code below may wrongly invert. |
4287 | if (Op0 == Op1) |
4288 | return false; |
4289 | |
4290 | Instruction::BinaryOps NewOpc = |
4291 | match(V: &I, P: m_LogicalAnd()) ? Instruction::Or : Instruction::And; |
4292 | bool IsBinaryOp = isa<BinaryOperator>(Val: I); |
4293 | |
4294 | // Can our users be adapted? |
4295 | if (!InstCombiner::canFreelyInvertAllUsersOf(V: &I, /*IgnoredUser=*/nullptr)) |
4296 | return false; |
4297 | |
4298 | // And can the operands be adapted? |
4299 | if (!canFreelyInvert(IC&: *this, Op: Op0, IgnoredUser: &I) || !canFreelyInvert(IC&: *this, Op: Op1, IgnoredUser: &I)) |
4300 | return false; |
4301 | |
4302 | Op0 = freelyInvert(IC&: *this, Op: Op0, IgnoredUser: &I); |
4303 | Op1 = freelyInvert(IC&: *this, Op: Op1, IgnoredUser: &I); |
4304 | |
4305 | Builder.SetInsertPoint(*I.getInsertionPointAfterDef()); |
4306 | Value *NewLogicOp; |
4307 | if (IsBinaryOp) |
4308 | NewLogicOp = Builder.CreateBinOp(Opc: NewOpc, LHS: Op0, RHS: Op1, Name: I.getName() + ".not" ); |
4309 | else |
4310 | NewLogicOp = |
4311 | Builder.CreateLogicalOp(Opc: NewOpc, Cond1: Op0, Cond2: Op1, Name: I.getName() + ".not" ); |
4312 | |
4313 | replaceInstUsesWith(I, V: NewLogicOp); |
4314 | // We can not just create an outer `not`, it will most likely be immediately |
4315 | // folded back, reconstructing our initial pattern, and causing an |
4316 | // infinite combine loop, so immediately manually fold it away. |
4317 | freelyInvertAllUsersOf(V: NewLogicOp); |
4318 | return true; |
4319 | } |
4320 | |
4321 | // Transform |
4322 | // z = (~x) &/| y |
4323 | // into: |
4324 | // z = ~(x |/& (~y)) |
4325 | // iff y is free to invert and all uses of z can be freely updated. |
4326 | bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) { |
4327 | Value *Op0, *Op1; |
4328 | if (!match(V: &I, P: m_LogicalOp(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) |
4329 | return false; |
4330 | Instruction::BinaryOps NewOpc = |
4331 | match(V: &I, P: m_LogicalAnd()) ? Instruction::Or : Instruction::And; |
4332 | bool IsBinaryOp = isa<BinaryOperator>(Val: I); |
4333 | |
4334 | Value *NotOp0 = nullptr; |
4335 | Value *NotOp1 = nullptr; |
4336 | Value **OpToInvert = nullptr; |
4337 | if (match(V: Op0, P: m_Not(V: m_Value(V&: NotOp0))) && canFreelyInvert(IC&: *this, Op: Op1, IgnoredUser: &I)) { |
4338 | Op0 = NotOp0; |
4339 | OpToInvert = &Op1; |
4340 | } else if (match(V: Op1, P: m_Not(V: m_Value(V&: NotOp1))) && |
4341 | canFreelyInvert(IC&: *this, Op: Op0, IgnoredUser: &I)) { |
4342 | Op1 = NotOp1; |
4343 | OpToInvert = &Op0; |
4344 | } else |
4345 | return false; |
4346 | |
4347 | // And can our users be adapted? |
4348 | if (!InstCombiner::canFreelyInvertAllUsersOf(V: &I, /*IgnoredUser=*/nullptr)) |
4349 | return false; |
4350 | |
4351 | *OpToInvert = freelyInvert(IC&: *this, Op: *OpToInvert, IgnoredUser: &I); |
4352 | |
4353 | Builder.SetInsertPoint(*I.getInsertionPointAfterDef()); |
4354 | Value *NewBinOp; |
4355 | if (IsBinaryOp) |
4356 | NewBinOp = Builder.CreateBinOp(Opc: NewOpc, LHS: Op0, RHS: Op1, Name: I.getName() + ".not" ); |
4357 | else |
4358 | NewBinOp = Builder.CreateLogicalOp(Opc: NewOpc, Cond1: Op0, Cond2: Op1, Name: I.getName() + ".not" ); |
4359 | replaceInstUsesWith(I, V: NewBinOp); |
4360 | // We can not just create an outer `not`, it will most likely be immediately |
4361 | // folded back, reconstructing our initial pattern, and causing an |
4362 | // infinite combine loop, so immediately manually fold it away. |
4363 | freelyInvertAllUsersOf(V: NewBinOp); |
4364 | return true; |
4365 | } |
4366 | |
/// Try to fold a bitwise 'not' (canonically: xor X, -1). Tries, in order:
/// DeMorgan variants with an already-inverted operand, 'not' of various
/// binary operators (or-of-neg, shifts, add/sub), predicate inversion for
/// compares, hoisting 'not' through bool casts, sinking 'not' into logical
/// ops, min/max and is.fpclass inversion, select-of-compares inversion, the
/// not-of-xor canonicalization, and finally the generic "freely invertible"
/// query. Returns the replacement instruction, &I if I was updated in place
/// via use replacement, or nullptr. May mutate matched instructions in place
/// (e.g. compare predicates, select arms, intrinsic arguments).
Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
  Value *NotOp;
  if (!match(V: &I, P: m_Not(V: m_Value(V&: NotOp))))
    return nullptr;

  // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
  // We must eliminate the and/or (one-use) for these transforms to not increase
  // the instruction count.
  //
  // ~(~X & Y) --> (X | ~Y)
  // ~(Y & ~X) --> (X | ~Y)
  //
  // Note: The logical matches do not check for the commuted patterns because
  // those are handled via SimplifySelectsFeedingBinaryOp().
  Type *Ty = I.getType();
  Value *X, *Y;
  if (match(V: NotOp, P: m_OneUse(SubPattern: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) {
    Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" );
    return BinaryOperator::CreateOr(V1: X, V2: NotY);
  }
  // Same fold for the select form of 'and' (logical and short-circuits, so a
  // select with a constant arm is produced instead of a plain 'or').
  if (match(V: NotOp, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) {
    Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" );
    return SelectInst::Create(C: X, S1: ConstantInt::getTrue(Ty), S2: NotY);
  }

  // ~(~X | Y) --> (X & ~Y)
  // ~(Y | ~X) --> (X & ~Y)
  if (match(V: NotOp, P: m_OneUse(SubPattern: m_c_Or(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) {
    Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" );
    return BinaryOperator::CreateAnd(V1: X, V2: NotY);
  }
  if (match(V: NotOp, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) {
    Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" );
    return SelectInst::Create(C: X, S1: NotY, S2: ConstantInt::getFalse(Ty));
  }

  // Is this a 'not' (~) fed by a binary operator?
  BinaryOperator *NotVal;
  if (match(V: NotOp, P: m_BinOp(I&: NotVal))) {
    // ~((-X) | Y) --> (X - 1) & (~Y)
    if (match(V: NotVal,
              P: m_OneUse(SubPattern: m_c_Or(L: m_OneUse(SubPattern: m_Neg(V: m_Value(V&: X))), R: m_Value(V&: Y))))) {
      Value *DecX = Builder.CreateAdd(LHS: X, RHS: ConstantInt::getAllOnesValue(Ty));
      Value *NotY = Builder.CreateNot(V: Y);
      return BinaryOperator::CreateAnd(V1: DecX, V2: NotY);
    }

    // ~(~X >>s Y) --> (X >>s Y)
    if (match(V: NotVal, P: m_AShr(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))
      return BinaryOperator::CreateAShr(V1: X, V2: Y);

    // Treat lshr with non-negative operand as ashr.
    // ~(~X >>u Y) --> (X >>s Y) iff X is known negative
    if (match(V: NotVal, P: m_LShr(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))) &&
        isKnownNegative(V: X, DL: SQ.getWithInstruction(I: NotVal)))
      return BinaryOperator::CreateAShr(V1: X, V2: Y);

    // Bit-hack form of a signbit test for iN type:
    // ~(X >>s (N - 1)) --> sext i1 (X > -1) to iN
    unsigned FullShift = Ty->getScalarSizeInBits() - 1;
    if (match(V: NotVal, P: m_OneUse(SubPattern: m_AShr(L: m_Value(V&: X), R: m_SpecificInt(V: FullShift))))) {
      Value *IsNotNeg = Builder.CreateIsNotNeg(Arg: X, Name: "isnotneg" );
      return new SExtInst(IsNotNeg, Ty);
    }

    // If we are inverting a right-shifted constant, we may be able to eliminate
    // the 'not' by inverting the constant and using the opposite shift type.
    // Canonicalization rules ensure that only a negative constant uses 'ashr',
    // but we must check that in case that transform has not fired yet.

    // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
    Constant *C;
    if (match(V: NotVal, P: m_AShr(L: m_Constant(C), R: m_Value(V&: Y))) &&
        match(V: C, P: m_Negative()))
      return BinaryOperator::CreateLShr(V1: ConstantExpr::getNot(C), V2: Y);

    // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
    if (match(V: NotVal, P: m_LShr(L: m_Constant(C), R: m_Value(V&: Y))) &&
        match(V: C, P: m_NonNegative()))
      return BinaryOperator::CreateAShr(V1: ConstantExpr::getNot(C), V2: Y);

    // ~(X + C) --> ~C - X
    if (match(V: NotVal, P: m_Add(L: m_Value(V&: X), R: m_ImmConstant(C))))
      return BinaryOperator::CreateSub(V1: ConstantExpr::getNot(C), V2: X);

    // ~(X - Y) --> ~X + Y
    // FIXME: is it really beneficial to sink the `not` here?
    if (match(V: NotVal, P: m_Sub(L: m_Value(V&: X), R: m_Value(V&: Y))))
      if (isa<Constant>(Val: X) || NotVal->hasOneUse())
        return BinaryOperator::CreateAdd(V1: Builder.CreateNot(V: X), V2: Y);

    // ~(~X + Y) --> X - Y
    if (match(V: NotVal, P: m_c_Add(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))
      return BinaryOperator::CreateWithCopiedFlags(Opc: Instruction::Sub, V1: X, V2: Y,
                                                   CopyO: NotVal);
  }

  // not (cmp A, B) = !cmp A, B
  // The compare is inverted in place; any extra users of it are then rewritten
  // to absorb the inversion (all were verified freely-invertible first).
  CmpInst::Predicate Pred;
  if (match(V: NotOp, P: m_Cmp(Pred, L: m_Value(), R: m_Value())) &&
      (NotOp->hasOneUse() ||
       InstCombiner::canFreelyInvertAllUsersOf(V: cast<Instruction>(Val: NotOp),
                                               /*IgnoredUser=*/nullptr))) {
    cast<CmpInst>(Val: NotOp)->setPredicate(CmpInst::getInversePredicate(pred: Pred));
    freelyInvertAllUsersOf(V: NotOp);
    return &I;
  }

  // Move a 'not' ahead of casts of a bool to enable logic reduction:
  // not (bitcast (sext i1 X)) --> bitcast (sext (not i1 X))
  if (match(V: NotOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X)))))) && X->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
    Type *SextTy = cast<BitCastOperator>(Val: NotOp)->getSrcTy();
    Value *NotX = Builder.CreateNot(V: X);
    Value *Sext = Builder.CreateSExt(V: NotX, DestTy: SextTy);
    return CastInst::CreateBitOrPointerCast(S: Sext, Ty);
  }

  if (auto *NotOpI = dyn_cast<Instruction>(Val: NotOp))
    if (sinkNotIntoLogicalOp(I&: *NotOpI))
      return &I;

  // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
  // ~min(~X, ~Y) --> max(X, Y)
  // ~max(~X, Y) --> min(X, ~Y)
  auto *II = dyn_cast<IntrinsicInst>(Val: NotOp);
  if (II && II->hasOneUse()) {
    if (match(V: NotOp, P: m_c_MaxOrMin(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y)))) {
      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID());
      Value *NotY = Builder.CreateNot(V: Y);
      Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: NotY);
      return replaceInstUsesWith(I, V: InvMaxMin);
    }

    // not (is.fpclass X, Mask) --> is.fpclass X, (~Mask & fcAllFlags)
    // The intrinsic's class mask is complemented in place.
    if (II->getIntrinsicID() == Intrinsic::is_fpclass) {
      ConstantInt *ClassMask = cast<ConstantInt>(Val: II->getArgOperand(i: 1));
      II->setArgOperand(
          i: 1, v: ConstantInt::get(Ty: ClassMask->getType(),
                                V: ~ClassMask->getZExtValue() & fcAllFlags));
      return replaceInstUsesWith(I, V: II);
    }
  }

  if (NotOp->hasOneUse()) {
    // Pull 'not' into operands of select if both operands are one-use compares
    // or one is one-use compare and the other one is a constant.
    // Inverting the predicates eliminates the 'not' operation.
    // Example:
    //   not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) -->
    //     select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
    //   not (select ?, (cmp TPred, ?, ?), true -->
    //     select ?, (cmp InvTPred, ?, ?), false
    if (auto *Sel = dyn_cast<SelectInst>(Val: NotOp)) {
      Value *TV = Sel->getTrueValue();
      Value *FV = Sel->getFalseValue();
      auto *CmpT = dyn_cast<CmpInst>(Val: TV);
      auto *CmpF = dyn_cast<CmpInst>(Val: FV);
      bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(Val: TV);
      bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(Val: FV);
      if (InvertibleT && InvertibleF) {
        // Each arm is either a one-use compare (flip its predicate) or a
        // constant (replace with its complement).
        if (CmpT)
          CmpT->setPredicate(CmpT->getInversePredicate());
        else
          Sel->setTrueValue(ConstantExpr::getNot(C: cast<Constant>(Val: TV)));
        if (CmpF)
          CmpF->setPredicate(CmpF->getInversePredicate());
        else
          Sel->setFalseValue(ConstantExpr::getNot(C: cast<Constant>(Val: FV)));
        return replaceInstUsesWith(I, V: Sel);
      }
    }
  }

  if (Instruction *NewXor = foldNotXor(I, Builder))
    return NewXor;

  // TODO: Could handle multi-use better by checking if all uses of NotOp (other
  // than I) can be inverted.
  if (Value *R = getFreelyInverted(V: NotOp, WillInvertAllUses: NotOp->hasOneUse(), Builder: &Builder))
    return replaceInstUsesWith(I, V: R);

  return nullptr;
}
4549 | |
4550 | // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches |
4551 | // here. We should standardize that construct where it is needed or choose some |
4552 | // other way to ensure that commutated variants of patterns are not missed. |
4553 | Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { |
4554 | if (Value *V = simplifyXorInst(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1), |
4555 | Q: SQ.getWithInstruction(I: &I))) |
4556 | return replaceInstUsesWith(I, V); |
4557 | |
4558 | if (SimplifyAssociativeOrCommutative(I)) |
4559 | return &I; |
4560 | |
4561 | if (Instruction *X = foldVectorBinop(Inst&: I)) |
4562 | return X; |
4563 | |
4564 | if (Instruction *Phi = foldBinopWithPhiOperands(BO&: I)) |
4565 | return Phi; |
4566 | |
4567 | if (Instruction *NewXor = foldXorToXor(I, Builder)) |
4568 | return NewXor; |
4569 | |
4570 | // (A&B)^(A&C) -> A&(B^C) etc |
4571 | if (Value *V = foldUsingDistributiveLaws(I)) |
4572 | return replaceInstUsesWith(I, V); |
4573 | |
4574 | // See if we can simplify any instructions used by the instruction whose sole |
4575 | // purpose is to compute bits we don't care about. |
4576 | if (SimplifyDemandedInstructionBits(Inst&: I)) |
4577 | return &I; |
4578 | |
4579 | if (Instruction *R = foldNot(I)) |
4580 | return R; |
4581 | |
4582 | if (Instruction *R = foldBinOpShiftWithShift(I)) |
4583 | return R; |
4584 | |
4585 | // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M) |
4586 | // This it a special case in haveNoCommonBitsSet, but the computeKnownBits |
4587 | // calls in there are unnecessary as SimplifyDemandedInstructionBits should |
4588 | // have already taken care of those cases. |
4589 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
4590 | Value *M; |
4591 | if (match(V: &I, P: m_c_Xor(L: m_c_And(L: m_Not(V: m_Value(V&: M)), R: m_Value()), |
4592 | R: m_c_And(L: m_Deferred(V: M), R: m_Value())))) |
4593 | return BinaryOperator::CreateDisjointOr(V1: Op0, V2: Op1); |
4594 | |
4595 | if (Instruction *Xor = visitMaskedMerge(I, Builder)) |
4596 | return Xor; |
4597 | |
4598 | Value *X, *Y; |
4599 | Constant *C1; |
4600 | if (match(V: Op1, P: m_Constant(C&: C1))) { |
4601 | Constant *C2; |
4602 | |
4603 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: X), R: m_ImmConstant(C&: C2)))) && |
4604 | match(V: C1, P: m_ImmConstant())) { |
4605 | // (X | C2) ^ C1 --> (X & ~C2) ^ (C1^C2) |
4606 | C2 = Constant::replaceUndefsWith( |
4607 | C: C2, Replacement: Constant::getAllOnesValue(Ty: C2->getType()->getScalarType())); |
4608 | Value *And = Builder.CreateAnd( |
4609 | LHS: X, RHS: Constant::mergeUndefsWith(C: ConstantExpr::getNot(C: C2), Other: C1)); |
4610 | return BinaryOperator::CreateXor( |
4611 | V1: And, V2: Constant::mergeUndefsWith(C: ConstantExpr::getXor(C1, C2), Other: C1)); |
4612 | } |
4613 | |
4614 | // Use DeMorgan and reassociation to eliminate a 'not' op. |
4615 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Not(V: m_Value(V&: X)), R: m_Constant(C&: C2))))) { |
4616 | // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1 |
4617 | Value *And = Builder.CreateAnd(LHS: X, RHS: ConstantExpr::getNot(C: C2)); |
4618 | return BinaryOperator::CreateXor(V1: And, V2: ConstantExpr::getNot(C: C1)); |
4619 | } |
4620 | if (match(V: Op0, P: m_OneUse(SubPattern: m_And(L: m_Not(V: m_Value(V&: X)), R: m_Constant(C&: C2))))) { |
4621 | // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1 |
4622 | Value *Or = Builder.CreateOr(LHS: X, RHS: ConstantExpr::getNot(C: C2)); |
4623 | return BinaryOperator::CreateXor(V1: Or, V2: ConstantExpr::getNot(C: C1)); |
4624 | } |
4625 | |
4626 | // Convert xor ([trunc] (ashr X, BW-1)), C => |
4627 | // select(X >s -1, C, ~C) |
4628 | // The ashr creates "AllZeroOrAllOne's", which then optionally inverses the |
4629 | // constant depending on whether this input is less than 0. |
4630 | const APInt *CA; |
4631 | if (match(V: Op0, P: m_OneUse(SubPattern: m_TruncOrSelf( |
4632 | Op: m_AShr(L: m_Value(V&: X), R: m_APIntAllowPoison(Res&: CA))))) && |
4633 | *CA == X->getType()->getScalarSizeInBits() - 1 && |
4634 | !match(V: C1, P: m_AllOnes())) { |
4635 | assert(!C1->isZeroValue() && "Unexpected xor with 0" ); |
4636 | Value *IsNotNeg = Builder.CreateIsNotNeg(Arg: X); |
4637 | return SelectInst::Create(C: IsNotNeg, S1: Op1, S2: Builder.CreateNot(V: Op1)); |
4638 | } |
4639 | } |
4640 | |
4641 | Type *Ty = I.getType(); |
4642 | { |
4643 | const APInt *RHSC; |
4644 | if (match(V: Op1, P: m_APInt(Res&: RHSC))) { |
4645 | Value *X; |
4646 | const APInt *C; |
4647 | // (C - X) ^ signmaskC --> (C + signmaskC) - X |
4648 | if (RHSC->isSignMask() && match(V: Op0, P: m_Sub(L: m_APInt(Res&: C), R: m_Value(V&: X)))) |
4649 | return BinaryOperator::CreateSub(V1: ConstantInt::get(Ty, V: *C + *RHSC), V2: X); |
4650 | |
4651 | // (X + C) ^ signmaskC --> X + (C + signmaskC) |
4652 | if (RHSC->isSignMask() && match(V: Op0, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C)))) |
4653 | return BinaryOperator::CreateAdd(V1: X, V2: ConstantInt::get(Ty, V: *C + *RHSC)); |
4654 | |
4655 | // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0 |
4656 | if (match(V: Op0, P: m_Or(L: m_Value(V&: X), R: m_APInt(Res&: C))) && |
4657 | MaskedValueIsZero(V: X, Mask: *C, Depth: 0, CxtI: &I)) |
4658 | return BinaryOperator::CreateXor(V1: X, V2: ConstantInt::get(Ty, V: *C ^ *RHSC)); |
4659 | |
4660 | // When X is a power-of-two or zero and zero input is poison: |
4661 | // ctlz(i32 X) ^ 31 --> cttz(X) |
4662 | // cttz(i32 X) ^ 31 --> ctlz(X) |
4663 | auto *II = dyn_cast<IntrinsicInst>(Val: Op0); |
4664 | if (II && II->hasOneUse() && *RHSC == Ty->getScalarSizeInBits() - 1) { |
4665 | Intrinsic::ID IID = II->getIntrinsicID(); |
4666 | if ((IID == Intrinsic::ctlz || IID == Intrinsic::cttz) && |
4667 | match(V: II->getArgOperand(i: 1), P: m_One()) && |
4668 | isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), /*OrZero */ true)) { |
4669 | IID = (IID == Intrinsic::ctlz) ? Intrinsic::cttz : Intrinsic::ctlz; |
4670 | Function *F = Intrinsic::getDeclaration(M: II->getModule(), id: IID, Tys: Ty); |
4671 | return CallInst::Create(Func: F, Args: {II->getArgOperand(i: 0), Builder.getTrue()}); |
4672 | } |
4673 | } |
4674 | |
4675 | // If RHSC is inverting the remaining bits of shifted X, |
4676 | // canonicalize to a 'not' before the shift to help SCEV and codegen: |
4677 | // (X << C) ^ RHSC --> ~X << C |
4678 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: X), R: m_APInt(Res&: C)))) && |
4679 | *RHSC == APInt::getAllOnes(numBits: Ty->getScalarSizeInBits()).shl(ShiftAmt: *C)) { |
4680 | Value *NotX = Builder.CreateNot(V: X); |
4681 | return BinaryOperator::CreateShl(V1: NotX, V2: ConstantInt::get(Ty, V: *C)); |
4682 | } |
4683 | // (X >>u C) ^ RHSC --> ~X >>u C |
4684 | if (match(V: Op0, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C)))) && |
4685 | *RHSC == APInt::getAllOnes(numBits: Ty->getScalarSizeInBits()).lshr(ShiftAmt: *C)) { |
4686 | Value *NotX = Builder.CreateNot(V: X); |
4687 | return BinaryOperator::CreateLShr(V1: NotX, V2: ConstantInt::get(Ty, V: *C)); |
4688 | } |
4689 | // TODO: We could handle 'ashr' here as well. That would be matching |
4690 | // a 'not' op and moving it before the shift. Doing that requires |
4691 | // preventing the inverse fold in canShiftBinOpWithConstantRHS(). |
4692 | } |
4693 | |
4694 | // If we are XORing the sign bit of a floating-point value, convert |
4695 | // this to fneg, then cast back to integer. |
4696 | // |
4697 | // This is generous interpretation of noimplicitfloat, this is not a true |
4698 | // floating-point operation. |
4699 | // |
4700 | // Assumes any IEEE-represented type has the sign bit in the high bit. |
4701 | // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt |
4702 | Value *CastOp; |
4703 | if (match(V: Op0, P: m_ElementWiseBitCast(Op: m_Value(V&: CastOp))) && |
4704 | match(V: Op1, P: m_SignMask()) && |
4705 | !Builder.GetInsertBlock()->getParent()->hasFnAttribute( |
4706 | Attribute::NoImplicitFloat)) { |
4707 | Type *EltTy = CastOp->getType()->getScalarType(); |
4708 | if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { |
4709 | Value *FNeg = Builder.CreateFNeg(V: CastOp); |
4710 | return new BitCastInst(FNeg, I.getType()); |
4711 | } |
4712 | } |
4713 | } |
4714 | |
4715 | // FIXME: This should not be limited to scalar (pull into APInt match above). |
4716 | { |
4717 | Value *X; |
4718 | ConstantInt *C1, *C2, *C3; |
4719 | // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3) |
4720 | if (match(V: Op1, P: m_ConstantInt(CI&: C3)) && |
4721 | match(V: Op0, P: m_LShr(L: m_Xor(L: m_Value(V&: X), R: m_ConstantInt(CI&: C1)), |
4722 | R: m_ConstantInt(CI&: C2))) && |
4723 | Op0->hasOneUse()) { |
4724 | // fold (C1 >> C2) ^ C3 |
4725 | APInt FoldConst = C1->getValue().lshr(ShiftAmt: C2->getValue()); |
4726 | FoldConst ^= C3->getValue(); |
4727 | // Prepare the two operands. |
4728 | auto *Opnd0 = Builder.CreateLShr(LHS: X, RHS: C2); |
4729 | Opnd0->takeName(V: Op0); |
4730 | return BinaryOperator::CreateXor(V1: Opnd0, V2: ConstantInt::get(Ty, V: FoldConst)); |
4731 | } |
4732 | } |
4733 | |
4734 | if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) |
4735 | return FoldedLogic; |
4736 | |
4737 | // Y ^ (X | Y) --> X & ~Y |
4738 | // Y ^ (Y | X) --> X & ~Y |
4739 | if (match(V: Op1, P: m_OneUse(SubPattern: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: Op0))))) |
4740 | return BinaryOperator::CreateAnd(V1: X, V2: Builder.CreateNot(V: Op0)); |
4741 | // (X | Y) ^ Y --> X & ~Y |
4742 | // (Y | X) ^ Y --> X & ~Y |
4743 | if (match(V: Op0, P: m_OneUse(SubPattern: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: Op1))))) |
4744 | return BinaryOperator::CreateAnd(V1: X, V2: Builder.CreateNot(V: Op1)); |
4745 | |
4746 | // Y ^ (X & Y) --> ~X & Y |
4747 | // Y ^ (Y & X) --> ~X & Y |
4748 | if (match(V: Op1, P: m_OneUse(SubPattern: m_c_And(L: m_Value(V&: X), R: m_Specific(V: Op0))))) |
4749 | return BinaryOperator::CreateAnd(V1: Op0, V2: Builder.CreateNot(V: X)); |
4750 | // (X & Y) ^ Y --> ~X & Y |
4751 | // (Y & X) ^ Y --> ~X & Y |
4752 | // Canonical form is (X & C) ^ C; don't touch that. |
4753 | // TODO: A 'not' op is better for analysis and codegen, but demanded bits must |
4754 | // be fixed to prefer that (otherwise we get infinite looping). |
4755 | if (!match(V: Op1, P: m_Constant()) && |
4756 | match(V: Op0, P: m_OneUse(SubPattern: m_c_And(L: m_Value(V&: X), R: m_Specific(V: Op1))))) |
4757 | return BinaryOperator::CreateAnd(V1: Op1, V2: Builder.CreateNot(V: X)); |
4758 | |
4759 | Value *A, *B, *C; |
4760 | // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants. |
4761 | if (match(V: &I, P: m_c_Xor(L: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))), |
4762 | R: m_OneUse(SubPattern: m_c_Or(L: m_Deferred(V: A), R: m_Value(V&: C)))))) |
4763 | return BinaryOperator::CreateXor( |
4764 | V1: Builder.CreateAnd(LHS: Builder.CreateNot(V: A), RHS: C), V2: B); |
4765 | |
4766 | // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants. |
4767 | if (match(V: &I, P: m_c_Xor(L: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))), |
4768 | R: m_OneUse(SubPattern: m_c_Or(L: m_Deferred(V: B), R: m_Value(V&: C)))))) |
4769 | return BinaryOperator::CreateXor( |
4770 | V1: Builder.CreateAnd(LHS: Builder.CreateNot(V: B), RHS: C), V2: A); |
4771 | |
4772 | // (A & B) ^ (A ^ B) -> (A | B) |
4773 | if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4774 | match(V: Op1, P: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))) |
4775 | return BinaryOperator::CreateOr(V1: A, V2: B); |
4776 | // (A ^ B) ^ (A & B) -> (A | B) |
4777 | if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4778 | match(V: Op1, P: m_c_And(L: m_Specific(V: A), R: m_Specific(V: B)))) |
4779 | return BinaryOperator::CreateOr(V1: A, V2: B); |
4780 | |
4781 | // (A & ~B) ^ ~A -> ~(A & B) |
4782 | // (~B & A) ^ ~A -> ~(A & B) |
4783 | if (match(V: Op0, P: m_c_And(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B)))) && |
4784 | match(V: Op1, P: m_Not(V: m_Specific(V: A)))) |
4785 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: A, RHS: B)); |
4786 | |
4787 | // (~A & B) ^ A --> A | B -- There are 4 commuted variants. |
4788 | if (match(V: &I, P: m_c_Xor(L: m_c_And(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B)), R: m_Deferred(V: A)))) |
4789 | return BinaryOperator::CreateOr(V1: A, V2: B); |
4790 | |
4791 | // (~A | B) ^ A --> ~(A & B) |
4792 | if (match(V: Op0, P: m_OneUse(SubPattern: m_c_Or(L: m_Not(V: m_Specific(V: Op1)), R: m_Value(V&: B))))) |
4793 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Op1, RHS: B)); |
4794 | |
4795 | // A ^ (~A | B) --> ~(A & B) |
4796 | if (match(V: Op1, P: m_OneUse(SubPattern: m_c_Or(L: m_Not(V: m_Specific(V: Op0)), R: m_Value(V&: B))))) |
4797 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Op0, RHS: B)); |
4798 | |
4799 | // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. |
4800 | // TODO: Loosen one-use restriction if common operand is a constant. |
4801 | Value *D; |
4802 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)))) && |
4803 | match(V: Op1, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: C), R: m_Value(V&: D))))) { |
4804 | if (B == C || B == D) |
4805 | std::swap(a&: A, b&: B); |
4806 | if (A == C) |
4807 | std::swap(a&: C, b&: D); |
4808 | if (A == D) { |
4809 | Value *NotA = Builder.CreateNot(V: A); |
4810 | return BinaryOperator::CreateAnd(V1: Builder.CreateXor(LHS: B, RHS: C), V2: NotA); |
4811 | } |
4812 | } |
4813 | |
4814 | // (A & B) ^ (A | C) --> A ? ~B : C -- There are 4 commuted variants. |
4815 | if (I.getType()->isIntOrIntVectorTy(BitWidth: 1) && |
4816 | match(V: Op0, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) && |
4817 | match(V: Op1, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Value(V&: C), R: m_Value(V&: D))))) { |
4818 | bool NeedFreeze = isa<SelectInst>(Val: Op0) && isa<SelectInst>(Val: Op1) && B == D; |
4819 | if (B == C || B == D) |
4820 | std::swap(a&: A, b&: B); |
4821 | if (A == C) |
4822 | std::swap(a&: C, b&: D); |
4823 | if (A == D) { |
4824 | if (NeedFreeze) |
4825 | A = Builder.CreateFreeze(V: A); |
4826 | Value *NotB = Builder.CreateNot(V: B); |
4827 | return SelectInst::Create(C: A, S1: NotB, S2: C); |
4828 | } |
4829 | } |
4830 | |
4831 | if (auto *LHS = dyn_cast<ICmpInst>(Val: I.getOperand(i_nocapture: 0))) |
4832 | if (auto *RHS = dyn_cast<ICmpInst>(Val: I.getOperand(i_nocapture: 1))) |
4833 | if (Value *V = foldXorOfICmps(LHS, RHS, I)) |
4834 | return replaceInstUsesWith(I, V); |
4835 | |
4836 | if (Instruction *CastedXor = foldCastedBitwiseLogic(I)) |
4837 | return CastedXor; |
4838 | |
4839 | if (Instruction *Abs = canonicalizeAbs(Xor&: I, Builder)) |
4840 | return Abs; |
4841 | |
4842 | // Otherwise, if all else failed, try to hoist the xor-by-constant: |
4843 | // (X ^ C) ^ Y --> (X ^ Y) ^ C |
4844 | // Just like we do in other places, we completely avoid the fold |
4845 | // for constantexprs, at least to avoid endless combine loop. |
4846 | if (match(V: &I, P: m_c_Xor(L: m_OneUse(SubPattern: m_Xor(L: m_CombineAnd(L: m_Value(V&: X), |
4847 | R: m_Unless(M: m_ConstantExpr())), |
4848 | R: m_ImmConstant(C&: C1))), |
4849 | R: m_Value(V&: Y)))) |
4850 | return BinaryOperator::CreateXor(V1: Builder.CreateXor(LHS: X, RHS: Y), V2: C1); |
4851 | |
4852 | if (Instruction *R = reassociateForUses(BO&: I, Builder)) |
4853 | return R; |
4854 | |
4855 | if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder)) |
4856 | return Canonicalized; |
4857 | |
4858 | if (Instruction *Folded = foldLogicOfIsFPClass(BO&: I, Op0, Op1)) |
4859 | return Folded; |
4860 | |
4861 | if (Instruction *Folded = canonicalizeConditionalNegationViaMathToSelect(I)) |
4862 | return Folded; |
4863 | |
4864 | if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) |
4865 | return Res; |
4866 | |
4867 | if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) |
4868 | return Res; |
4869 | |
4870 | return nullptr; |
4871 | } |
4872 | |