1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
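//
// For example (illustrative), canonicalizations 1 and 6 together rewrite:
//    %R = mul i32 8, %X
// into:
//    %R = shl i32 %X, 3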
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
39#include "llvm/ADT/SmallPtrSet.h"
40#include "llvm/ADT/SmallVector.h"
41#include "llvm/ADT/Statistic.h"
42#include "llvm/Analysis/AliasAnalysis.h"
43#include "llvm/Analysis/AssumptionCache.h"
44#include "llvm/Analysis/BasicAliasAnalysis.h"
45#include "llvm/Analysis/BlockFrequencyInfo.h"
46#include "llvm/Analysis/CFG.h"
47#include "llvm/Analysis/ConstantFolding.h"
48#include "llvm/Analysis/GlobalsModRef.h"
49#include "llvm/Analysis/InstructionSimplify.h"
50#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/MemoryBuiltins.h"
53#include "llvm/Analysis/OptimizationRemarkEmitter.h"
54#include "llvm/Analysis/ProfileSummaryInfo.h"
55#include "llvm/Analysis/TargetFolder.h"
56#include "llvm/Analysis/TargetLibraryInfo.h"
57#include "llvm/Analysis/TargetTransformInfo.h"
58#include "llvm/Analysis/Utils/Local.h"
59#include "llvm/Analysis/ValueTracking.h"
60#include "llvm/Analysis/VectorUtils.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
68#include "llvm/IR/DerivedTypes.h"
69#include "llvm/IR/Dominators.h"
70#include "llvm/IR/EHPersonalities.h"
71#include "llvm/IR/Function.h"
72#include "llvm/IR/GetElementPtrTypeIterator.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
76#include "llvm/IR/Instructions.h"
77#include "llvm/IR/IntrinsicInst.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
82#include "llvm/IR/PatternMatch.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
88#include "llvm/InitializePasses.h"
89#include "llvm/Support/Casting.h"
90#include "llvm/Support/CommandLine.h"
91#include "llvm/Support/Compiler.h"
92#include "llvm/Support/Debug.h"
93#include "llvm/Support/DebugCounter.h"
94#include "llvm/Support/ErrorHandling.h"
95#include "llvm/Support/KnownBits.h"
96#include "llvm/Support/raw_ostream.h"
97#include "llvm/Transforms/InstCombine/InstCombine.h"
98#include "llvm/Transforms/Utils/BasicBlockUtils.h"
99#include "llvm/Transforms/Utils/Local.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
109#include "llvm/Transforms/Utils/InstructionWorklist.h"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(Val: true));
136
137static cl::opt<unsigned> MaxSinkNumUsers(
138 "instcombine-max-sink-users", cl::init(Val: 32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
141static cl::opt<unsigned>
142MaxArraySize("instcombine-maxarray-size", cl::init(Val: 1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(Val: true));
154
155std::optional<Instruction *>
156InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
157 // Handle target specific intrinsics
158 if (II.getCalledFunction()->isTargetIntrinsic()) {
159 return TTI.instCombineIntrinsic(IC&: *this, II);
160 }
161 return std::nullopt;
162}
163
164std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
165 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167 // Handle target specific intrinsics
168 if (II.getCalledFunction()->isTargetIntrinsic()) {
169 return TTI.simplifyDemandedUseBitsIntrinsic(IC&: *this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
175std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
176 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
177 APInt &PoisonElts2, APInt &PoisonElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
181 if (II.getCalledFunction()->isTargetIntrinsic()) {
182 return TTI.simplifyDemandedVectorEltsIntrinsic(
183 IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
194 return llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
195}
196
197/// Legal integers and common types are considered desirable. This is used to
/// avoid creating instructions with types that may not be supported well by
/// the backend.
200/// NOTE: This treats i8, i16 and i32 specially because they are common
201/// types in frontend languages.
202bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
203 switch (BitWidth) {
204 case 8:
205 case 16:
206 case 32:
207 return true;
208 default:
209 return DL.isLegalInteger(Width: BitWidth);
210 }
211}
212
213/// Return true if it is desirable to convert an integer computation from a
214/// given bit width to a new bit width.
215/// We don't want to convert from a legal or desirable type (like i8) to an
216/// illegal type or from a smaller to a larger illegal type. A width of '1'
217/// is always treated as a desirable type because i1 is a fundamental type in
218/// IR, and there are many specialized optimizations for i1 types.
219/// Common/desirable widths are equally treated as legal to convert to, in
220/// order to open up more combining opportunities.
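/// For example (illustrative, assuming a DataLayout in which i33 is illegal
/// and i32 is legal or desirable): converting i33 -> i32 is allowed because we
/// shrink to a desirable width, while i32 -> i33 is rejected because it would
/// go from a desirable type to an illegal one.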
221bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
222 unsigned ToWidth) const {
223 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth);
224 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth);
225
226 // Convert to desirable widths even if they are not legal types.
227 // Only shrink types, to prevent infinite loops.
228 if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth))
229 return true;
230
  // If this is a legal or desirable integer source type, and the result would
  // be an illegal type, don't do the transformation.
233 if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal)
234 return false;
235
236 // Otherwise, if both are illegal, do not increase the size of the result. We
237 // do allow things like i160 -> i64, but not i64 -> i160.
238 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
239 return false;
240
241 return true;
242}
243
244/// Return true if it is desirable to convert a computation from 'From' to 'To'.
245/// We don't want to convert from a legal to an illegal type or from a smaller
246/// to a larger illegal type. i1 is always treated as a legal type because it is
247/// a fundamental type in IR, and there are many specialized optimizations for
248/// i1 types.
249bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
250 // TODO: This could be extended to allow vectors. Datalayout changes might be
251 // needed to properly support that.
252 if (!From->isIntegerTy() || !To->isIntegerTy())
253 return false;
254
255 unsigned FromWidth = From->getPrimitiveSizeInBits();
256 unsigned ToWidth = To->getPrimitiveSizeInBits();
257 return shouldChangeType(FromWidth, ToWidth);
258}
259
// Return true if No Signed Wrap should be maintained for I.
261// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
262// where both B and C should be ConstantInts, results in a constant that does
263// not overflow. This function only handles the Add and Sub opcodes. For
264// all other opcodes, the function conservatively returns false.
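// For example (illustrative, i8 add): nsw can be kept when folding the
// constants 100 and 27 (100 + 27 = 127 still fits in i8), but not when folding
// 100 and 50 (100 + 50 = 150 overflows the signed i8 range).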
265static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
266 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
267 if (!OBO || !OBO->hasNoSignedWrap())
268 return false;
269
270 // We reason about Add and Sub Only.
271 Instruction::BinaryOps Opcode = I.getOpcode();
272 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
273 return false;
274
275 const APInt *BVal, *CVal;
276 if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal)))
277 return false;
278
279 bool Overflow = false;
280 if (Opcode == Instruction::Add)
281 (void)BVal->sadd_ov(RHS: *CVal, Overflow);
282 else
283 (void)BVal->ssub_ov(RHS: *CVal, Overflow);
284
285 return !Overflow;
286}
287
288static bool hasNoUnsignedWrap(BinaryOperator &I) {
289 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
290 return OBO && OBO->hasNoUnsignedWrap();
291}
292
293static bool hasNoSignedWrap(BinaryOperator &I) {
294 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
295 return OBO && OBO->hasNoSignedWrap();
296}
297
298/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable, as they can be
/// kept safely across these transforms.
301static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
302 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I);
303 if (!FPMO) {
304 I.clearSubclassOptionalData();
305 return;
306 }
307
308 FastMathFlags FMF = I.getFastMathFlags();
309 I.clearSubclassOptionalData();
310 I.setFastMathFlags(FMF);
311}
312
313/// Combine constant operands of associative operations either before or after a
314/// cast to eliminate one of the associative operations:
315/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
316/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
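/// For example (illustrative IR):
///    %a = xor i8 %x, 5
///    %z = zext i8 %a to i32
///    %r = xor i32 %z, 10
/// becomes:
///    %z = zext i8 %x to i32
///    %r = xor i32 %z, 15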
317static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
318 InstCombinerImpl &IC) {
319 auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0));
320 if (!Cast || !Cast->hasOneUse())
321 return false;
322
323 // TODO: Enhance logic for other casts and remove this check.
324 auto CastOpcode = Cast->getOpcode();
325 if (CastOpcode != Instruction::ZExt)
326 return false;
327
328 // TODO: Enhance logic for other BinOps and remove this check.
329 if (!BinOp1->isBitwiseLogicOp())
330 return false;
331
332 auto AssocOpcode = BinOp1->getOpcode();
333 auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0));
334 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
335 return false;
336
337 Constant *C1, *C2;
338 if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) ||
339 !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2)))
340 return false;
341
342 // TODO: This assumes a zext cast.
343 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
344 // to the destination type might lose bits.
345
346 // Fold the constants together in the destination type:
347 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
348 const DataLayout &DL = IC.getDataLayout();
349 Type *DestTy = C1->getType();
350 Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL);
351 if (!CastC2)
352 return false;
353 Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL);
354 if (!FoldedC)
355 return false;
356
357 IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0));
358 IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC);
359 BinOp1->dropPoisonGeneratingFlags();
360 Cast->dropPoisonGeneratingFlags();
361 return true;
362}
363
364// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
365// inttoptr ( ptrtoint (x) ) --> x
366Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
367 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
368 if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) ==
369 DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) {
370 auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0));
371 Type *CastTy = IntToPtr->getDestTy();
372 if (PtrToInt &&
373 CastTy->getPointerAddressSpace() ==
374 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
375 DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) ==
376 DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy()))
377 return PtrToInt->getOperand(i_nocapture: 0);
378 }
379 return nullptr;
380}
381
382/// This performs a few simplifications for operators that are associative or
383/// commutative:
384///
385/// Commutative operators:
386///
387/// 1. Order operands such that they are listed from right (least complex) to
388/// left (most complex). This puts constants before unary operators before
389/// binary operators.
390///
391/// Associative operators:
392///
393/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
394/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
395///
396/// Associative and commutative operators:
397///
398/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
399/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
400/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
401/// if C1 and C2 are constants.
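///
/// For example (illustrative), transform 6 with integer adds rewrites
/// "(X + 3) + (Y + 7)" into "(X + Y) + 10".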
402bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
403 Instruction::BinaryOps Opcode = I.getOpcode();
404 bool Changed = false;
405
406 do {
407 // Order operands such that they are listed from right (least complex) to
408 // left (most complex). This puts constants before unary operators before
409 // binary operators.
410 if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) <
411 getComplexity(V: I.getOperand(i_nocapture: 1)))
412 Changed = !I.swapOperands();
413
414 if (I.isCommutative()) {
415 if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) {
416 replaceOperand(I, OpNum: 0, V: Pair->first);
417 replaceOperand(I, OpNum: 1, V: Pair->second);
418 Changed = true;
419 }
420 }
421
422 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0));
423 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1));
424
425 if (I.isAssociative()) {
426 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
427 if (Op0 && Op0->getOpcode() == Opcode) {
428 Value *A = Op0->getOperand(i_nocapture: 0);
429 Value *B = Op0->getOperand(i_nocapture: 1);
430 Value *C = I.getOperand(i_nocapture: 1);
431
432 // Does "B op C" simplify?
433 if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) {
434 // It simplifies to V. Form "A op V".
435 replaceOperand(I, OpNum: 0, V: A);
436 replaceOperand(I, OpNum: 1, V);
437 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0);
438 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0);
439
440 // Conservatively clear all optional flags since they may not be
441 // preserved by the reassociation. Reset nsw/nuw based on the above
442 // analysis.
443 ClearSubclassDataAfterReassociation(I);
444
445 // Note: this is only valid because SimplifyBinOp doesn't look at
446 // the operands to Op0.
447 if (IsNUW)
448 I.setHasNoUnsignedWrap(true);
449
450 if (IsNSW)
451 I.setHasNoSignedWrap(true);
452
453 Changed = true;
454 ++NumReassoc;
455 continue;
456 }
457 }
458
459 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
460 if (Op1 && Op1->getOpcode() == Opcode) {
461 Value *A = I.getOperand(i_nocapture: 0);
462 Value *B = Op1->getOperand(i_nocapture: 0);
463 Value *C = Op1->getOperand(i_nocapture: 1);
464
465 // Does "A op B" simplify?
466 if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) {
467 // It simplifies to V. Form "V op C".
468 replaceOperand(I, OpNum: 0, V);
469 replaceOperand(I, OpNum: 1, V: C);
470 // Conservatively clear the optional flags, since they may not be
471 // preserved by the reassociation.
472 ClearSubclassDataAfterReassociation(I);
473 Changed = true;
474 ++NumReassoc;
475 continue;
476 }
477 }
478 }
479
480 if (I.isAssociative() && I.isCommutative()) {
481 if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) {
482 Changed = true;
483 ++NumReassoc;
484 continue;
485 }
486
487 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
488 if (Op0 && Op0->getOpcode() == Opcode) {
489 Value *A = Op0->getOperand(i_nocapture: 0);
490 Value *B = Op0->getOperand(i_nocapture: 1);
491 Value *C = I.getOperand(i_nocapture: 1);
492
493 // Does "C op A" simplify?
494 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
495 // It simplifies to V. Form "V op B".
496 replaceOperand(I, OpNum: 0, V);
497 replaceOperand(I, OpNum: 1, V: B);
498 // Conservatively clear the optional flags, since they may not be
499 // preserved by the reassociation.
500 ClearSubclassDataAfterReassociation(I);
501 Changed = true;
502 ++NumReassoc;
503 continue;
504 }
505 }
506
507 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
508 if (Op1 && Op1->getOpcode() == Opcode) {
509 Value *A = I.getOperand(i_nocapture: 0);
510 Value *B = Op1->getOperand(i_nocapture: 0);
511 Value *C = Op1->getOperand(i_nocapture: 1);
512
513 // Does "C op A" simplify?
514 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
515 // It simplifies to V. Form "B op V".
516 replaceOperand(I, OpNum: 0, V: B);
517 replaceOperand(I, OpNum: 1, V);
518 // Conservatively clear the optional flags, since they may not be
519 // preserved by the reassociation.
520 ClearSubclassDataAfterReassociation(I);
521 Changed = true;
522 ++NumReassoc;
523 continue;
524 }
525 }
526
527 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
528 // if C1 and C2 are constants.
529 Value *A, *B;
530 Constant *C1, *C2, *CRes;
531 if (Op0 && Op1 &&
532 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
533 match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) &&
534 match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) &&
535 (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) {
536 bool IsNUW = hasNoUnsignedWrap(I) &&
537 hasNoUnsignedWrap(I&: *Op0) &&
538 hasNoUnsignedWrap(I&: *Op1);
539 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
540 BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) :
541 BinaryOperator::Create(Op: Opcode, S1: A, S2: B);
542
543 if (isa<FPMathOperator>(Val: NewBO)) {
544 FastMathFlags Flags = I.getFastMathFlags() &
545 Op0->getFastMathFlags() &
546 Op1->getFastMathFlags();
547 NewBO->setFastMathFlags(Flags);
548 }
549 InsertNewInstWith(New: NewBO, Old: I.getIterator());
550 NewBO->takeName(V: Op1);
551 replaceOperand(I, OpNum: 0, V: NewBO);
552 replaceOperand(I, OpNum: 1, V: CRes);
553 // Conservatively clear the optional flags, since they may not be
554 // preserved by the reassociation.
555 ClearSubclassDataAfterReassociation(I);
556 if (IsNUW)
557 I.setHasNoUnsignedWrap(true);
558
559 Changed = true;
560 continue;
561 }
562 }
563
564 // No further simplifications.
565 return Changed;
566 } while (true);
567}
568
569/// Return whether "X LOp (Y ROp Z)" is always equal to
570/// "(X LOp Y) ROp (X LOp Z)".
571static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
572 Instruction::BinaryOps ROp) {
573 // X & (Y | Z) <--> (X & Y) | (X & Z)
574 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
575 if (LOp == Instruction::And)
576 return ROp == Instruction::Or || ROp == Instruction::Xor;
577
578 // X | (Y & Z) <--> (X | Y) & (X | Z)
579 if (LOp == Instruction::Or)
580 return ROp == Instruction::And;
581
582 // X * (Y + Z) <--> (X * Y) + (X * Z)
583 // X * (Y - Z) <--> (X * Y) - (X * Z)
584 if (LOp == Instruction::Mul)
585 return ROp == Instruction::Add || ROp == Instruction::Sub;
586
587 return false;
588}
589
590/// Return whether "(X LOp Y) ROp Z" is always equal to
591/// "(X ROp Z) LOp (Y ROp Z)".
592static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
593 Instruction::BinaryOps ROp) {
594 if (Instruction::isCommutative(Opcode: ROp))
595 return leftDistributesOverRight(LOp: ROp, ROp: LOp);
596
597 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
598 return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp);
599
600 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
601 // but this requires knowing that the addition does not overflow and other
602 // such subtleties.
603}
604
/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
607static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
608 if (isa<Constant>(Val: V))
609 return nullptr;
610
611 return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType());
612}
613
614/// This function predicates factorization using distributive laws. By default,
615/// it just returns the 'Op' inputs. But for special-cases like
616/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
617/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
618/// allow more factorization opportunities.
619static Instruction::BinaryOps
620getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
621 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
622 assert(Op && "Expected a binary operator");
623 LHS = Op->getOperand(i_nocapture: 0);
624 RHS = Op->getOperand(i_nocapture: 1);
625 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
626 Constant *C;
627 if (match(V: Op, P: m_Shl(L: m_Value(), R: m_Constant(C)))) {
628 // X << C --> X * (1 << C)
629 RHS = ConstantExpr::getShl(C1: ConstantInt::get(Ty: Op->getType(), V: 1), C2: C);
630 return Instruction::Mul;
631 }
632 // TODO: We can add other conversions e.g. shr => div etc.
633 }
634 if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) {
635 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
636 match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) {
637 // lshr nneg C, X --> ashr nneg C, X
638 return Instruction::AShr;
639 }
640 }
641 return Op->getOpcode();
642}
643
644/// This tries to simplify binary operations by factorizing out common terms
645/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
646static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
647 InstCombiner::BuilderTy &Builder,
648 Instruction::BinaryOps InnerOpcode, Value *A,
649 Value *B, Value *C, Value *D) {
650 assert(A && B && C && D && "All values must be provided");
651
652 Value *V = nullptr;
653 Value *RetVal = nullptr;
654 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
655 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
656
657 // Does "X op' Y" always equal "Y op' X"?
658 bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode);
659
660 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
661 if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) {
662 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
663 // commutative case, "(A op' B) op (C op' A)"?
664 if (A == C || (InnerCommutative && A == D)) {
665 if (A != C)
666 std::swap(a&: C, b&: D);
667 // Consider forming "A op' (B op D)".
668 // If "B op D" simplifies then it can be formed with no cost.
669 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I));
670
671 // If "B op D" doesn't simplify then only go on if one of the existing
672 // operations "A op' B" and "C op' D" will be zapped as no longer used.
673 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
674 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName());
675 if (V)
676 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V);
677 }
678 }
679
680 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
681 if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) {
682 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
683 // commutative case, "(A op' B) op (B op' D)"?
684 if (B == D || (InnerCommutative && B == C)) {
685 if (B != D)
686 std::swap(a&: C, b&: D);
687 // Consider forming "(A op C) op' B".
688 // If "A op C" simplifies then it can be formed with no cost.
689 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I));
690
691 // If "A op C" doesn't simplify then only go on if one of the existing
692 // operations "A op' B" and "C op' D" will be zapped as no longer used.
693 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
694 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName());
695 if (V)
696 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B);
697 }
698 }
699
700 if (!RetVal)
701 return nullptr;
702
703 ++NumFactor;
704 RetVal->takeName(V: &I);
705
706 // Try to add no-overflow flags to the final value.
707 if (isa<OverflowingBinaryOperator>(Val: RetVal)) {
708 bool HasNSW = false;
709 bool HasNUW = false;
710 if (isa<OverflowingBinaryOperator>(Val: &I)) {
711 HasNSW = I.hasNoSignedWrap();
712 HasNUW = I.hasNoUnsignedWrap();
713 }
714 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) {
715 HasNSW &= LOBO->hasNoSignedWrap();
716 HasNUW &= LOBO->hasNoUnsignedWrap();
717 }
718
719 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) {
720 HasNSW &= ROBO->hasNoSignedWrap();
721 HasNUW &= ROBO->hasNoUnsignedWrap();
722 }
723
724 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
725 // We can propagate 'nsw' if we know that
726 // %Y = mul nsw i16 %X, C
727 // %Z = add nsw i16 %Y, %X
728 // =>
729 // %Z = mul nsw i16 %X, C+1
730 //
731 // iff C+1 isn't INT_MIN
732 const APInt *CInt;
733 if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue())
734 cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW);
735
736 // nuw can be propagated with any constant or nuw value.
737 cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW);
738 }
739 }
740 return RetVal;
741}
742
// If `I` has one Const operand and the other matches `(ctpop (not x))`,
// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
// This is only useful if the new subtract can fold, so we only handle the
// following cases:
//    1) (add/sub/disjoint_or C, (ctpop (not x)))
//        -> (add/sub/disjoint_or C', (ctpop x))
//    2) (cmp pred C, (ctpop (not x)))
//        -> (cmp pred C', (ctpop x))
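// For example (illustrative, i8): "add i8 5, (ctpop (xor i8 %x, -1))" becomes
// "sub i8 13, (ctpop i8 %x)", since ctpop(~x) == 8 - ctpop(x) and
// 5 + (8 - ctpop(x)) == 13 - ctpop(x).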
751Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
752 unsigned Opc = I->getOpcode();
753 unsigned ConstIdx = 1;
754 switch (Opc) {
755 default:
756 return nullptr;
  // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x), (ctpop x))
  // We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
  // is constant.
760 case Instruction::Sub:
761 ConstIdx = 0;
762 break;
763 case Instruction::ICmp:
    // Signed predicates aren't correct in some edge cases like for i2 types;
    // also, since (ctpop x) fits in log2(BitWidth(x)) + 1 bits, almost all
    // signed comparisons against it are simplified to unsigned ones.
767 if (cast<ICmpInst>(Val: I)->isSigned())
768 return nullptr;
769 break;
770 case Instruction::Or:
771 if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value())))
772 return nullptr;
773 [[fallthrough]];
774 case Instruction::Add:
775 break;
776 }
777
778 Value *Op;
779 // Find ctpop.
780 if (!match(I->getOperand(1 - ConstIdx),
781 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
782 return nullptr;
783
784 Constant *C;
785 // Check other operand is ImmConstant.
786 if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C)))
787 return nullptr;
788
789 Type *Ty = Op->getType();
790 Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits());
791 // Need extra check for icmp. Note if this check is true, it generally means
792 // the icmp will simplify to true/false.
793 if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality() &&
794 !ConstantExpr::getICmp(pred: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC)->isZeroValue())
795 return nullptr;
796
797 // Check we can invert `(not x)` for free.
798 bool Consumes = false;
799 if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes)
800 return nullptr;
801 Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder);
802 assert(NotOp != nullptr &&
803 "Desync between isFreeToInvert and getFreelyInverted");
804
805 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
806
807 Value *R = nullptr;
808
809 // Do the transformation here to avoid potentially introducing an infinite
810 // loop.
811 switch (Opc) {
812 case Instruction::Sub:
813 R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC));
814 break;
815 case Instruction::Or:
816 case Instruction::Add:
817 R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp);
818 break;
819 case Instruction::ICmp:
820 R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(),
821 LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C));
822 break;
823 default:
824 llvm_unreachable("Unhandled Opcode");
825 }
826 assert(R != nullptr);
827 return replaceInstUsesWith(I&: *I, V: R);
828}
829
830// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
831// IFF
832// 1) the logic_shifts match
//    2) either BinOp1 is `and`, or the binop/shift pair is completely
//       distributable (anything but `add` + `lshr`) and either BinOp2 is `and`
//       or (logic_shift (inv_logic_shift C1, C), C) == C1
836//
837// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
838//
839// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
840// IFF
841// 1) the logic_shifts match
842// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
843//
844// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
845//
846// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
847// IFF
848// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
849// 2) Binop2 is `not`
850//
851// -> (arithmetic_shift Binop1((not X), Y), Amt)
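//
// For example (illustrative), the second form with matching `or`s:
//    (or (or (lshr X, 4), Mask), (lshr Y, 4))
//      -> (or (lshr (or X, Y), 4), Mask)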
852
853Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
854 const DataLayout &DL = I.getModule()->getDataLayout();
855 auto IsValidBinOpc = [](unsigned Opc) {
856 switch (Opc) {
857 default:
858 return false;
859 case Instruction::And:
860 case Instruction::Or:
861 case Instruction::Xor:
862 case Instruction::Add:
863 // Skip Sub as we only match constant masks which will canonicalize to use
864 // add.
865 return true;
866 }
867 };
868
869 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
870 // constraints.
871 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
872 unsigned ShOpc) {
873 assert(ShOpc != Instruction::AShr);
874 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
875 ShOpc == Instruction::Shl;
876 };
877
878 auto GetInvShift = [](unsigned ShOpc) {
879 assert(ShOpc != Instruction::AShr);
880 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
881 };
882
883 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
884 unsigned ShOpc, Constant *CMask,
885 Constant *CShift) {
886 // If the BinOp1 is `and` we don't need to check the mask.
887 if (BinOpc1 == Instruction::And)
888 return true;
889
    // For all other possible transforms we need a completely distributable
    // binop/shift (anything but `add` + `lshr`).
892 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
893 return false;
894
895 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
896 // vecs, otherwise the mask will be simplified and the following check will
897 // handle it).
898 if (BinOpc2 == Instruction::And)
899 return true;
900
901 // Otherwise, need mask that meets the below requirement.
902 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
903 Constant *MaskInvShift =
904 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
905 return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) ==
906 CMask;
907 };
908
909 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
910 Constant *CMask, *CShift;
911 Value *X, *Y, *ShiftedX, *Mask, *Shift;
912 if (!match(V: I.getOperand(i_nocapture: ShOpnum),
913 P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift)))))
914 return nullptr;
915 if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum),
916 P: m_BinOp(L: m_Value(V&: ShiftedX), R: m_Value(V&: Mask))))
917 return nullptr;
918
919 if (!match(V: ShiftedX, P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift)))))
920 return nullptr;
921
922 // Make sure we are matching instruction shifts and not ConstantExpr
923 auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum));
924 auto *IX = dyn_cast<Instruction>(Val: ShiftedX);
925 if (!IY || !IX)
926 return nullptr;
927
928 // LHS and RHS need same shift opcode
929 unsigned ShOpc = IY->getOpcode();
930 if (ShOpc != IX->getOpcode())
931 return nullptr;
932
933 // Make sure binop is real instruction and not ConstantExpr
934 auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum));
935 if (!BO2)
936 return nullptr;
937
938 unsigned BinOpc = BO2->getOpcode();
939 // Make sure we have valid binops.
940 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
941 return nullptr;
942
943 if (ShOpc == Instruction::AShr) {
944 if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) &&
945 BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) {
946 Value *NotX = Builder.CreateNot(V: X);
947 Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX);
948 return BinaryOperator::Create(
949 Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift);
950 }
951
952 return nullptr;
953 }
954
    // If BinOp1 == BinOp2 and it's either a bitwise op or `shl` with `add`,
    // then just distribute to drop the shift irrespective of the constants.
957 if (BinOpc == I.getOpcode() &&
958 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
959 Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y);
960 Value *NewBinOp1 = Builder.CreateBinOp(
961 Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift);
962 return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask);
963 }
964
965 // Otherwise we can only distribute by constant shifting the mask, so
966 // ensure we have constants.
967 if (!match(V: Shift, P: m_ImmConstant(C&: CShift)))
968 return nullptr;
969 if (!match(V: Mask, P: m_ImmConstant(C&: CMask)))
970 return nullptr;
971
972 // Check if we can distribute the binops.
973 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
974 return nullptr;
975
976 Constant *NewCMask =
977 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
978 Value *NewBinOp2 = Builder.CreateBinOp(
979 Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask);
980 Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2);
981 return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc),
982 S1: NewBinOp1, S2: CShift);
983 };
984
985 if (Instruction *R = MatchBinOp(0))
986 return R;
987 return MatchBinOp(1);
988}
989
990// (Binop (zext C), (select C, T, F))
991// -> (select C, (binop 1, T), (binop 0, F))
992//
993// (Binop (sext C), (select C, T, F))
994// -> (select C, (binop -1, T), (binop 0, F))
995//
// Attempt to simplify binary operations into a select with folded args, when
// one operand of the binop is a select instruction and the other operand is a
// zext/sext extension whose source value is the select condition.
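//
// For example (illustrative):
//    (add (zext i1 %c to i32), (select i1 %c, i32 %t, i32 %f))
//      -> (select i1 %c, i32 (add 1, %t), i32 (add 0, %f))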
999Instruction *
1000InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
1001 // TODO: this simplification may be extended to any speculatable instruction,
1002 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1003 Instruction::BinaryOps Opc = I.getOpcode();
1004 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1005 Value *A, *CondVal, *TrueVal, *FalseVal;
1006 Value *CastOp;
1007
1008 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1009 return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) &&
1010 A->getType()->getScalarSizeInBits() == 1 &&
1011 match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal),
1012 R: m_Value(V&: FalseVal)));
1013 };
1014
  // Make sure one side of the binop is a select instruction, and the other is
  // a zero/sign extension operating on an i1.
1017 if (MatchSelectAndCast(LHS, RHS))
1018 CastOp = LHS;
1019 else if (MatchSelectAndCast(RHS, LHS))
1020 CastOp = RHS;
1021 else
1022 return nullptr;
1023
1024 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1025 bool IsCastOpRHS = (CastOp == RHS);
1026 bool IsZExt = isa<ZExtInst>(Val: CastOp);
1027 Constant *C;
1028
1029 if (IsTrueArm) {
1030 C = Constant::getNullValue(Ty: V->getType());
1031 } else if (IsZExt) {
1032 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1033 C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1));
1034 } else {
1035 C = Constant::getAllOnesValue(Ty: V->getType());
1036 }
1037
1038 return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C)
1039 : Builder.CreateBinOp(Opc, LHS: C, RHS: V);
1040 };
1041
  // If the value used in the zext/sext is the select condition, or the
  // negation of the select condition, the binop can be simplified.
1044 if (CondVal == A) {
1045 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1046 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1047 S2: NewFoldedConst(true, FalseVal));
1048 }
1049
1050 if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) {
1051 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1052 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1053 S2: NewFoldedConst(false, FalseVal));
1054 }
1055
1056 return nullptr;
1057}
1058
1059Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1060 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1061 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1062 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1063 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1064 Value *A, *B, *C, *D;
1065 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1066
1067 if (Op0)
1068 LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1);
1069 if (Op1)
1070 RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0);
1071
1072 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1073 // a common term.
1074 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1075 if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D))
1076 return V;
1077
1078 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1079 // term.
1080 if (Op0)
1081 if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS))
1082 if (Value *V =
1083 tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident))
1084 return V;
1085
1086 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1087 // term.
1088 if (Op1)
1089 if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS))
1090 if (Value *V =
1091 tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D))
1092 return V;
1093
1094 return nullptr;
1095}
1096
1097/// This tries to simplify binary operations which some other binary operation
1098/// distributes over either by factorizing out common terms
1099/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1100/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1101/// Returns the simplified value, or null if it didn't simplify.
1102Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1103 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1104 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1105 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1106 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1107
1108 // Factorization.
1109 if (Value *R = tryFactorizationFolds(I))
1110 return R;
1111
1112 // Expansion.
1113 if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) {
1114 // The instruction has the form "(A op' B) op C". See if expanding it out
1115 // to "(A op C) op' (B op C)" results in simplifications.
1116 Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS;
1117 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1118
1119 // Disable the use of undef because it's not safe to distribute undef.
1120 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1121 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1122 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive);
1123
1124 // Do "A op C" and "B op C" both simplify?
1125 if (L && R) {
1126 // They do! Return "L op' R".
1127 ++NumExpand;
1128 C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1129 C->takeName(V: &I);
1130 return C;
1131 }
1132
1133 // Does "A op C" simplify to the identity value for the inner opcode?
1134 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1135 // They do! Return "B op C".
1136 ++NumExpand;
1137 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C);
1138 C->takeName(V: &I);
1139 return C;
1140 }
1141
1142 // Does "B op C" simplify to the identity value for the inner opcode?
1143 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1144 // They do! Return "A op C".
1145 ++NumExpand;
1146 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1147 C->takeName(V: &I);
1148 return C;
1149 }
1150 }
1151
1152 if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) {
1153 // The instruction has the form "A op (B op' C)". See if expanding it out
1154 // to "(A op B) op' (A op C)" results in simplifications.
1155 Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1);
1156 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1157
1158 // Disable the use of undef because it's not safe to distribute undef.
1159 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1160 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive);
1161 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1162
1163 // Do "A op B" and "A op C" both simplify?
1164 if (L && R) {
1165 // They do! Return "L op' R".
1166 ++NumExpand;
1167 A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1168 A->takeName(V: &I);
1169 return A;
1170 }
1171
1172 // Does "A op B" simplify to the identity value for the inner opcode?
1173 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1174 // They do! Return "A op C".
1175 ++NumExpand;
1176 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1177 A->takeName(V: &I);
1178 return A;
1179 }
1180
1181 // Does "A op C" simplify to the identity value for the inner opcode?
1182 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1183 // They do! Return "A op B".
1184 ++NumExpand;
1185 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B);
1186 A->takeName(V: &I);
1187 return A;
1188 }
1189 }
1190
1191 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1192}
1193
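/// Match a pair of PHI nodes that live in the same block, have the same
/// incoming blocks, and whose incoming values are the same two values
/// (possibly swapped on some predecessors). Returns that value pair.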
1194static std::optional<std::pair<Value *, Value *>>
1195matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1196 if (LHS->getParent() != RHS->getParent())
1197 return std::nullopt;
1198
1199 if (LHS->getNumIncomingValues() < 2)
1200 return std::nullopt;
1201
1202 if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks()))
1203 return std::nullopt;
1204
1205 Value *L0 = LHS->getIncomingValue(i: 0);
1206 Value *R0 = RHS->getIncomingValue(i: 0);
1207
1208 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1209 Value *L1 = LHS->getIncomingValue(i: I);
1210 Value *R1 = RHS->getIncomingValue(i: I);
1211
1212 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1213 continue;
1214
1215 return std::nullopt;
1216 }
1217
1218 return std::optional(std::pair(L0, R0));
1219}
1220
1221std::optional<std::pair<Value *, Value *>>
1222InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1223 Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS);
1224 Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS);
1225 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1226 return std::nullopt;
1227 switch (LHSInst->getOpcode()) {
1228 case Instruction::PHI:
1229 return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS));
1230 case Instruction::Select: {
1231 Value *Cond = LHSInst->getOperand(i: 0);
1232 Value *TrueVal = LHSInst->getOperand(i: 1);
1233 Value *FalseVal = LHSInst->getOperand(i: 2);
1234 if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) &&
1235 FalseVal == RHSInst->getOperand(i: 1))
1236 return std::pair(TrueVal, FalseVal);
1237 return std::nullopt;
1238 }
1239 case Instruction::Call: {
1240 // Match min(a, b) and max(a, b)
1241 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst);
1242 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst);
1243 if (LHSMinMax && RHSMinMax &&
1244 LHSMinMax->getPredicate() ==
1245 ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) &&
1246 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1247 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1248 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1249 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1250 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1251 return std::nullopt;
1252 }
1253 default:
1254 return std::nullopt;
1255 }
1256}
1257
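/// Try to push a binary operator into the arms of one or two select operands:
///   (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
///   (A ? B : C) op Y           -> A ? (B op Y) : (C op Y)
/// when the per-arm operations simplify (or can be formed cheaply).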
1258Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
1259 Value *LHS,
1260 Value *RHS) {
1261 Value *A, *B, *C, *D, *E, *F;
1262 bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C)));
1263 bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F)));
1264 if (!LHSIsSelect && !RHSIsSelect)
1265 return nullptr;
1266
1267 FastMathFlags FMF;
1268 BuilderTy::FastMathFlagGuard Guard(Builder);
1269 if (isa<FPMathOperator>(Val: &I)) {
1270 FMF = I.getFastMathFlags();
1271 Builder.setFastMathFlags(FMF);
1272 }
1273
1274 Instruction::BinaryOps Opcode = I.getOpcode();
1275 SimplifyQuery Q = SQ.getWithInstruction(I: &I);
1276
1277 Value *Cond, *True = nullptr, *False = nullptr;
1278
1279 // Special-case for add/negate combination. Replace the zero in the negation
1280 // with the trailing add operand:
1281 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1282 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1283 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1284 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1285 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1286 return nullptr;
1287
1288 Value *N;
1289 if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) {
1290 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1291 return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName());
1292 }
1293 if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) {
1294 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1295 return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName());
1296 }
1297 return nullptr;
1298 };
1299
1300 if (LHSIsSelect && RHSIsSelect && A == D) {
1301 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1302 Cond = A;
1303 True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q);
1304 False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q);
1305
1306 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1307 if (False && !True)
1308 True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E);
1309 else if (True && !False)
1310 False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F);
1311 }
1312 } else if (LHSIsSelect && LHS->hasOneUse()) {
1313 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1314 Cond = A;
1315 True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q);
1316 False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q);
1317 if (Value *NewSel = foldAddNegate(B, C, RHS))
1318 return NewSel;
1319 } else if (RHSIsSelect && RHS->hasOneUse()) {
1320 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1321 Cond = D;
1322 True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q);
1323 False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q);
1324 if (Value *NewSel = foldAddNegate(E, F, LHS))
1325 return NewSel;
1326 }
1327
1328 if (!True || !False)
1329 return nullptr;
1330
1331 Value *SI = Builder.CreateSelect(C: Cond, True, False);
1332 SI->takeName(V: &I);
1333 return SI;
1334}
1335
1336/// Freely adapt every user of V as-if V was changed to !V.
1337/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1338void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1339 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1340 for (User *U : make_early_inc_range(Range: I->users())) {
1341 if (U == IgnoredUser)
1342 continue; // Don't consider this user.
1343 switch (cast<Instruction>(Val: U)->getOpcode()) {
1344 case Instruction::Select: {
1345 auto *SI = cast<SelectInst>(Val: U);
1346 SI->swapValues();
1347 SI->swapProfMetadata();
1348 break;
1349 }
1350 case Instruction::Br: {
1351 BranchInst *BI = cast<BranchInst>(Val: U);
1352 BI->swapSuccessors(); // swaps prof metadata too
1353 if (BPI)
1354 BPI->swapSuccEdgesProbabilities(Src: BI->getParent());
1355 break;
1356 }
1357 case Instruction::Xor:
1358 replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I);
1359 // Add to worklist for DCE.
1360 addToWorklist(I: cast<Instruction>(Val: U));
1361 break;
1362 default:
1363 llvm_unreachable("Got unexpected user - out of sync with "
1364 "canFreelyInvertAllUsersOf() ?");
1365 }
1366 }
1367}
1368
1369/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1370/// constant zero (which is the 'negate' form).
1371Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1372 Value *NegV;
1373 if (match(V, P: m_Neg(V: m_Value(V&: NegV))))
1374 return NegV;
1375
1376 // Constants can be considered to be negated values if they can be folded.
1377 if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V))
1378 return ConstantExpr::getNeg(C);
1379
1380 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V))
1381 if (C->getType()->getElementType()->isIntegerTy())
1382 return ConstantExpr::getNeg(C);
1383
1384 if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) {
1385 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1386 Constant *Elt = CV->getAggregateElement(Elt: i);
1387 if (!Elt)
1388 return nullptr;
1389
1390 if (isa<UndefValue>(Val: Elt))
1391 continue;
1392
1393 if (!isa<ConstantInt>(Val: Elt))
1394 return nullptr;
1395 }
1396 return ConstantExpr::getNeg(C: CV);
1397 }
1398
1399 // Negate integer vector splats.
1400 if (auto *CV = dyn_cast<Constant>(Val: V))
1401 if (CV->getType()->isVectorTy() &&
1402 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1403 return ConstantExpr::getNeg(C: CV);
1404
1405 return nullptr;
1406}
1407
1408// Try to fold:
1409// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1410// -> ({s|u}itofp (int_binop x, y))
1411// 2) (fp_binop ({s|u}itofp x), FpC)
1412// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1413//
1414// Assuming the sign of the cast for x/y is `OpsFromSigned`.
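//
// For example (illustrative, when the checks below prove the casts and the
// integer op to be exact):
//    (fadd (sitofp i8 %x to float), (sitofp i8 %y to float))
//      -> (sitofp (add i8 %x, %y) to float)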
1415Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1416 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1417 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1418
1419 Type *FPTy = BO.getType();
1420 Type *IntTy = IntOps[0]->getType();
1421
1422 unsigned IntSz = IntTy->getScalarSizeInBits();
  // This is the maximum number of in-use bits in the integer for which the
  // int -> fp casts are exact.
1425 unsigned MaxRepresentableBits =
1426 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1427
  // Preserve the known number of leading bits. This can allow us to trivially
  // satisfy the nsw/nuw checks later on.
1430 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1431
  // NB: This only comes up if OpsFromSigned is true, so there is no need to
  // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1434 auto IsNonZero = [&](unsigned OpNo) -> bool {
1435 if (OpsKnown[OpNo].hasKnownBits() &&
1436 OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero())
1437 return true;
1438 return isKnownNonZero(V: IntOps[OpNo], Q: SQ);
1439 };
1440
1441 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1442 // NB: This matches the impl in ValueTracking, we just try to use cached
1443 // knownbits here. If we ever start supporting WithCache for
1444 // `isKnownNonNegative`, change this to an explicit call.
1445 return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative();
1446 };
1447
1448 // Check if we know for certain that ({s|u}itofp op) is exact.
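// For example (illustrative): a float, with 24 bits of precision, represents
// every i16 value exactly, but not every i32 value; in the latter case
// exactness depends on the leading sign/zero bits computed below.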
1449 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1450 // Can we treat this operand as the desired sign?
1451 if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) &&
1452 !IsNonNeg(OpNo))
1453 return false;
1454
// If fp precision >= bitwidth(op) then it's exact.
1456 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1457 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1458 // handled specially. We can't, however, increase the bound arbitrarily for
1459 // `sitofp` as for larger sizes, it won't sign extend.
1460 if (MaxRepresentableBits < IntSz) {
// Otherwise, if it's a signed cast, check that fp precision >=
// bitwidth(op) - numSignBits(op).
1463 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1464 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1465 if (OpsFromSigned)
1466 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]);
1467 // Finally for unsigned check that fp precision >= bitwidth(op) -
1468 // numLeadingZeros(op).
1469 else {
1470 NumUsedLeadingBits[OpNo] =
1471 IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros();
1472 }
1473 }
// NB: We could also check if op is known to be a power of 2 or zero (which
// will always be representable). It's unlikely, however, that if we are
// unable to bound op in any way, we will be able to pass the overflow
// checks later on.
1478
1479 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1480 return false;
1481 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1482 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1483 IsNonZero(OpNo);
1484 };
1485
1486 // If we have a constant rhs, see if we can losslessly convert it to an int.
1487 if (Op1FpC != nullptr) {
// Signed + Mul requires the constant to be non-zero.
1489 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1490 !match(V: Op1FpC, P: m_NonZeroFP()))
1491 return nullptr;
1492
1493 Constant *Op1IntC = ConstantFoldCastOperand(
1494 Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC,
1495 DestTy: IntTy, DL);
1496 if (Op1IntC == nullptr)
1497 return nullptr;
1498 if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP
1499 : Instruction::UIToFP,
1500 C: Op1IntC, DestTy: FPTy, DL) != Op1FpC)
1501 return nullptr;
1502
1503 // First try to keep sign of cast the same.
1504 IntOps[1] = Op1IntC;
1505 }
1506
1507 // Ensure lhs/rhs integer types match.
1508 if (IntTy != IntOps[1]->getType())
1509 return nullptr;
1510
1511 if (Op1FpC == nullptr) {
1512 if (!IsValidPromotion(1))
1513 return nullptr;
1514 }
1515 if (!IsValidPromotion(0))
1516 return nullptr;
1517
// Finally, check that the integer version of the binop will not overflow.
1519 BinaryOperator::BinaryOps IntOpc;
1520 // Because of the precision check, we can often rule out overflows.
1521 bool NeedsOverflowCheck = true;
// Try to conservatively rule out overflow based on the precision checks
// already done above.
1524 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1525 unsigned OverflowMaxCurBits =
1526 std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]);
1527 bool OutputSigned = OpsFromSigned;
1528 switch (BO.getOpcode()) {
1529 case Instruction::FAdd:
1530 IntOpc = Instruction::Add;
1531 OverflowMaxOutputBits += OverflowMaxCurBits;
1532 break;
1533 case Instruction::FSub:
1534 IntOpc = Instruction::Sub;
1535 OverflowMaxOutputBits += OverflowMaxCurBits;
1536 break;
1537 case Instruction::FMul:
1538 IntOpc = Instruction::Mul;
1539 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1540 break;
1541 default:
1542 llvm_unreachable("Unsupported binop");
1543 }
1544 // The precision check may have already ruled out overflow.
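// Illustrative bound: if each operand is known to use at most 10 significant
// bits, an unsigned add needs at most 11 bits and an unsigned multiply at
// most 20 bits, so with a 32-bit IntSz the comparison below already proves
// the integer op cannot overflow.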
1545 if (OverflowMaxOutputBits < IntSz) {
1546 NeedsOverflowCheck = false;
// The subtraction may go negative (wrapping as unsigned), but the result is
// guaranteed to fit in the signed range; treating the output as signed is
// what allows us to avoid the overflow check for sub.
1549 if (IntOpc == Instruction::Sub)
1550 OutputSigned = true;
1551 }
1552
// If the precision check did not rule out overflow, check explicitly.
1554 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1555 // `IntOps[...]` arguments to `KnownOps[...]`.
1556 if (NeedsOverflowCheck &&
1557 !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned))
1558 return nullptr;
1559
1560 Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]);
1561 if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) {
1562 IntBO->setHasNoSignedWrap(OutputSigned);
1563 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1564 }
1565 if (OutputSigned)
1566 return new SIToFPInst(IntBinOp, FPTy);
1567 return new UIToFPInst(IntBinOp, FPTy);
1568}
1569
1570// Try to fold:
1571// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1572// -> ({s|u}itofp (int_binop x, y))
1573// 2) (fp_binop ({s|u}itofp x), FpC)
1574// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1575Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1576 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1577 Constant *Op1FpC = nullptr;
1578 // Check for:
1579 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1580 // 2) (binop ({s|u}itofp x), FpC)
1581 if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) &&
1582 !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0]))))
1583 return nullptr;
1584
1585 if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) &&
1586 !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) &&
1587 !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1]))))
1588 return nullptr;
1589
// Cache KnownBits to potentially save repeated analysis.
1591 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1592
1593 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1594 // different constraints depending on the sign of the cast.
1595 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1596 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1597 IntOps, Op1FpC, OpsKnown))
1598 return R;
1599 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1600 Op1FpC, OpsKnown);
1601}
1602
1603/// A binop with a constant operand and a sign-extended boolean operand may be
1604/// converted into a select of constants by applying the binary operation to
1605/// the constant with the two possible values of the extended boolean (0 or -1).
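/// For example (illustrative):
///   %s = sext i1 %b to i32
///   %r = add i32 %s, 10
/// becomes
///   %r = select i1 %b, i32 9, i32 10
/// because add -1, 10 is 9 and add 0, 10 is 10.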
1606Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1607 // TODO: Handle non-commutative binop (constant is operand 0).
1608 // TODO: Handle zext.
1609 // TODO: Peek through 'not' of cast.
1610 Value *BO0 = BO.getOperand(i_nocapture: 0);
1611 Value *BO1 = BO.getOperand(i_nocapture: 1);
1612 Value *X;
1613 Constant *C;
1614 if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) ||
1615 !X->getType()->isIntOrIntVectorTy(BitWidth: 1))
1616 return nullptr;
1617
1618 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1619 Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType());
1620 Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType());
1621 Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C);
1622 Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C);
1623 return SelectInst::Create(C: X, S1: TVal, S2: FVal);
1624}
1625
1626static Constant *constantFoldOperationIntoSelectOperand(Instruction &I,
1627 SelectInst *SI,
1628 bool IsTrueArm) {
1629 SmallVector<Constant *> ConstOps;
1630 for (Value *Op : I.operands()) {
1631 CmpInst::Predicate Pred;
1632 Constant *C = nullptr;
1633 if (Op == SI) {
1634 C = dyn_cast<Constant>(Val: IsTrueArm ? SI->getTrueValue()
1635 : SI->getFalseValue());
1636 } else if (match(V: SI->getCondition(),
1637 P: m_ICmp(Pred, L: m_Specific(V: Op), R: m_Constant(C))) &&
1638 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1639 isGuaranteedNotToBeUndefOrPoison(V: C)) {
1640 // Pass
1641 } else {
1642 C = dyn_cast<Constant>(Val: Op);
1643 }
1644 if (C == nullptr)
1645 return nullptr;
1646
1647 ConstOps.push_back(Elt: C);
1648 }
1649
1650 return ConstantFoldInstOperands(I: &I, Ops: ConstOps, DL: I.getModule()->getDataLayout());
1651}
1652
1653static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1654 Value *NewOp, InstCombiner &IC) {
1655 Instruction *Clone = I.clone();
1656 Clone->replaceUsesOfWith(From: SI, To: NewOp);
1657 Clone->dropUBImplyingAttrsAndMetadata();
1658 IC.InsertNewInstBefore(New: Clone, Old: SI->getIterator());
1659 return Clone;
1660}
1661
1662Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1663 bool FoldWithMultiUse) {
// Don't modify shared select instructions unless FoldWithMultiUse is set.
1665 if (!SI->hasOneUse() && !FoldWithMultiUse)
1666 return nullptr;
1667
1668 Value *TV = SI->getTrueValue();
1669 Value *FV = SI->getFalseValue();
1670 if (!(isa<Constant>(Val: TV) || isa<Constant>(Val: FV)))
1671 return nullptr;
1672
1673 // Bool selects with constant operands can be folded to logical ops.
1674 if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1))
1675 return nullptr;
1676
1677 // Test if a FCmpInst instruction is used exclusively by a select as
1678 // part of a minimum or maximum operation. If so, refrain from doing
1679 // any other folding. This helps out other analyses which understand
1680 // non-obfuscated minimum and maximum idioms. And in this case, at
1681 // least one of the comparison operands has at least one user besides
1682 // the compare (the select), which would often largely negate the
1683 // benefit of folding anyway.
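// For example (illustrative), an fmin idiom such as
//   %c = fcmp olt float %a, %b
//   %m = select i1 %c, float %a, float %b
// is left alone here so that later analyses can still recognize it.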
1684 if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) {
1685 if (CI->hasOneUse()) {
1686 Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1);
1687 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1688 return nullptr;
1689 }
1690 }
1691
1692 // Make sure that one of the select arms constant folds successfully.
1693 Value *NewTV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ true);
1694 Value *NewFV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ false);
1695 if (!NewTV && !NewFV)
1696 return nullptr;
1697
1698 // Create an instruction for the arm that did not fold.
1699 if (!NewTV)
1700 NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this);
1701 if (!NewFV)
1702 NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this);
1703 return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1704}
1705
1706static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1707 Value *InValue, BasicBlock *InBB,
1708 const DataLayout &DL,
1709 const SimplifyQuery SQ) {
1710 // NB: It is a precondition of this transform that the operands be
1711 // phi translatable! This is usually trivially satisfied by limiting it
1712 // to constant ops, and for selects we do a more sophisticated check.
1713 SmallVector<Value *> Ops;
1714 for (Value *Op : I.operands()) {
1715 if (Op == PN)
1716 Ops.push_back(Elt: InValue);
1717 else
1718 Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB));
1719 }
1720
1721 // Don't consider the simplification successful if we get back a constant
1722 // expression. That's just an instruction in hiding.
1723 // Also reject the case where we simplify back to the phi node. We wouldn't
1724 // be able to remove it in that case.
1725 Value *NewVal = simplifyInstructionWithOperands(
1726 I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator()));
1727 if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr()))
1728 return NewVal;
1729
1730 // Check if incoming PHI value can be replaced with constant
1731 // based on implied condition.
1732 BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator());
1733 const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I);
1734 if (TerminatorBI && TerminatorBI->isConditional() &&
1735 TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) {
1736 bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent();
1737 std::optional<bool> ImpliedCond =
1738 isImpliedCondition(LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getPredicate(),
1739 RHSOp0: Ops[0], RHSOp1: Ops[1], DL, LHSIsTrue);
1740 if (ImpliedCond)
1741 return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value());
1742 }
1743
1744 return nullptr;
1745}
1746
1747Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1748 unsigned NumPHIValues = PN->getNumIncomingValues();
1749 if (NumPHIValues == 0)
1750 return nullptr;
1751
1752 // We normally only transform phis with a single use. However, if a PHI has
1753 // multiple uses and they are all the same operation, we can fold *all* of the
1754 // uses into the PHI.
1755 if (!PN->hasOneUse()) {
1756 // Walk the use list for the instruction, comparing them to I.
1757 for (User *U : PN->users()) {
1758 Instruction *UI = cast<Instruction>(Val: U);
1759 if (UI != &I && !I.isIdenticalTo(I: UI))
1760 return nullptr;
1761 }
1762 // Otherwise, we can replace *all* users with the new PHI we form.
1763 }
1764
1765 // Check to see whether the instruction can be folded into each phi operand.
1766 // If there is one operand that does not fold, remember the BB it is in.
1767 // If there is more than one or if *it* is a PHI, bail out.
1768 SmallVector<Value *> NewPhiValues;
1769 BasicBlock *NonSimplifiedBB = nullptr;
1770 Value *NonSimplifiedInVal = nullptr;
1771 for (unsigned i = 0; i != NumPHIValues; ++i) {
1772 Value *InVal = PN->getIncomingValue(i);
1773 BasicBlock *InBB = PN->getIncomingBlock(i);
1774
1775 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) {
1776 NewPhiValues.push_back(Elt: NewVal);
1777 continue;
1778 }
1779
1780 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1781
1782 NonSimplifiedBB = InBB;
1783 NonSimplifiedInVal = InVal;
1784 NewPhiValues.push_back(Elt: nullptr);
1785
1786 // If the InVal is an invoke at the end of the pred block, then we can't
1787 // insert a computation after it without breaking the edge.
1788 if (isa<InvokeInst>(Val: InVal))
1789 if (cast<Instruction>(Val: InVal)->getParent() == NonSimplifiedBB)
1790 return nullptr;
1791
1792 // If the incoming non-constant value is reachable from the phis block,
1793 // we'll push the operation across a loop backedge. This could result in
1794 // an infinite combine loop, and is generally non-profitable (especially
1795 // if the operation was originally outside the loop).
1796 if (isPotentiallyReachable(From: PN->getParent(), To: NonSimplifiedBB, ExclusionSet: nullptr, DT: &DT,
1797 LI))
1798 return nullptr;
1799 }
1800
1801 // If there is exactly one non-simplified value, we can insert a copy of the
1802 // operation in that block. However, if this is a critical edge, we would be
1803 // inserting the computation on some other paths (e.g. inside a loop). Only
1804 // do this if the pred block is unconditionally branching into the phi block.
1805 // Also, make sure that the pred block is not dead code.
1806 if (NonSimplifiedBB != nullptr) {
1807 BranchInst *BI = dyn_cast<BranchInst>(Val: NonSimplifiedBB->getTerminator());
1808 if (!BI || !BI->isUnconditional() ||
1809 !DT.isReachableFromEntry(A: NonSimplifiedBB))
1810 return nullptr;
1811 }
1812
1813 // Okay, we can do the transformation: create the new PHI node.
1814 PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues());
1815 InsertNewInstBefore(New: NewPN, Old: PN->getIterator());
1816 NewPN->takeName(V: PN);
1817 NewPN->setDebugLoc(PN->getDebugLoc());
1818
1819 // If we are going to have to insert a new computation, do so right before the
1820 // predecessor's terminator.
1821 Instruction *Clone = nullptr;
1822 if (NonSimplifiedBB) {
1823 Clone = I.clone();
1824 for (Use &U : Clone->operands()) {
1825 if (U == PN)
1826 U = NonSimplifiedInVal;
1827 else
1828 U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: NonSimplifiedBB);
1829 }
1830 InsertNewInstBefore(New: Clone, Old: NonSimplifiedBB->getTerminator()->getIterator());
1831 }
1832
1833 for (unsigned i = 0; i != NumPHIValues; ++i) {
1834 if (NewPhiValues[i])
1835 NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i));
1836 else
1837 NewPN->addIncoming(V: Clone, BB: PN->getIncomingBlock(i));
1838 }
1839
1840 for (User *U : make_early_inc_range(Range: PN->users())) {
1841 Instruction *User = cast<Instruction>(Val: U);
1842 if (User == &I) continue;
1843 replaceInstUsesWith(I&: *User, V: NewPN);
1844 eraseInstFromFunction(I&: *User);
1845 }
1846
1847 replaceAllDbgUsesWith(From&: const_cast<PHINode &>(*PN),
1848 To&: const_cast<PHINode &>(*NewPN),
1849 DomPoint&: const_cast<PHINode &>(*PN), DT);
1850 return replaceInstUsesWith(I, V: NewPN);
1851}
1852
1853Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1854 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1855 // we are guarding against replicating the binop in >1 predecessor.
1856 // This could miss matching a phi with 2 constant incoming values.
1857 auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0));
1858 auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1));
1859 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1860 Phi0->getNumOperands() != Phi1->getNumOperands())
1861 return nullptr;
1862
1863 // TODO: Remove the restriction for binop being in the same block as the phis.
1864 if (BO.getParent() != Phi0->getParent() ||
1865 BO.getParent() != Phi1->getParent())
1866 return nullptr;
1867
// Fold if, for each predecessor block, one of phi0's and phi1's incoming
// values is the identity constant of the binary operator; the binop then
// reduces to a phi of the other incoming values.
1871 // For example:
1872 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1873 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1874 // %add = add i32 %phi0, %phi1
1875 // ==>
1876 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1877 Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(),
1878 /*AllowRHSConstant*/ false);
1879 if (C) {
1880 SmallVector<Value *, 4> NewIncomingValues;
1881 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1882 auto &Phi0Use = std::get<0>(t&: T);
1883 auto &Phi1Use = std::get<1>(t&: T);
1884 if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use))
1885 return false;
1886 Value *Phi0UseV = Phi0Use.get();
1887 Value *Phi1UseV = Phi1Use.get();
1888 if (Phi0UseV == C)
1889 NewIncomingValues.push_back(Elt: Phi1UseV);
1890 else if (Phi1UseV == C)
1891 NewIncomingValues.push_back(Elt: Phi0UseV);
1892 else
1893 return false;
1894 return true;
1895 };
1896
1897 if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()),
1898 P: CanFoldIncomingValuePair)) {
1899 PHINode *NewPhi =
1900 PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands());
1901 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1902 "The number of collected incoming values should equal the number "
1903 "of the original PHINode operands!");
1904 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1905 NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I));
1906 return NewPhi;
1907 }
1908 }
1909
1910 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1911 return nullptr;
1912
1913 // Match a pair of incoming constants for one of the predecessor blocks.
1914 BasicBlock *ConstBB, *OtherBB;
1915 Constant *C0, *C1;
1916 if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) {
1917 ConstBB = Phi0->getIncomingBlock(i: 0);
1918 OtherBB = Phi0->getIncomingBlock(i: 1);
1919 } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) {
1920 ConstBB = Phi0->getIncomingBlock(i: 1);
1921 OtherBB = Phi0->getIncomingBlock(i: 0);
1922 } else {
1923 return nullptr;
1924 }
1925 if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1)))
1926 return nullptr;
1927
// The block that we are hoisting to must reach here unconditionally.
// Otherwise, we could end up speculatively executing an expensive or
// non-speculatable op.
1931 auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator());
1932 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1933 !DT.isReachableFromEntry(A: OtherBB))
1934 return nullptr;
1935
1936 // TODO: This check could be tightened to only apply to binops (div/rem) that
1937 // are not safe to speculatively execute. But that could allow hoisting
1938 // potentially expensive instructions (fdiv for example).
1939 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1940 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter))
1941 return nullptr;
1942
1943 // Fold constants for the predecessor block with constant incoming values.
1944 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL);
1945 if (!NewC)
1946 return nullptr;
1947
1948 // Make a new binop in the predecessor block with the non-constant incoming
1949 // values.
1950 Builder.SetInsertPoint(PredBlockBranch);
1951 Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(),
1952 LHS: Phi0->getIncomingValueForBlock(BB: OtherBB),
1953 RHS: Phi1->getIncomingValueForBlock(BB: OtherBB));
1954 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO))
1955 NotFoldedNewBO->copyIRFlags(V: &BO);
1956
1957 // Replace the binop with a phi of the new values. The old phis are dead.
1958 PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2);
1959 NewPhi->addIncoming(V: NewBO, BB: OtherBB);
1960 NewPhi->addIncoming(V: NewC, BB: ConstBB);
1961 return NewPhi;
1962}
1963
1964Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1965 if (!isa<Constant>(Val: I.getOperand(i_nocapture: 1)))
1966 return nullptr;
1967
1968 if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) {
1969 if (Instruction *NewSel = FoldOpIntoSelect(Op&: I, SI: Sel))
1970 return NewSel;
1971 } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) {
1972 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1973 return NewPhi;
1974 }
1975 return nullptr;
1976}
1977
1978static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1979 // If this GEP has only 0 indices, it is the same pointer as
1980 // Src. If Src is not a trivial GEP too, don't combine
1981 // the indices.
1982 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1983 !Src.hasOneUse())
1984 return false;
1985 return true;
1986}
1987
1988Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1989 if (!isa<VectorType>(Val: Inst.getType()))
1990 return nullptr;
1991
1992 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1993 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
1994 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1995 cast<VectorType>(Inst.getType())->getElementCount());
1996 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1997 cast<VectorType>(Inst.getType())->getElementCount());
1998
1999 // If both operands of the binop are vector concatenations, then perform the
2000 // narrow binop on each pair of the source operands followed by concatenation
2001 // of the results.
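// For example (illustrative, using the names below):
//   binop (concat L0, L1), (concat R0, R1)
//     --> concat (binop L0, R0), (binop L1, R1)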
2002 Value *L0, *L1, *R0, *R1;
2003 ArrayRef<int> Mask;
2004 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) &&
2005 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) &&
2006 LHS->hasOneUse() && RHS->hasOneUse() &&
2007 cast<ShuffleVectorInst>(Val: LHS)->isConcat() &&
2008 cast<ShuffleVectorInst>(Val: RHS)->isConcat()) {
2009 // This transform does not have the speculative execution constraint as
2010 // below because the shuffle is a concatenation. The new binops are
2011 // operating on exactly the same elements as the existing binop.
2012 // TODO: We could ease the mask requirement to allow different undef lanes,
2013 // but that requires an analysis of the binop-with-undef output value.
2014 Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0);
2015 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0))
2016 BO->copyIRFlags(V: &Inst);
2017 Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1);
2018 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1))
2019 BO->copyIRFlags(V: &Inst);
2020 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2021 }
2022
2023 auto createBinOpReverse = [&](Value *X, Value *Y) {
2024 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2025 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2026 BO->copyIRFlags(V: &Inst);
2027 Module *M = Inst.getModule();
2028 Function *F = Intrinsic::getDeclaration(
M, Intrinsic::experimental_vector_reverse, Tys: V->getType());
2030 return CallInst::Create(F, V);
2031 };
2032
2033 // NOTE: Reverse shuffles don't require the speculative execution protection
2034 // below because they don't affect which lanes take part in the computation.
2035
2036 Value *V1, *V2;
2037 if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) {
2038 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2039 if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) &&
2040 (LHS->hasOneUse() || RHS->hasOneUse() ||
2041 (LHS == RHS && LHS->hasNUses(N: 2))))
2042 return createBinOpReverse(V1, V2);
2043
2044 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2045 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2046 return createBinOpReverse(V1, RHS);
2047 }
2048 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2049 else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2)))))
2050 return createBinOpReverse(LHS, V2);
2051
2052 // It may not be safe to reorder shuffles and things like div, urem, etc.
2053 // because we may trap when executing those ops on unknown vector elements.
2054 // See PR20059.
2055 if (!isSafeToSpeculativelyExecute(I: &Inst))
2056 return nullptr;
2057
2058 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2059 Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2060 if (auto *BO = dyn_cast<BinaryOperator>(Val: XY))
2061 BO->copyIRFlags(V: &Inst);
2062 return new ShuffleVectorInst(XY, M);
2063 };
2064
2065 // If both arguments of the binary operation are shuffles that use the same
2066 // mask and shuffle within a single vector, move the shuffle after the binop.
2067 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) &&
2068 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
2069 V1->getType() == V2->getType() &&
2070 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2071 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2072 return createBinOpShuffle(V1, V2, Mask);
2073 }
2074
2075 // If both arguments of a commutative binop are select-shuffles that use the
2076 // same mask with commuted operands, the shuffles are unnecessary.
2077 if (Inst.isCommutative() &&
2078 match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) &&
2079 match(V: RHS,
2080 P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) {
2081 auto *LShuf = cast<ShuffleVectorInst>(Val: LHS);
2082 auto *RShuf = cast<ShuffleVectorInst>(Val: RHS);
2083 // TODO: Allow shuffles that contain undefs in the mask?
2084 // That is legal, but it reduces undef knowledge.
2085 // TODO: Allow arbitrary shuffles by shuffling after binop?
2086 // That might be legal, but we have to deal with poison.
2087 if (LShuf->isSelect() &&
2088 !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) &&
2089 RShuf->isSelect() &&
2090 !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) {
2091 // Example:
2092 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2093 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2094 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2095 Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2);
2096 NewBO->copyIRFlags(V: &Inst);
2097 return NewBO;
2098 }
2099 }
2100
2101 // If one argument is a shuffle within one vector and the other is a constant,
2102 // try moving the shuffle after the binary operation. This canonicalization
2103 // intends to move shuffles closer to other shuffles and binops closer to
2104 // other binops, so they can be folded. It may also enable demanded elements
2105 // transforms.
2106 Constant *C;
2107 auto *InstVTy = dyn_cast<FixedVectorType>(Val: Inst.getType());
2108 if (InstVTy &&
2109 match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(),
2110 mask: m_Mask(Mask))),
2111 R: m_ImmConstant(C))) &&
2112 cast<FixedVectorType>(Val: V1->getType())->getNumElements() <=
2113 InstVTy->getNumElements()) {
2114 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
2115 "Shuffle should not change scalar type");
2116
2117 // Find constant NewC that has property:
2118 // shuffle(NewC, ShMask) = C
2119 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
2120 // reorder is not possible. A 1-to-1 mapping is not required. Example:
2121 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
2122 bool ConstOp1 = isa<Constant>(Val: RHS);
2123 ArrayRef<int> ShMask = Mask;
2124 unsigned SrcVecNumElts =
2125 cast<FixedVectorType>(Val: V1->getType())->getNumElements();
2126 PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType());
2127 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
2128 bool MayChange = true;
2129 unsigned NumElts = InstVTy->getNumElements();
2130 for (unsigned I = 0; I < NumElts; ++I) {
2131 Constant *CElt = C->getAggregateElement(Elt: I);
2132 if (ShMask[I] >= 0) {
2133 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2134 Constant *NewCElt = NewVecC[ShMask[I]];
2135 // Bail out if:
2136 // 1. The constant vector contains a constant expression.
2137 // 2. The shuffle needs an element of the constant vector that can't
2138 // be mapped to a new constant vector.
2139 // 3. This is a widening shuffle that copies elements of V1 into the
2140 // extended elements (extending with poison is allowed).
2141 if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) ||
2142 I >= SrcVecNumElts) {
2143 MayChange = false;
2144 break;
2145 }
2146 NewVecC[ShMask[I]] = CElt;
2147 }
2148 // If this is a widening shuffle, we must be able to extend with poison
2149 // elements. If the original binop does not produce a poison in the high
2150 // lanes, then this transform is not safe.
2151 // Similarly for poison lanes due to the shuffle mask, we can only
2152 // transform binops that preserve poison.
2153 // TODO: We could shuffle those non-poison constant values into the
// result by using a constant vector (rather than a poison vector)
2155 // as operand 1 of the new binop, but that might be too aggressive
2156 // for target-independent shuffle creation.
2157 if (I >= SrcVecNumElts || ShMask[I] < 0) {
2158 Constant *MaybePoison =
2159 ConstOp1
2160 ? ConstantFoldBinaryOpOperands(Opcode, LHS: PoisonScalar, RHS: CElt, DL)
2161 : ConstantFoldBinaryOpOperands(Opcode, LHS: CElt, RHS: PoisonScalar, DL);
2162 if (!MaybePoison || !isa<PoisonValue>(Val: MaybePoison)) {
2163 MayChange = false;
2164 break;
2165 }
2166 }
2167 }
2168 if (MayChange) {
2169 Constant *NewC = ConstantVector::get(V: NewVecC);
2170 // It may not be safe to execute a binop on a vector with poison elements
2171 // because the entire instruction can be folded to undef or create poison
2172 // that did not exist in the original code.
2173 // TODO: The shift case should not be necessary.
2174 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
2175 NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1);
2176
2177 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2178 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2179 Value *NewLHS = ConstOp1 ? V1 : NewC;
2180 Value *NewRHS = ConstOp1 ? NewC : V1;
2181 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2182 }
2183 }
2184
2185 // Try to reassociate to sink a splat shuffle after a binary operation.
2186 if (Inst.isAssociative() && Inst.isCommutative()) {
2187 // Canonicalize shuffle operand as LHS.
2188 if (isa<ShuffleVectorInst>(Val: RHS))
2189 std::swap(a&: LHS, b&: RHS);
2190
2191 Value *X;
2192 ArrayRef<int> MaskC;
2193 int SplatIndex;
2194 Value *Y, *OtherOp;
2195 if (!match(V: LHS,
2196 P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) ||
2197 !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) ||
2198 X->getType() != Inst.getType() ||
2199 !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp)))))
2200 return nullptr;
2201
2202 // FIXME: This may not be safe if the analysis allows undef elements. By
2203 // moving 'Y' before the splat shuffle, we are implicitly assuming
2204 // that it is not undef/poison at the splat index.
2205 if (isSplatValue(V: OtherOp, Index: SplatIndex)) {
2206 std::swap(a&: Y, b&: OtherOp);
2207 } else if (!isSplatValue(V: Y, Index: SplatIndex)) {
2208 return nullptr;
2209 }
2210
2211 // X and Y are splatted values, so perform the binary operation on those
2212 // values followed by a splat followed by the 2nd binary operation:
2213 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2214 Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2215 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2216 Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask);
2217 Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp);
2218
2219 // Intersect FMF on both new binops. Other (poison-generating) flags are
2220 // dropped to be safe.
2221 if (isa<FPMathOperator>(Val: R)) {
2222 R->copyFastMathFlags(I: &Inst);
2223 R->andIRFlags(V: RHS);
2224 }
2225 if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO))
2226 NewInstBO->copyIRFlags(V: R);
2227 return R;
2228 }
2229
2230 return nullptr;
2231}
2232
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
2235/// sure the narrow op does not overflow.
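/// For example (illustrative), when known bits prove the narrow add cannot
/// overflow:
///   %xw = zext i8 %x to i32
///   %yw = zext i8 %y to i32
///   %r = add i32 %xw, %yw
/// becomes
///   %n = add nuw i8 %x, %y
///   %r = zext i8 %n to i32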
2236Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2237 // We need at least one extended operand.
2238 Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1);
2239
2240 // If this is a sub, we swap the operands since we always want an extension
2241 // on the RHS. The LHS can be an extension or a constant.
2242 if (BO.getOpcode() == Instruction::Sub)
2243 std::swap(a&: Op0, b&: Op1);
2244
2245 Value *X;
2246 bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X)));
2247 if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X))))
2248 return nullptr;
2249
2250 // If both operands are the same extension from the same source type and we
2251 // can eliminate at least one (hasOneUse), this might work.
2252 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2253 Value *Y;
2254 if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() &&
2255 cast<Operator>(Val: Op1)->getOpcode() == CastOpc &&
2256 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2257 // If that did not match, see if we have a suitable constant operand.
2258 // Truncating and extending must produce the same constant.
2259 Constant *WideC;
2260 if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC)))
2261 return nullptr;
2262 Constant *NarrowC = getLosslessTrunc(C: WideC, TruncTy: X->getType(), ExtOp: CastOpc);
2263 if (!NarrowC)
2264 return nullptr;
2265 Y = NarrowC;
2266 }
2267
2268 // Swap back now that we found our operands.
2269 if (BO.getOpcode() == Instruction::Sub)
2270 std::swap(a&: X, b&: Y);
2271
2272 // Both operands have narrow versions. Last step: the math must not overflow
2273 // in the narrow width.
2274 if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext))
2275 return nullptr;
2276
2277 // bo (ext X), (ext Y) --> ext (bo X, Y)
2278 // bo (ext X), C --> ext (bo X, C')
2279 Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow");
2280 if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) {
2281 if (IsSext)
2282 NewBinOp->setHasNoSignedWrap();
2283 else
2284 NewBinOp->setHasNoUnsignedWrap();
2285 }
2286 return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType());
2287}
2288
2289static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
2290 // At least one GEP must be inbounds.
2291 if (!GEP1.isInBounds() && !GEP2.isInBounds())
2292 return false;
2293
2294 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
2295 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
2296}
2297
2298/// Thread a GEP operation with constant indices through the constant true/false
2299/// arms of a select.
2300static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2301 InstCombiner::BuilderTy &Builder) {
2302 if (!GEP.hasAllConstantIndices())
2303 return nullptr;
2304
2305 Instruction *Sel;
2306 Value *Cond;
2307 Constant *TrueC, *FalseC;
2308 if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) ||
2309 !match(V: Sel,
2310 P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC))))
2311 return nullptr;
2312
2313 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2314 // Propagate 'inbounds' and metadata from existing instructions.
2315 // Note: using IRBuilder to create the constants for efficiency.
2316 SmallVector<Value *, 4> IndexC(GEP.indices());
2317 bool IsInBounds = GEP.isInBounds();
2318 Type *Ty = GEP.getSourceElementType();
2319 Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "", IsInBounds);
2320 Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "", IsInBounds);
2321 return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2322}
2323
2324Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2325 GEPOperator *Src) {
2326 // Combine Indices - If the source pointer to this getelementptr instruction
2327 // is a getelementptr instruction with matching element type, combine the
2328 // indices of the two getelementptr instructions into a single instruction.
2329 if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src))
2330 return nullptr;
2331
2332 // For constant GEPs, use a more general offset-based folding approach.
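// For example (illustrative, assuming 4-byte i32 and a 64-bit index type):
//   %p1 = getelementptr inbounds [4 x i32], ptr %p, i64 0, i64 1
//   %p2 = getelementptr inbounds i32, ptr %p1, i64 2
// has a total constant offset of 12 bytes and can be rewritten as the single
//   getelementptr inbounds [4 x i32], ptr %p, i64 0, i64 3
// (or as a getelementptr of i8 when the offset cannot be re-expressed in
// terms of the source element type).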
2333 Type *PtrTy = Src->getType()->getScalarType();
2334 if (GEP.hasAllConstantIndices() &&
2335 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2336 // Split Src into a variable part and a constant suffix.
2337 gep_type_iterator GTI = gep_type_begin(GEP: *Src);
2338 Type *BaseType = GTI.getIndexedType();
2339 bool IsFirstType = true;
2340 unsigned NumVarIndices = 0;
2341 for (auto Pair : enumerate(First: Src->indices())) {
2342 if (!isa<ConstantInt>(Val: Pair.value())) {
2343 BaseType = GTI.getIndexedType();
2344 IsFirstType = false;
2345 NumVarIndices = Pair.index() + 1;
2346 }
2347 ++GTI;
2348 }
2349
2350 // Determine the offset for the constant suffix of Src.
2351 APInt Offset(DL.getIndexTypeSizeInBits(Ty: PtrTy), 0);
2352 if (NumVarIndices != Src->getNumIndices()) {
// FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2354 if (BaseType->isScalableTy())
2355 return nullptr;
2356
2357 SmallVector<Value *> ConstantIndices;
2358 if (!IsFirstType)
2359 ConstantIndices.push_back(
2360 Elt: Constant::getNullValue(Ty: Type::getInt32Ty(C&: GEP.getContext())));
2361 append_range(C&: ConstantIndices, R: drop_begin(RangeOrContainer: Src->indices(), N: NumVarIndices));
2362 Offset += DL.getIndexedOffsetInType(ElemTy: BaseType, Indices: ConstantIndices);
2363 }
2364
2365 // Add the offset for GEP (which is fully constant).
2366 if (!GEP.accumulateConstantOffset(DL, Offset))
2367 return nullptr;
2368
2369 APInt OffsetOld = Offset;
2370 // Convert the total offset back into indices.
2371 SmallVector<APInt> ConstIndices =
2372 DL.getGEPIndicesForOffset(ElemTy&: BaseType, Offset);
2373 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
// If both GEPs are constant-indexed and cannot be merged either way,
// convert them to a GEP of i8.
2376 if (Src->hasAllConstantIndices())
2377 return replaceInstUsesWith(
2378 I&: GEP, V: Builder.CreateGEP(
2379 Ty: Builder.getInt8Ty(), Ptr: Src->getOperand(i_nocapture: 0),
2380 IdxList: Builder.getInt(AI: OffsetOld), Name: "",
2381 IsInBounds: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
2382 return nullptr;
2383 }
2384
2385 bool IsInBounds = isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP));
2386 SmallVector<Value *> Indices;
2387 append_range(C&: Indices, R: drop_end(RangeOrContainer: Src->indices(),
2388 N: Src->getNumIndices() - NumVarIndices));
2389 for (const APInt &Idx : drop_begin(RangeOrContainer&: ConstIndices, N: !IsFirstType)) {
2390 Indices.push_back(Elt: ConstantInt::get(Context&: GEP.getContext(), V: Idx));
2391 // Even if the total offset is inbounds, we may end up representing it
2392 // by first performing a larger negative offset, and then a smaller
2393 // positive one. The large negative offset might go out of bounds. Only
2394 // preserve inbounds if all signs are the same.
2395 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2396 }
2397
2398 return replaceInstUsesWith(
2399 I&: GEP, V: Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0),
2400 IdxList: Indices, Name: "", IsInBounds));
2401 }
2402
2403 if (Src->getResultElementType() != GEP.getSourceElementType())
2404 return nullptr;
2405
2406 SmallVector<Value*, 8> Indices;
2407
2408 // Find out whether the last index in the source GEP is a sequential idx.
2409 bool EndsWithSequential = false;
2410 for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src);
2411 I != E; ++I)
2412 EndsWithSequential = I.isSequential();
2413
2414 // Can we combine the two pointer arithmetics offsets?
2415 if (EndsWithSequential) {
2416 // Replace: gep (gep %P, long B), long A, ...
2417 // With: T = long A+B; gep %P, T, ...
2418 Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands()-1);
2419 Value *GO1 = GEP.getOperand(i_nocapture: 1);
2420
2421 // If they aren't the same type, then the input hasn't been processed
2422 // by the loop above yet (which canonicalizes sequential index types to
2423 // intptr_t). Just avoid transforming this until the input has been
2424 // normalized.
2425 if (SO1->getType() != GO1->getType())
2426 return nullptr;
2427
2428 Value *Sum =
2429 simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP));
2430 // Only do the combine when we are sure the cost after the
2431 // merge is never more than that before the merge.
2432 if (Sum == nullptr)
2433 return nullptr;
2434
2435 // Update the GEP in place if possible.
2436 if (Src->getNumOperands() == 2) {
2437 GEP.setIsInBounds(isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)));
2438 replaceOperand(I&: GEP, OpNum: 0, V: Src->getOperand(i_nocapture: 0));
2439 replaceOperand(I&: GEP, OpNum: 1, V: Sum);
2440 return &GEP;
2441 }
2442 Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()-1);
2443 Indices.push_back(Elt: Sum);
2444 Indices.append(in_start: GEP.op_begin()+2, in_end: GEP.op_end());
2445 } else if (isa<Constant>(Val: *GEP.idx_begin()) &&
2446 cast<Constant>(Val&: *GEP.idx_begin())->isNullValue() &&
2447 Src->getNumOperands() != 1) {
// Otherwise we can do the fold if the first index of the GEP is zero.
2449 Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end());
2450 Indices.append(in_start: GEP.idx_begin()+1, in_end: GEP.idx_end());
2451 }
2452
2453 if (!Indices.empty())
2454 return replaceInstUsesWith(
2455 I&: GEP, V: Builder.CreateGEP(
2456 Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "",
2457 IsInBounds: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
2458
2459 return nullptr;
2460}
2461
2462Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2463 BuilderTy *Builder,
2464 bool &DoesConsume, unsigned Depth) {
2465 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2466 // ~(~(X)) -> X.
2467 Value *A, *B;
2468 if (match(V, P: m_Not(V: m_Value(V&: A)))) {
2469 DoesConsume = true;
2470 return A;
2471 }
2472
2473 Constant *C;
2474 // Constants can be considered to be not'ed values.
2475 if (match(V, P: m_ImmConstant(C)))
2476 return ConstantExpr::getNot(C);
2477
2478 if (Depth++ >= MaxAnalysisRecursionDepth)
2479 return nullptr;
2480
2481 // The rest of the cases require that we invert all uses so don't bother
2482 // doing the analysis if we know we can't use the result.
2483 if (!WillInvertAllUses)
2484 return nullptr;
2485
2486 // Compares can be inverted if all of their uses are being modified to use
2487 // the ~V.
2488 if (auto *I = dyn_cast<CmpInst>(Val: V)) {
2489 if (Builder != nullptr)
2490 return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0),
2491 RHS: I->getOperand(i_nocapture: 1));
2492 return NonNull;
2493 }
2494
2495 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2496 // `(-1 - B) - A` if we are willing to invert all of the uses.
2497 if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2498 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2499 DoesConsume, Depth))
2500 return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull;
2501 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2502 DoesConsume, Depth))
2503 return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull;
2504 return nullptr;
2505 }
2506
2507 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2508 // into `A ^ B` if we are willing to invert all of the uses.
2509 if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2510 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2511 DoesConsume, Depth))
2512 return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull;
2513 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2514 DoesConsume, Depth))
2515 return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull;
2516 return nullptr;
2517 }
2518
// If `V` is of the form `A - B` then `-1 - V` can be folded into
// `(-1 - A) + B` if we are willing to invert all of the uses.
2521 if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2522 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2523 DoesConsume, Depth))
2524 return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull;
2525 return nullptr;
2526 }
2527
2528 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2529 // into `A s>> B` if we are willing to invert all of the uses.
2530 if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2531 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2532 DoesConsume, Depth))
2533 return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull;
2534 return nullptr;
2535 }
2536
2537 Value *Cond;
2538 // LogicOps are special in that we canonicalize them at the cost of an
2539 // instruction.
2540 bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) &&
2541 !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V));
2542 // Selects/min/max with invertible operands are freely invertible
2543 if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2544 bool LocalDoesConsume = DoesConsume;
2545 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr,
2546 DoesConsume&: LocalDoesConsume, Depth))
2547 return nullptr;
2548 if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2549 DoesConsume&: LocalDoesConsume, Depth)) {
2550 DoesConsume = LocalDoesConsume;
2551 if (Builder != nullptr) {
2552 Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2553 DoesConsume, Depth);
2554 assert(NotB != nullptr &&
2555 "Unable to build inverted value for known freely invertable op");
2556 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
2557 return Builder->CreateBinaryIntrinsic(
2558 ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB);
2559 return Builder->CreateSelect(C: Cond, True: NotA, False: NotB);
2560 }
2561 return NonNull;
2562 }
2563 }
2564
2565 if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
2566 bool LocalDoesConsume = DoesConsume;
2567 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
2568 for (Use &U : PN->operands()) {
2569 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2570 Value *NewIncomingVal = getFreelyInvertedImpl(
2571 V: U.get(), /*WillInvertAllUses=*/false,
2572 /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1);
2573 if (NewIncomingVal == nullptr)
2574 return nullptr;
2575 // Make sure that we can safely erase the original PHI node.
2576 if (NewIncomingVal == V)
2577 return nullptr;
2578 if (Builder != nullptr)
2579 IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock);
2580 }
2581
2582 DoesConsume = LocalDoesConsume;
2583 if (Builder != nullptr) {
2584 IRBuilderBase::InsertPointGuard Guard(*Builder);
2585 Builder->SetInsertPoint(PN);
2586 PHINode *NewPN =
2587 Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues());
2588 for (auto [Val, Pred] : IncomingValues)
2589 NewPN->addIncoming(V: Val, BB: Pred);
2590 return NewPN;
2591 }
2592 return NonNull;
2593 }
2594
2595 if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) {
2596 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2597 DoesConsume, Depth))
2598 return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull;
2599 return nullptr;
2600 }
2601
2602 if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) {
2603 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2604 DoesConsume, Depth))
2605 return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull;
2606 return nullptr;
2607 }
2608
2609 // De Morgan's Laws:
2610 // (~(A | B)) -> (~A & ~B)
2611 // (~(A & B)) -> (~A | ~B)
2612 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2613 bool IsLogical, Value *A,
2614 Value *B) -> Value * {
2615 bool LocalDoesConsume = DoesConsume;
2616 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr,
2617 DoesConsume&: LocalDoesConsume, Depth))
2618 return nullptr;
2619 if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2620 DoesConsume&: LocalDoesConsume, Depth)) {
2621 auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2622 DoesConsume&: LocalDoesConsume, Depth);
2623 DoesConsume = LocalDoesConsume;
2624 if (IsLogical)
2625 return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull;
2626 return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull;
2627 }
2628
2629 return nullptr;
2630 };
2631
2632 if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))))
2633 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2634 B);
2635
2636 if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))))
2637 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2638 B);
2639
2640 if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))
2641 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2642 B);
2643
2644 if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B))))
2645 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2646 B);
2647
2648 return nullptr;
2649}
2650
2651Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2652 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
2653 SmallVector<Value *, 8> Indices(GEP.indices());
2654 Type *GEPType = GEP.getType();
2655 Type *GEPEltType = GEP.getSourceElementType();
2656 bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
2657 if (Value *V = simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, InBounds: GEP.isInBounds(),
2658 Q: SQ.getWithInstruction(I: &GEP)))
2659 return replaceInstUsesWith(I&: GEP, V);
2660
2661 // For vector geps, use the generic demanded vector support.
2662 // Skip if GEP return type is scalable. The number of elements is unknown at
2663 // compile-time.
2664 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) {
2665 auto VWidth = GEPFVTy->getNumElements();
2666 APInt PoisonElts(VWidth, 0);
2667 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
2668 if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask,
2669 PoisonElts)) {
2670 if (V != &GEP)
2671 return replaceInstUsesWith(I&: GEP, V);
2672 return &GEP;
2673 }
2674
2675 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2676 // possible (decide on canonical form for pointer broadcast), 3) exploit
2677 // undef elements to decrease demanded bits
2678 }
2679
// Eliminate unneeded casts for indices, and replace any index over a
// zero-size type with zero, since such an index displaces by nothing.
2682 bool MadeChange = false;
2683
2684 // Index width may not be the same width as pointer width.
2685 // Data layout chooses the right type based on supported integer types.
2686 Type *NewScalarIndexTy =
2687 DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType());
2688
2689 gep_type_iterator GTI = gep_type_begin(GEP);
2690 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2691 ++I, ++GTI) {
2692 // Skip indices into struct types.
2693 if (GTI.isStruct())
2694 continue;
2695
2696 Type *IndexTy = (*I)->getType();
2697 Type *NewIndexType =
2698 IndexTy->isVectorTy()
2699 ? VectorType::get(ElementType: NewScalarIndexTy,
2700 EC: cast<VectorType>(Val: IndexTy)->getElementCount())
2701 : NewScalarIndexTy;
2702
2703 // If the element type has zero size then any index over it is equivalent
2704 // to an index of zero, so replace it with zero if it is not zero already.
2705 Type *EltTy = GTI.getIndexedType();
2706 if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero())
2707 if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) {
2708 *I = Constant::getNullValue(Ty: NewIndexType);
2709 MadeChange = true;
2710 }
2711
2712 if (IndexTy != NewIndexType) {
2713 // If we are using a wider index than needed for this platform, shrink
2714 // it to what we need. If narrower, sign-extend it to what we need.
2715 // This explicit cast can make subsequent optimizations more obvious.
2716 *I = Builder.CreateIntCast(V: *I, DestTy: NewIndexType, isSigned: true);
2717 MadeChange = true;
2718 }
2719 }
2720 if (MadeChange)
2721 return &GEP;
2722
2723 // Canonicalize constant GEPs to i8 type.
2724 if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) {
2725 APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0);
2726 if (GEP.accumulateConstantOffset(DL, Offset))
2727 return replaceInstUsesWith(
2728 I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "",
2729 IsInBounds: GEP.isInBounds()));
2730 }
2731
2732 // Check to see if the inputs to the PHI node are getelementptr instructions.
2733 if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) {
2734 auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0));
2735 if (!Op1)
2736 return nullptr;
2737
2738 // Don't fold a GEP into itself through a PHI node. This can only happen
2739 // through the back-edge of a loop. Folding a GEP into itself means that
2740 // the value of the previous iteration needs to be stored in the meantime,
2741 // thus requiring an additional register variable to be live, but not
2742 // actually achieving anything (the GEP still needs to be executed once per
2743 // loop iteration).
2744 if (Op1 == &GEP)
2745 return nullptr;
2746
2747 int DI = -1;
2748
2749 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2750 auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I);
2751 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2752 Op1->getSourceElementType() != Op2->getSourceElementType())
2753 return nullptr;
2754
2755 // As for Op1 above, don't try to fold a GEP into itself.
2756 if (Op2 == &GEP)
2757 return nullptr;
2758
2759 // Keep track of the type as we walk the GEP.
2760 Type *CurTy = nullptr;
2761
2762 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2763 if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType())
2764 return nullptr;
2765
2766 if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) {
2767 if (DI == -1) {
            // We have not seen any differences in the GEPs feeding the PHI
            // so far, so record this one if it is allowed to be a variable.
2771
            // The first two arguments can vary for any GEP; the rest have to
            // be static for struct slots.
2774 if (J > 1) {
2775 assert(CurTy && "No current type?");
2776 if (CurTy->isStructTy())
2777 return nullptr;
2778 }
2779
2780 DI = J;
2781 } else {
            // The GEPs differ in more than one operand. While this could be
            // extended to support GEPs that vary in more than one position,
            // it doesn't make sense: it greatly increases the complexity and
            // would result in an R+R+R addressing mode, which no backend
            // supports directly and which would need to be broken into
            // several simpler instructions anyway.
2788 return nullptr;
2789 }
2790 }
2791
2792 // Sink down a layer of the type for the next iteration.
2793 if (J > 0) {
2794 if (J == 1) {
2795 CurTy = Op1->getSourceElementType();
2796 } else {
2797 CurTy =
2798 GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J));
2799 }
2800 }
2801 }
2802 }
2803
2804 // If not all GEPs are identical we'll have to create a new PHI node.
2805 // Check that the old PHI node has only one use so that it will get
2806 // removed.
2807 if (DI != -1 && !PN->hasOneUse())
2808 return nullptr;
2809
2810 auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone());
2811 if (DI == -1) {
2812 // All the GEPs feeding the PHI are identical. Clone one down into our
2813 // BB so that it can be merged with the current GEP.
2814 } else {
2815 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2816 // into the current block so it can be merged, and create a new PHI to
2817 // set that index.
2818 PHINode *NewPN;
2819 {
2820 IRBuilderBase::InsertPointGuard Guard(Builder);
2821 Builder.SetInsertPoint(PN);
2822 NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(),
2823 NumReservedValues: PN->getNumOperands());
2824 }
2825
2826 for (auto &I : PN->operands())
2827 NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI),
2828 BB: PN->getIncomingBlock(U: I));
2829
2830 NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN);
2831 }
2832
2833 NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt());
2834 return replaceOperand(I&: GEP, OpNum: 0, V: NewGEP);
2835 }
2836
2837 if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp))
2838 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2839 return I;
2840
2841 // Skip if GEP source element type is scalable. The type alloc size is unknown
2842 // at compile-time.
2843 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2844 unsigned AS = GEP.getPointerAddressSpace();
2845 if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() ==
2846 DL.getIndexSizeInBits(AS)) {
2847 uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue();
2848
2849 if (TyAllocSize == 1) {
2850 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
2851 // but only if the result pointer is only used as if it were an integer,
2852 // or both point to the same underlying object (otherwise provenance is
2853 // not necessarily retained).
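        // Illustrative IR (names are arbitrary):
        //   %d = sub i64 %y.int, %x.int   ; ptrtoints of Y and X
        //   %g = getelementptr i8, ptr %x, i64 %d
        // Uses of %g that only care about the address (icmp, ptrtoint), or
        // all uses when X and Y share an underlying object, can be rewritten
        // to use %y directly.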
2854 Value *X = GEP.getPointerOperand();
2855 Value *Y;
2856 if (match(V: GEP.getOperand(i_nocapture: 1),
2857 P: m_Sub(L: m_PtrToInt(Op: m_Value(V&: Y)), R: m_PtrToInt(Op: m_Specific(V: X)))) &&
2858 GEPType == Y->getType()) {
2859 bool HasSameUnderlyingObject =
2860 getUnderlyingObject(V: X) == getUnderlyingObject(V: Y);
2861 bool Changed = false;
2862 GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) {
2863 bool ShouldReplace = HasSameUnderlyingObject ||
2864 isa<ICmpInst>(Val: U.getUser()) ||
2865 isa<PtrToIntInst>(Val: U.getUser());
2866 Changed |= ShouldReplace;
2867 return ShouldReplace;
2868 });
2869 return Changed ? &GEP : nullptr;
2870 }
2871 } else {
2872 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
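        // For example (illustrative, with T = i32, so sizeof(T) == 4):
        //   %i = udiv exact i64 %n, 4
        //   %g = getelementptr i32, ptr %x, i64 %i
        // becomes
        //   %g = getelementptr i8, ptr %x, i64 %n
        // The division (or exact lshr) must be exact for this to hold.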
2873 Value *V;
2874 if ((has_single_bit(Value: TyAllocSize) &&
2875 match(V: GEP.getOperand(i_nocapture: 1),
2876 P: m_Exact(SubPattern: m_Shr(L: m_Value(V),
2877 R: m_SpecificInt(V: countr_zero(Val: TyAllocSize)))))) ||
2878 match(V: GEP.getOperand(i_nocapture: 1),
2879 P: m_Exact(SubPattern: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize))))) {
2880 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2881 PointeeType: Builder.getInt8Ty(), Ptr: GEP.getPointerOperand(), IdxList: V);
2882 NewGEP->setIsInBounds(GEP.isInBounds());
2883 return NewGEP;
2884 }
2885 }
2886 }
2887 }
2888 // We do not handle pointer-vector geps here.
2889 if (GEPType->isVectorTy())
2890 return nullptr;
2891
2892 if (GEP.getNumIndices() == 1) {
2893 // Try to replace ADD + GEP with GEP + GEP.
2894 Value *Idx1, *Idx2;
2895 if (match(V: GEP.getOperand(i_nocapture: 1),
2896 P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) {
2897 // %idx = add i64 %idx1, %idx2
2898 // %gep = getelementptr i32, ptr %ptr, i64 %idx
2899 // as:
2900 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
2901 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
2902 auto *NewPtr = Builder.CreateGEP(Ty: GEP.getResultElementType(),
2903 Ptr: GEP.getPointerOperand(), IdxList: Idx1);
2904 return GetElementPtrInst::Create(PointeeType: GEP.getResultElementType(), Ptr: NewPtr,
2905 IdxList: Idx2);
2906 }
2907 ConstantInt *C;
2908 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAdd(
2909 L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) {
2910 // %add = add nsw i32 %idx1, idx2
2911 // %sidx = sext i32 %add to i64
2912 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
2913 // as:
2914 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
2915 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
2916 auto *NewPtr = Builder.CreateGEP(
2917 Ty: GEP.getResultElementType(), Ptr: GEP.getPointerOperand(),
2918 IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()));
2919 return GetElementPtrInst::Create(
2920 PointeeType: GEP.getResultElementType(), Ptr: NewPtr,
2921 IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()));
2922 }
2923 }
2924
2925 if (!GEP.isInBounds()) {
2926 unsigned IdxWidth =
2927 DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace());
2928 APInt BasePtrOffset(IdxWidth, 0);
2929 Value *UnderlyingPtrOp =
2930 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2931 Offset&: BasePtrOffset);
2932 bool CanBeNull, CanBeFreed;
2933 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
2934 DL, CanBeNull, CanBeFreed);
2935 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
2936 if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) &&
2937 BasePtrOffset.isNonNegative()) {
2938 APInt AllocSize(IdxWidth, DerefBytes);
2939 if (BasePtrOffset.ule(RHS: AllocSize)) {
2940 return GetElementPtrInst::CreateInBounds(
2941 PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName());
2942 }
2943 }
2944 }
2945 }
2946
2947 if (Instruction *R = foldSelectGEP(GEP, Builder))
2948 return R;
2949
2950 return nullptr;
2951}
2952
2953static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2954 Instruction *AI) {
2955 if (isa<ConstantPointerNull>(Val: V))
2956 return true;
2957 if (auto *LI = dyn_cast<LoadInst>(Val: V))
2958 return isa<GlobalVariable>(Val: LI->getPointerOperand());
2959 // Two distinct allocations will never be equal.
2960 return isAllocLikeFn(V, TLI: &TLI) && V != AI;
2961}
2962
/// Given a call CB which uses an address UsedV, return true if we can prove
/// the call's only possible effect is storing to UsedV.
2965static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2966 const TargetLibraryInfo &TLI) {
2967 if (!CB.use_empty())
2968 // TODO: add recursion if returned attribute is present
2969 return false;
2970
2971 if (CB.isTerminator())
2972 // TODO: remove implementation restriction
2973 return false;
2974
2975 if (!CB.willReturn() || !CB.doesNotThrow())
2976 return false;
2977
2978 // If the only possible side effect of the call is writing to the alloca,
2979 // and the result isn't used, we can safely remove any reads implied by the
2980 // call including those which might read the alloca itself.
2981 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI);
2982 return Dest && Dest->Ptr == UsedV;
2983}
2984
2985static bool isAllocSiteRemovable(Instruction *AI,
2986 SmallVectorImpl<WeakTrackingVH> &Users,
2987 const TargetLibraryInfo &TLI) {
2988 SmallVector<Instruction*, 4> Worklist;
2989 const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI);
2990 Worklist.push_back(Elt: AI);
2991
2992 do {
2993 Instruction *PI = Worklist.pop_back_val();
2994 for (User *U : PI->users()) {
2995 Instruction *I = cast<Instruction>(Val: U);
2996 switch (I->getOpcode()) {
2997 default:
2998 // Give up the moment we see something we can't handle.
2999 return false;
3000
3001 case Instruction::AddrSpaceCast:
3002 case Instruction::BitCast:
3003 case Instruction::GetElementPtr:
3004 Users.emplace_back(Args&: I);
3005 Worklist.push_back(Elt: I);
3006 continue;
3007
3008 case Instruction::ICmp: {
3009 ICmpInst *ICI = cast<ICmpInst>(Val: I);
3010 // We can fold eq/ne comparisons with null to false/true, respectively.
3011 // We also fold comparisons in some conditions provided the alloc has
3012 // not escaped (see isNeverEqualToUnescapedAlloc).
3013 if (!ICI->isEquality())
3014 return false;
3015 unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0;
3016 if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI))
3017 return false;
3018
3019 // Do not fold compares to aligned_alloc calls, as they may have to
3020 // return null in case the required alignment cannot be satisfied,
3021 // unless we can prove that both alignment and size are valid.
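        // For example, a null compare of aligned_alloc(8, 24) may still be
        // folded: the alignment is a power of 2 and the size is a multiple of
        // it, so the call is not required to fail for that reason.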
3022 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
          // Check if the alignment and size of a call to aligned_alloc are
          // valid, that is, the alignment is a power of 2 and the size is a
          // multiple of the alignment.
3026 const APInt *Alignment;
3027 const APInt *Size;
3028 return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) &&
3029 match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) &&
3030 Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero();
3031 };
3032 auto *CB = dyn_cast<CallBase>(Val: AI);
3033 LibFunc TheLibFunc;
3034 if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) &&
3035 TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3036 !AlignmentAndSizeKnownValid(CB))
3037 return false;
3038 Users.emplace_back(Args&: I);
3039 continue;
3040 }
3041
3042 case Instruction::Call:
3043 // Ignore no-op and store intrinsics.
3044 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3045 switch (II->getIntrinsicID()) {
3046 default:
3047 return false;
3048
3049 case Intrinsic::memmove:
3050 case Intrinsic::memcpy:
3051 case Intrinsic::memset: {
3052 MemIntrinsic *MI = cast<MemIntrinsic>(Val: II);
3053 if (MI->isVolatile() || MI->getRawDest() != PI)
3054 return false;
3055 [[fallthrough]];
3056 }
3057 case Intrinsic::assume:
3058 case Intrinsic::invariant_start:
3059 case Intrinsic::invariant_end:
3060 case Intrinsic::lifetime_start:
3061 case Intrinsic::lifetime_end:
3062 case Intrinsic::objectsize:
3063 Users.emplace_back(Args&: I);
3064 continue;
3065 case Intrinsic::launder_invariant_group:
3066 case Intrinsic::strip_invariant_group:
3067 Users.emplace_back(Args&: I);
3068 Worklist.push_back(Elt: I);
3069 continue;
3070 }
3071 }
3072
3073 if (isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) {
3074 Users.emplace_back(Args&: I);
3075 continue;
3076 }
3077
3078 if (getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI &&
3079 getAllocationFamily(I, TLI: &TLI) == Family) {
3080 assert(Family);
3081 Users.emplace_back(Args&: I);
3082 continue;
3083 }
3084
3085 if (getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI &&
3086 getAllocationFamily(I, TLI: &TLI) == Family) {
3087 assert(Family);
3088 Users.emplace_back(Args&: I);
3089 Worklist.push_back(Elt: I);
3090 continue;
3091 }
3092
3093 return false;
3094
3095 case Instruction::Store: {
3096 StoreInst *SI = cast<StoreInst>(Val: I);
3097 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3098 return false;
3099 Users.emplace_back(Args&: I);
3100 continue;
3101 }
3102 }
3103 llvm_unreachable("missing a return?");
3104 }
3105 } while (!Worklist.empty());
3106 return true;
3107}
3108
3109Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3110 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3111
  // If we have a malloc call whose only uses are comparisons to null and free
  // calls, delete the calls and replace the comparisons with true or false as
  // appropriate.
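  // Illustrative IR for a removable allocation site (names are arbitrary):
  //   %p = call ptr @malloc(i64 16)
  //   %c = icmp eq ptr %p, null
  //   call void @free(ptr %p)
  // All three instructions can be removed, with %c replaced by false.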
3115
3116 // This is based on the principle that we can substitute our own allocation
3117 // function (which will never return null) rather than knowledge of the
3118 // specific function being called. In some sense this can change the permitted
3119 // outputs of a program (when we convert a malloc to an alloca, the fact that
3120 // the allocation is now on the stack is potentially visible, for example),
  // but we believe it does so in a permissible manner.
3122 SmallVector<WeakTrackingVH, 64> Users;
3123
3124 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3125 // before each store.
3126 SmallVector<DbgVariableIntrinsic *, 8> DVIs;
3127 SmallVector<DbgVariableRecord *, 8> DVRs;
3128 std::unique_ptr<DIBuilder> DIB;
3129 if (isa<AllocaInst>(Val: MI)) {
3130 findDbgUsers(DbgInsts&: DVIs, V: &MI, DbgVariableRecords: &DVRs);
3131 DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3132 }
3133
3134 if (isAllocSiteRemovable(AI: &MI, Users, TLI)) {
3135 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
      // Lower all @llvm.objectsize calls first because they may use a
      // bitcast/GEP of the alloca we are removing.
3138 if (!Users[i])
3139 continue;
3140
3141 Instruction *I = cast<Instruction>(Val: &*Users[i]);
3142
3143 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3144 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3145 SmallVector<Instruction *> InsertedInstructions;
3146 Value *Result = lowerObjectSizeCall(
3147 ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions);
3148 for (Instruction *Inserted : InsertedInstructions)
3149 Worklist.add(I: Inserted);
3150 replaceInstUsesWith(I&: *I, V: Result);
3151 eraseInstFromFunction(I&: *I);
3152 Users[i] = nullptr; // Skip examining in the next loop.
3153 }
3154 }
3155 }
3156 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3157 if (!Users[i])
3158 continue;
3159
3160 Instruction *I = cast<Instruction>(Val: &*Users[i]);
3161
3162 if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) {
3163 replaceInstUsesWith(I&: *C,
3164 V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()),
3165 V: C->isFalseWhenEqual()));
3166 } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
3167 for (auto *DVI : DVIs)
3168 if (DVI->isAddressOfVariable())
3169 ConvertDebugDeclareToDebugValue(DII: DVI, SI, Builder&: *DIB);
3170 for (auto *DVR : DVRs)
3171 if (DVR->isAddressOfVariable())
3172 ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB);
3173 } else {
        // Casts, GEPs, or anything else: we're about to delete this
        // instruction, so it cannot have any valid uses.
3176 replaceInstUsesWith(I&: *I, V: PoisonValue::get(T: I->getType()));
3177 }
3178 eraseInstFromFunction(I&: *I);
3179 }
3180
3181 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) {
3182 // Replace invoke with a NOP intrinsic to maintain the original CFG
3183 Module *M = II->getModule();
      Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
3185 InvokeInst::Create(Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(),
3186 Args: std::nullopt, NameStr: "", InsertAtEnd: II->getParent());
3187 }
3188
3189 // Remove debug intrinsics which describe the value contained within the
3190 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3191 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3192 //
3193 // ```
3194 // define void @foo(i32 %0) {
3195 // %a = alloca i32 ; Deleted.
3196 // store i32 %0, i32* %a
3197 // dbg.value(i32 %0, "arg0") ; Not deleted.
3198 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3199 // call void @trivially_inlinable_no_op(i32* %a)
3200 // ret void
3201 // }
3202 // ```
3203 //
3204 // This may not be required if we stop describing the contents of allocas
3205 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3206 // the LowerDbgDeclare utility.
3207 //
3208 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3209 // "arg0" dbg.value may be stale after the call. However, failing to remove
3210 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3211 //
3212 // FIXME: the Assignment Tracking project has now likely made this
3213 // redundant (and it's sometimes harmful).
3214 for (auto *DVI : DVIs)
3215 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
3216 DVI->eraseFromParent();
3217 for (auto *DVR : DVRs)
3218 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3219 DVR->eraseFromParent();
3220
3221 return eraseInstFromFunction(I&: MI);
3222 }
3223 return nullptr;
3224}
3225
3226/// Move the call to free before a NULL test.
3227///
/// Check if this free is accessed after its argument has been tested
/// against NULL (property 0).
/// If yes, it is legal to move this call into its predecessor block.
3231///
3232/// The move is performed only if the block containing the call to free
3233/// will be removed, i.e.:
3234/// 1. it has only one predecessor P, and P has two successors
3235/// 2. it contains the call, noops, and an unconditional branch
3236/// 3. its successor is the same as its predecessor's successor
3237///
/// Profitability is not a concern here; this function should be called only
/// if the caller knows this transformation would be profitable (e.g., for
/// code size).
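///
/// Illustrative CFG shape (a sketch; block and value names are arbitrary):
///   pred:
///     %c = icmp eq ptr %p, null
///     br i1 %c, label %cont, label %free.bb
///   free.bb:
///     call void @free(ptr %p)
///     br label %cont
/// The call to free is hoisted into 'pred' before the branch (free of a null
/// pointer is a no-op), leaving 'free.bb' with only its branch so that
/// SimplifyCFG can remove it.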
3241static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
3242 const DataLayout &DL) {
3243 Value *Op = FI.getArgOperand(i: 0);
3244 BasicBlock *FreeInstrBB = FI.getParent();
3245 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3246
3247 // Validate part of constraint #1: Only one predecessor
  // FIXME: We can extend this to multiple predecessors, but in that case we
  // would duplicate the call to free in each predecessor and it may not be
  // profitable even for code size.
3251 if (!PredBB)
3252 return nullptr;
3253
  // Validate constraint #2: Does this block contain only the call to
  // free, noops, and an unconditional branch?
3256 BasicBlock *SuccBB;
3257 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3258 if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB)))
3259 return nullptr;
3260
  // If there are only 2 instructions in the block, at this point they must be
  // the call to free and the unconditional branch.
  // If there are more than 2 instructions, check that the extra ones are
  // noops, i.e., they won't hurt the performance of the generated code.
3265 if (FreeInstrBB->size() != 2) {
3266 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3267 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3268 continue;
3269 auto *Cast = dyn_cast<CastInst>(Val: &Inst);
3270 if (!Cast || !Cast->isNoopCast(DL))
3271 return nullptr;
3272 }
3273 }
3274 // Validate the rest of constraint #1 by matching on the pred branch.
3275 Instruction *TI = PredBB->getTerminator();
3276 BasicBlock *TrueBB, *FalseBB;
3277 ICmpInst::Predicate Pred;
3278 if (!match(V: TI, P: m_Br(C: m_ICmp(Pred,
3279 L: m_CombineOr(L: m_Specific(V: Op),
3280 R: m_Specific(V: Op->stripPointerCasts())),
3281 R: m_Zero()),
3282 T&: TrueBB, F&: FalseBB)))
3283 return nullptr;
3284 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3285 return nullptr;
3286
3287 // Validate constraint #3: Ensure the null case just falls through.
3288 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3289 return nullptr;
3290 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3291 "Broken CFG: missing edge from predecessor to successor");
3292
3293 // At this point, we know that everything in FreeInstrBB can be moved
3294 // before TI.
3295 for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) {
3296 if (&Instr == FreeInstrBBTerminator)
3297 break;
3298 Instr.moveBeforePreserving(MovePos: TI);
3299 }
3300 assert(FreeInstrBB->size() == 1 &&
3301 "Only the branch instruction should remain");
3302
3303 // Now that we've moved the call to free before the NULL check, we have to
3304 // remove any attributes on its parameter that imply it's non-null, because
3305 // those attributes might have only been valid because of the NULL check, and
3306 // we can get miscompiles if we keep them. This is conservative if non-null is
3307 // also implied by something other than the NULL check, but it's guaranteed to
3308 // be correct, and the conservativeness won't matter in practice, since the
3309 // attributes are irrelevant for the call to free itself and the pointer
3310 // shouldn't be used after the call.
3311 AttributeList Attrs = FI.getAttributes();
3312 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3313 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3314 if (Dereferenceable.isValid()) {
3315 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3316 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3317 Attribute::Dereferenceable);
3318 Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes);
3319 }
3320 FI.setAttributes(Attrs);
3321
3322 return &FI;
3323}
3324
3325Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3326 // free undef -> unreachable.
3327 if (isa<UndefValue>(Val: Op)) {
3328 // Leave a marker since we can't modify the CFG here.
3329 CreateNonTerminatorUnreachable(InsertAt: &FI);
3330 return eraseInstFromFunction(I&: FI);
3331 }
3332
  // If we have 'free null', delete the instruction. This can happen in STL
  // code when lots of inlining happens.
3335 if (isa<ConstantPointerNull>(Val: Op))
3336 return eraseInstFromFunction(I&: FI);
3337
3338 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3339 // realloc() entirely.
3340 CallInst *CI = dyn_cast<CallInst>(Val: Op);
3341 if (CI && CI->hasOneUse())
3342 if (Value *ReallocatedOp = getReallocatedOperand(CB: CI))
3343 return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp));
3344
  // If we optimize for code size, try to move the call to free before the null
  // test so that SimplifyCFG can remove the empty block and dead code
  // elimination can remove the branch. I.e., this helps to turn something like:
3348 // if (foo) free(foo);
3349 // into
3350 // free(foo);
3351 //
3352 // Note that we can only do this for 'free' and not for any flavor of
3353 // 'operator delete'; there is no 'operator delete' symbol for which we are
3354 // permitted to invent a call, even if we're passing in a null pointer.
3355 if (MinimizeSize) {
3356 LibFunc Func;
3357 if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free)
3358 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3359 return I;
3360 }
3361
3362 return nullptr;
3363}
3364
3365Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3366 Value *RetVal = RI.getReturnValue();
3367 if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(Ty: RetVal->getType()))
3368 return nullptr;
3369
3370 Function *F = RI.getFunction();
3371 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3372 if (ReturnClass == fcNone)
3373 return nullptr;
3374
3375 KnownFPClass KnownClass;
3376 Value *Simplified =
3377 SimplifyDemandedUseFPClass(V: RetVal, DemandedMask: ~ReturnClass, Known&: KnownClass, Depth: 0, CxtI: &RI);
3378 if (!Simplified)
3379 return nullptr;
3380
3381 return ReturnInst::Create(C&: RI.getContext(), retVal: Simplified);
3382}
3383
3384// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3385bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
3386 // Try to remove the previous instruction if it must lead to unreachable.
3387 // This includes instructions like stores and "llvm.assume" that may not get
3388 // removed by simple dead code elimination.
3389 bool Changed = false;
3390 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3391 // While we theoretically can erase EH, that would result in a block that
3392 // used to start with an EH no longer starting with EH, which is invalid.
3393 // To make it valid, we'd need to fixup predecessors to no longer refer to
3394 // this block, but that changes CFG, which is not allowed in InstCombine.
3395 if (Prev->isEHPad())
3396 break; // Can not drop any more instructions. We're done here.
3397
3398 if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev))
3399 break; // Can not drop any more instructions. We're done here.
3400 // Otherwise, this instruction can be freely erased,
3401 // even if it is not side-effect free.
3402
3403 // A value may still have uses before we process it here (for example, in
3404 // another unreachable block), so convert those to poison.
3405 replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType()));
3406 eraseInstFromFunction(I&: *Prev);
3407 Changed = true;
3408 }
3409 return Changed;
3410}
3411
3412Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3413 removeInstructionsBeforeUnreachable(I);
3414 return nullptr;
3415}
3416
3417Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3418 assert(BI.isUnconditional() && "Only for unconditional branches.");
3419
  // If the second-to-last instruction in this basic block (excluding debug
  // info and bitcasts of pointers) is a store, try to move that store into
  // the successor block, since this block ends with an unconditional branch.
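  // For example (illustrative), if both predecessors of the successor end
  // with a store to the same pointer:
  //   then:  store i32 1, ptr %p        else:  store i32 2, ptr %p
  // the stores can be sunk into the successor as a single store of a phi of
  // the two values (see mergeStoreIntoSuccessor).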
3423
3424 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3425 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3426 return BBI->isDebugOrPseudoInst() ||
3427 (isa<BitCastInst>(Val: BBI) && BBI->getType()->isPointerTy());
3428 };
3429
3430 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3431 do {
3432 if (BBI != FirstInstr)
3433 --BBI;
3434 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3435
3436 return dyn_cast<StoreInst>(Val&: BBI);
3437 };
3438
3439 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3440 if (mergeStoreIntoSuccessor(SI&: *SI))
3441 return &BI;
3442
3443 return nullptr;
3444}
3445
3446void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
3447 SmallVectorImpl<BasicBlock *> &Worklist) {
3448 if (!DeadEdges.insert(V: {From, To}).second)
3449 return;
3450
3451 // Replace phi node operands in successor with poison.
3452 for (PHINode &PN : To->phis())
3453 for (Use &U : PN.incoming_values())
3454 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) {
3455 replaceUse(U, NewValue: PoisonValue::get(T: PN.getType()));
3456 addToWorklist(I: &PN);
3457 MadeIRChange = true;
3458 }
3459
3460 Worklist.push_back(Elt: To);
3461}
3462
// Under the assumption that I is unreachable, remove it and the instructions
// that follow it. Changes are reported directly to MadeIRChange.
3465void InstCombinerImpl::handleUnreachableFrom(
3466 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
3467 BasicBlock *BB = I->getParent();
3468 for (Instruction &Inst : make_early_inc_range(
3469 Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()),
3470 y: std::next(x: I->getReverseIterator())))) {
3471 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
3472 replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType()));
3473 MadeIRChange = true;
3474 }
3475 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
3476 continue;
3477 // RemoveDIs: erase debug-info on this instruction manually.
3478 Inst.dropDbgRecords();
3479 eraseInstFromFunction(I&: Inst);
3480 MadeIRChange = true;
3481 }
3482
3483 SmallVector<Value *> Changed;
3484 if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) {
3485 MadeIRChange = true;
3486 for (Value *V : Changed)
3487 addToWorklist(I: cast<Instruction>(Val: V));
3488 }
3489
3490 // Handle potentially dead successors.
3491 for (BasicBlock *Succ : successors(BB))
3492 addDeadEdge(From: BB, To: Succ, Worklist);
3493}
3494
3495void InstCombinerImpl::handlePotentiallyDeadBlocks(
3496 SmallVectorImpl<BasicBlock *> &Worklist) {
3497 while (!Worklist.empty()) {
3498 BasicBlock *BB = Worklist.pop_back_val();
3499 if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
3500 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
3501 }))
3502 continue;
3503
3504 handleUnreachableFrom(I: &BB->front(), Worklist);
3505 }
3506}
3507
3508void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
3509 BasicBlock *LiveSucc) {
3510 SmallVector<BasicBlock *> Worklist;
3511 for (BasicBlock *Succ : successors(BB)) {
3512 // The live successor isn't dead.
3513 if (Succ == LiveSucc)
3514 continue;
3515
3516 addDeadEdge(From: BB, To: Succ, Worklist);
3517 }
3518
3519 handlePotentiallyDeadBlocks(Worklist);
3520}
3521
3522Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3523 if (BI.isUnconditional())
3524 return visitUnconditionalBranchInst(BI);
3525
3526 // Change br (not X), label True, label False to: br X, label False, True
3527 Value *Cond = BI.getCondition();
3528 Value *X;
3529 if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) {
3530 // Swap Destinations and condition...
3531 BI.swapSuccessors();
3532 if (BPI)
3533 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
3534 return replaceOperand(I&: BI, OpNum: 0, V: X);
3535 }
3536
3537 // Canonicalize logical-and-with-invert as logical-or-with-invert.
3538 // This is done by inverting the condition and swapping successors:
3539 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3540 Value *Y;
3541 if (isa<SelectInst>(Val: Cond) &&
3542 match(V: Cond,
3543 P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) {
3544 Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName());
3545 Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y);
3546 BI.swapSuccessors();
3547 if (BPI)
3548 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
3549 return replaceOperand(I&: BI, OpNum: 0, V: Or);
3550 }
3551
3552 // If the condition is irrelevant, remove the use so that other
3553 // transforms on the condition become more effective.
3554 if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1))
3555 return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType()));
3556
3557 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3558 CmpInst::Predicate Pred;
3559 if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) &&
3560 !isCanonicalPredicate(Pred)) {
3561 // Swap destinations and condition.
3562 auto *Cmp = cast<CmpInst>(Val: Cond);
3563 Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred));
3564 BI.swapSuccessors();
3565 if (BPI)
3566 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
3567 Worklist.push(I: Cmp);
3568 return &BI;
3569 }
3570
3571 if (isa<UndefValue>(Val: Cond)) {
3572 handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr);
3573 return nullptr;
3574 }
3575 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
3576 handlePotentiallyDeadSuccessors(BB: BI.getParent(),
3577 LiveSucc: BI.getSuccessor(i: !CI->getZExtValue()));
3578 return nullptr;
3579 }
3580
3581 DC.registerBranch(BI: &BI);
3582 return nullptr;
3583}
3584
3585// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
3586// we can prove that both (switch C) and (switch X) go to the default when cond
3587// is false/true.
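//
// For example (illustrative):
//   %sel = select i1 (icmp ult i32 %x, 4), i32 %x, i32 7
//   switch i32 %sel, label %default [ cases 0, 1, 2, 3 ]
// The constant arm 7 maps to the default destination, and every case value
// lies in the range implied by the condition (ult 4), so whenever the select
// yields 7 the value %x matches no case either; the switch can use %x
// directly.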
3588static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
3589 SelectInst *Select,
3590 bool IsTrueArm) {
3591 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
3592 auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx));
3593 if (!C)
3594 return nullptr;
3595
3596 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
3597 if (CstBB != SI.getDefaultDest())
3598 return nullptr;
3599 Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx);
3600 ICmpInst::Predicate Pred;
3601 const APInt *RHSC;
3602 if (!match(V: Select->getCondition(),
3603 P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC))))
3604 return nullptr;
3605 if (IsTrueArm)
3606 Pred = ICmpInst::getInversePredicate(pred: Pred);
3607
3608 // See whether we can replace the select with X
3609 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
3610 for (auto Case : SI.cases())
3611 if (!CR.contains(Val: Case.getCaseValue()->getValue()))
3612 return nullptr;
3613
3614 return X;
3615}
3616
3617Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3618 Value *Cond = SI.getCondition();
3619 Value *Op0;
3620 ConstantInt *AddRHS;
3621 if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_ConstantInt(CI&: AddRHS)))) {
3622 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3623 for (auto Case : SI.cases()) {
3624 Constant *NewCase = ConstantExpr::getSub(C1: Case.getCaseValue(), C2: AddRHS);
3625 assert(isa<ConstantInt>(NewCase) &&
3626 "Result of expression should be constant");
3627 Case.setValue(cast<ConstantInt>(Val: NewCase));
3628 }
3629 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
3630 }
3631
3632 ConstantInt *SubLHS;
3633 if (match(V: Cond, P: m_Sub(L: m_ConstantInt(CI&: SubLHS), R: m_Value(V&: Op0)))) {
3634 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
3635 for (auto Case : SI.cases()) {
3636 Constant *NewCase = ConstantExpr::getSub(C1: SubLHS, C2: Case.getCaseValue());
3637 assert(isa<ConstantInt>(NewCase) &&
3638 "Result of expression should be constant");
3639 Case.setValue(cast<ConstantInt>(Val: NewCase));
3640 }
3641 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
3642 }
3643
3644 uint64_t ShiftAmt;
3645 if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) &&
3646 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
3647 all_of(Range: SI.cases(), P: [&](const auto &Case) {
3648 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
3649 })) {
3650 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
3651 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond);
3652 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
3653 Shl->hasOneUse()) {
3654 Value *NewCond = Op0;
3655 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
3656 // If the shift may wrap, we need to mask off the shifted bits.
3657 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
3658 NewCond = Builder.CreateAnd(
3659 LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt));
3660 }
3661 for (auto Case : SI.cases()) {
3662 const APInt &CaseVal = Case.getCaseValue()->getValue();
3663 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
3664 : CaseVal.lshr(shiftAmt: ShiftAmt);
3665 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase));
3666 }
3667 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
3668 }
3669 }
3670
3671 // Fold switch(zext/sext(X)) into switch(X) if possible.
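  // For example (illustrative):
  //   %w = zext i32 %x to i64
  //   switch i64 %w, label %d [ i64 0, ...  i64 5, ... ]
  // can switch directly on i32 %x with the case values truncated to i32,
  // since every case value fits in 32 bits.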
3672 if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) {
3673 bool IsZExt = isa<ZExtInst>(Val: Cond);
3674 Type *SrcTy = Op0->getType();
3675 unsigned NewWidth = SrcTy->getScalarSizeInBits();
3676
3677 if (all_of(Range: SI.cases(), P: [&](const auto &Case) {
3678 const APInt &CaseVal = Case.getCaseValue()->getValue();
3679 return IsZExt ? CaseVal.isIntN(N: NewWidth)
3680 : CaseVal.isSignedIntN(N: NewWidth);
3681 })) {
3682 for (auto &Case : SI.cases()) {
3683 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
3684 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
3685 }
3686 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
3687 }
3688 }
3689
3690 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
3691 if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) {
3692 if (Value *V =
3693 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
3694 return replaceOperand(I&: SI, OpNum: 0, V);
3695 if (Value *V =
3696 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
3697 return replaceOperand(I&: SI, OpNum: 0, V);
3698 }
3699
3700 KnownBits Known = computeKnownBits(V: Cond, Depth: 0, CxtI: &SI);
3701 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3702 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3703
3704 // Compute the number of leading bits we can ignore.
3705 // TODO: A better way to determine this would use ComputeNumSignBits().
3706 for (const auto &C : SI.cases()) {
3707 LeadingKnownZeros =
3708 std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero());
3709 LeadingKnownOnes =
3710 std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one());
3711 }
3712
3713 unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes);
3714
3715 // Shrink the condition operand if the new type is smaller than the old type.
3716 // But do not shrink to a non-standard type, because backend can't generate
3717 // good code for that yet.
3718 // TODO: We can make it aggressive again after fixing PR39569.
3719 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3720 shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) {
3721 IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth);
3722 Builder.SetInsertPoint(&SI);
3723 Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc");
3724
3725 for (auto Case : SI.cases()) {
3726 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
3727 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
3728 }
3729 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
3730 }
3731
3732 if (isa<UndefValue>(Val: Cond)) {
3733 handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr);
3734 return nullptr;
3735 }
3736 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
3737 handlePotentiallyDeadSuccessors(BB: SI.getParent(),
3738 LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor());
3739 return nullptr;
3740 }
3741
3742 return nullptr;
3743}
3744
3745Instruction *
3746InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3747 auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand());
3748 if (!WO)
3749 return nullptr;
3750
3751 Intrinsic::ID OvID = WO->getIntrinsicID();
3752 const APInt *C = nullptr;
3753 if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) {
3754 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3755 OvID == Intrinsic::umul_with_overflow)) {
3756 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3757 if (C->isAllOnes())
3758 return BinaryOperator::CreateNeg(Op: WO->getLHS());
3759 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3760 if (C->isPowerOf2()) {
3761 return BinaryOperator::CreateShl(
3762 V1: WO->getLHS(),
3763 V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2()));
3764 }
3765 }
3766 }
3767
3768 // We're extracting from an overflow intrinsic. See if we're the only user.
3769 // That allows us to simplify multiple result intrinsics to simpler things
3770 // that just get one value.
3771 if (!WO->hasOneUse())
3772 return nullptr;
3773
3774 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3775 // and replace it with a traditional binary instruction.
3776 if (*EV.idx_begin() == 0) {
3777 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3778 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3779 // Replace the old instruction's uses with poison.
3780 replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType()));
3781 eraseInstFromFunction(I&: *WO);
3782 return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS);
3783 }
3784
3785 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3786
3787 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
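  // That is: extractvalue (usub.with.overflow X, Y), 1 --> icmp ult X, Y.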
3788 if (OvID == Intrinsic::usub_with_overflow)
3789 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3790
3791 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3792 // +1 is not possible because we assume signed values.
3793 if (OvID == Intrinsic::smul_with_overflow &&
3794 WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1))
3795 return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS());
3796
3797 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
3798 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3799 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3800 // Only handle even bitwidths for performance reasons.
3801 if (BitWidth % 2 == 0)
3802 return new ICmpInst(
3803 ICmpInst::ICMP_UGT, WO->getLHS(),
3804 ConstantInt::get(Ty: WO->getLHS()->getType(),
3805 V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2)));
3806 }
3807
3808 // If only the overflow result is used, and the right hand side is a
3809 // constant (or constant splat), we can remove the intrinsic by directly
3810 // checking for overflow.
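  // For example (illustrative, i8 operands): the overflow bit of
  //   sadd.with.overflow(i8 %x, i8 100)
  // is set exactly when %x is signed-greater-than 27, so it can be checked
  // with a single icmp against that bound.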
3811 if (C) {
3812 // Compute the no-wrap range for LHS given RHS=C, then construct an
3813 // equivalent icmp, potentially using an offset.
3814 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3815 BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind());
3816
3817 CmpInst::Predicate Pred;
3818 APInt NewRHSC, Offset;
3819 NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset);
3820 auto *OpTy = WO->getRHS()->getType();
3821 auto *NewLHS = WO->getLHS();
3822 if (Offset != 0)
3823 NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset));
3824 return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS,
3825 ConstantInt::get(Ty: OpTy, V: NewRHSC));
3826 }
3827
3828 return nullptr;
3829}
3830
3831Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3832 Value *Agg = EV.getAggregateOperand();
3833
3834 if (!EV.hasIndices())
3835 return replaceInstUsesWith(I&: EV, V: Agg);
3836
3837 if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(),
3838 Q: SQ.getWithInstruction(I: &EV)))
3839 return replaceInstUsesWith(I&: EV, V);
3840
3841 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) {
3842 // We're extracting from an insertvalue instruction, compare the indices
3843 const unsigned *exti, *exte, *insi, *inse;
3844 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3845 exte = EV.idx_end(), inse = IV->idx_end();
3846 exti != exte && insi != inse;
3847 ++exti, ++insi) {
3848 if (*insi != *exti)
3849 // The insert and extract both reference distinctly different elements.
3850 // This means the extract is not influenced by the insert, and we can
3851 // replace the aggregate operand of the extract with the aggregate
3852 // operand of the insert. i.e., replace
3853 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3854 // %E = extractvalue { i32, { i32 } } %I, 0
3855 // with
3856 // %E = extractvalue { i32, { i32 } } %A, 0
3857 return ExtractValueInst::Create(Agg: IV->getAggregateOperand(),
3858 Idxs: EV.getIndices());
3859 }
3860 if (exti == exte && insi == inse)
3861 // Both iterators are at the end: Index lists are identical. Replace
3862 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3863 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3864 // with "i32 42"
3865 return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand());
3866 if (exti == exte) {
3867 // The extract list is a prefix of the insert list. i.e. replace
3868 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3869 // %E = extractvalue { i32, { i32 } } %I, 1
3870 // with
3871 // %X = extractvalue { i32, { i32 } } %A, 1
3872 // %E = insertvalue { i32 } %X, i32 42, 0
3873 // by switching the order of the insert and extract (though the
3874 // insertvalue should be left in, since it may have other uses).
3875 Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(),
3876 Idxs: EV.getIndices());
3877 return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(),
3878 Idxs: ArrayRef(insi, inse));
3879 }
3880 if (insi == inse)
3881 // The insert list is a prefix of the extract list
3882 // We can simply remove the common indices from the extract and make it
3883 // operate on the inserted value instead of the insertvalue result.
3884 // i.e., replace
3885 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3886 // %E = extractvalue { i32, { i32 } } %I, 1, 0
3887 // with
3888 // %E extractvalue { i32 } { i32 42 }, 0
3889 return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(),
3890 Idxs: ArrayRef(exti, exte));
3891 }
3892
3893 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3894 return R;
3895
3896 if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) {
3897 // Bail out if the aggregate contains scalable vector type
3898 if (auto *STy = dyn_cast<StructType>(Val: Agg->getType());
3899 STy && STy->containsScalableVectorType())
3900 return nullptr;
3901
3902 // If the (non-volatile) load only has one use, we can rewrite this to a
3903 // load from a GEP. This reduces the size of the load. If a load is used
3904 // only by extractvalue instructions then this either must have been
3905 // optimized before, or it is a struct with padding, in which case we
3906 // don't want to do the transformation as it loses padding knowledge.
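    // For example (illustrative):
    //   %agg = load { i32, i64 }, ptr %p
    //   %v   = extractvalue { i32, i64 } %agg, 1
    // becomes roughly
    //   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
    //   %v   = load i64, ptr %gep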
3907 if (L->isSimple() && L->hasOneUse()) {
3908 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3909 SmallVector<Value*, 4> Indices;
3910 // Prefix an i32 0 since we need the first element.
3911 Indices.push_back(Elt: Builder.getInt32(C: 0));
3912 for (unsigned Idx : EV.indices())
3913 Indices.push_back(Elt: Builder.getInt32(C: Idx));
3914
3915 // We need to insert these at the location of the old load, not at that of
3916 // the extractvalue.
3917 Builder.SetInsertPoint(L);
3918 Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(),
3919 Ptr: L->getPointerOperand(), IdxList: Indices);
3920 Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP);
      // Whatever aliasing information we had for the original load must also
      // hold for the smaller load, so propagate the annotations.
3923 NL->setAAMetadata(L->getAAMetadata());
3924 // Returning the load directly will cause the main loop to insert it in
3925 // the wrong spot, so use replaceInstUsesWith().
3926 return replaceInstUsesWith(I&: EV, V: NL);
3927 }
3928 }
3929
3930 if (auto *PN = dyn_cast<PHINode>(Val: Agg))
3931 if (Instruction *Res = foldOpIntoPhi(I&: EV, PN))
3932 return Res;
3933
3934 // Canonicalize extract (select Cond, TV, FV)
3935 // -> select cond, (extract TV), (extract FV)
3936 if (auto *SI = dyn_cast<SelectInst>(Val: Agg))
3937 if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true))
3938 return R;
3939
3940 // We could simplify extracts from other values. Note that nested extracts may
3941 // already be simplified implicitly by the above: extract (extract (insert) )
3942 // will be translated into extract ( insert ( extract ) ) first and then just
3943 // the value inserted, if appropriate. Similarly for extracts from single-use
3944 // loads: extract (extract (load)) will be translated to extract (load (gep))
3945 // and if again single-use then via load (gep (gep)) to load (gep).
3946 // However, double extracts from e.g. function arguments or return values
3947 // aren't handled yet.
3948 return nullptr;
3949}
3950
3951/// Return 'true' if the given typeinfo will match anything.
3952static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3953 switch (Personality) {
3954 case EHPersonality::GNU_C:
3955 case EHPersonality::GNU_C_SjLj:
3956 case EHPersonality::Rust:
    // The GCC C EH and Rust personalities only exist to support cleanups, so
    // it's not clear what the semantics of catch clauses are.
3959 return false;
3960 case EHPersonality::Unknown:
3961 return false;
3962 case EHPersonality::GNU_Ada:
3963 // While __gnat_all_others_value will match any Ada exception, it doesn't
3964 // match foreign exceptions (or didn't, before gcc-4.7).
3965 return false;
3966 case EHPersonality::GNU_CXX:
3967 case EHPersonality::GNU_CXX_SjLj:
3968 case EHPersonality::GNU_ObjC:
3969 case EHPersonality::MSVC_X86SEH:
3970 case EHPersonality::MSVC_TableSEH:
3971 case EHPersonality::MSVC_CXX:
3972 case EHPersonality::CoreCLR:
3973 case EHPersonality::Wasm_CXX:
3974 case EHPersonality::XL_CXX:
3975 case EHPersonality::ZOS_CXX:
3976 return TypeInfo->isNullValue();
3977 }
3978 llvm_unreachable("invalid enum");
3979}
3980
3981static bool shorter_filter(const Value *LHS, const Value *RHS) {
3982 return
3983 cast<ArrayType>(Val: LHS->getType())->getNumElements()
3984 <
3985 cast<ArrayType>(Val: RHS->getType())->getNumElements();
3986}
3987
3988Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3989 // The logic here should be correct for any real-world personality function.
3990 // However if that turns out not to be true, the offending logic can always
3991 // be conditioned on the personality function, like the catch-all logic is.
3992 EHPersonality Personality =
3993 classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn());
3994
3995 // Simplify the list of clauses, eg by removing repeated catch clauses
3996 // (these are often created by inlining).
3997 bool MakeNewInstruction = false; // If true, recreate using the following:
3998 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3999 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4000
4001 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4002 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4003 bool isLastClause = i + 1 == e;
4004 if (LI.isCatch(Idx: i)) {
4005 // A catch clause.
4006 Constant *CatchClause = LI.getClause(Idx: i);
4007 Constant *TypeInfo = CatchClause->stripPointerCasts();
4008
4009 // If we already saw this clause, there is no point in having a second
4010 // copy of it.
4011 if (AlreadyCaught.insert(Ptr: TypeInfo).second) {
4012 // This catch clause was not already seen.
4013 NewClauses.push_back(Elt: CatchClause);
4014 } else {
4015 // Repeated catch clause - drop the redundant copy.
4016 MakeNewInstruction = true;
4017 }
4018
4019 // If this is a catch-all then there is no point in keeping any following
4020 // clauses or marking the landingpad as having a cleanup.
4021 if (isCatchAll(Personality, TypeInfo)) {
4022 if (!isLastClause)
4023 MakeNewInstruction = true;
4024 CleanupFlag = false;
4025 break;
4026 }
4027 } else {
4028 // A filter clause. If any of the filter elements were already caught
4029 // then they can be dropped from the filter. It is tempting to try to
4030 // exploit the filter further by saying that any typeinfo that does not
4031 // occur in the filter can't be caught later (and thus can be dropped).
4032 // However this would be wrong, since typeinfos can match without being
4033 // equal (for example if one represents a C++ class, and the other some
4034 // class derived from it).
4035 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4036 Constant *FilterClause = LI.getClause(Idx: i);
4037 ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType());
4038 unsigned NumTypeInfos = FilterType->getNumElements();
4039
4040 // An empty filter catches everything, so there is no point in keeping any
4041 // following clauses or marking the landingpad as having a cleanup. By
4042 // dealing with this case here the following code is made a bit simpler.
4043 if (!NumTypeInfos) {
4044 NewClauses.push_back(Elt: FilterClause);
4045 if (!isLastClause)
4046 MakeNewInstruction = true;
4047 CleanupFlag = false;
4048 break;
4049 }
4050
4051 bool MakeNewFilter = false; // If true, make a new filter.
4052 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4053 if (isa<ConstantAggregateZero>(Val: FilterClause)) {
4054 // Not an empty filter - it contains at least one null typeinfo.
4055 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4056 Constant *TypeInfo =
4057 Constant::getNullValue(Ty: FilterType->getElementType());
4058 // If this typeinfo is a catch-all then the filter can never match.
4059 if (isCatchAll(Personality, TypeInfo)) {
4060 // Throw the filter away.
4061 MakeNewInstruction = true;
4062 continue;
4063 }
4064
4065 // There is no point in having multiple copies of this typeinfo, so
4066 // discard all but the first copy if there is more than one.
4067 NewFilterElts.push_back(Elt: TypeInfo);
4068 if (NumTypeInfos > 1)
4069 MakeNewFilter = true;
4070 } else {
4071 ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause);
4072 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4073 NewFilterElts.reserve(N: NumTypeInfos);
4074
4075 // Remove any filter elements that were already caught or that already
4076 // occurred in the filter. While there, see if any of the elements are
4077 // catch-alls. If so, the filter can be discarded.
4078 bool SawCatchAll = false;
4079 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4080 Constant *Elt = Filter->getOperand(i_nocapture: j);
4081 Constant *TypeInfo = Elt->stripPointerCasts();
4082 if (isCatchAll(Personality, TypeInfo)) {
4083 // This element is a catch-all. Bail out, noting this fact.
4084 SawCatchAll = true;
4085 break;
4086 }
4087
4088 // Even if we've seen a type in a catch clause, we don't want to
4089 // remove it from the filter. An unexpected type handler may be
4090 // set up for a call site which throws an exception of the same
4091 // type caught. In order for the exception thrown by the unexpected
4092 // handler to propagate correctly, the filter must be correctly
4093 // described for the call site.
4094 //
4095 // Example:
4096 //
4097 // void unexpected() { throw 1;}
4098 // void foo() throw (int) {
4099 // std::set_unexpected(unexpected);
4100 // try {
4101 // throw 2.0;
4102 // } catch (int i) {}
4103 // }
4104
4105 // There is no point in having multiple copies of the same typeinfo in
4106 // a filter, so only add it if we didn't already.
4107 if (SeenInFilter.insert(Ptr: TypeInfo).second)
4108 NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt));
4109 }
4110 // A filter containing a catch-all cannot match anything by definition.
4111 if (SawCatchAll) {
4112 // Throw the filter away.
4113 MakeNewInstruction = true;
4114 continue;
4115 }
4116
4117 // If we dropped something from the filter, make a new one.
4118 if (NewFilterElts.size() < NumTypeInfos)
4119 MakeNewFilter = true;
4120 }
4121 if (MakeNewFilter) {
4122 FilterType = ArrayType::get(ElementType: FilterType->getElementType(),
4123 NumElements: NewFilterElts.size());
4124 FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts);
4125 MakeNewInstruction = true;
4126 }
4127
4128 NewClauses.push_back(Elt: FilterClause);
4129
4130 // If the new filter is empty then it will catch everything so there is
4131 // no point in keeping any following clauses or marking the landingpad
4132 // as having a cleanup. The case of the original filter being empty was
4133 // already handled above.
4134 if (MakeNewFilter && !NewFilterElts.size()) {
4135 assert(MakeNewInstruction && "New filter but not a new instruction!");
4136 CleanupFlag = false;
4137 break;
4138 }
4139 }
4140 }
4141
4142 // If several filters occur in a row then reorder them so that the shortest
4143 // filters come first (those with the smallest number of elements). This is
4144 // advantageous because shorter filters are more likely to match, speeding up
4145 // unwinding, but mostly because it increases the effectiveness of the other
4146 // filter optimizations below.
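// As a quick illustrative sketch (the typeinfos @A, @B, @C are made up, not
// taken from a real test case), a clause list of the form
//   filter [2 x ptr] [ptr @A, ptr @B], filter [1 x ptr] [ptr @C]
// would be reordered to
//   filter [1 x ptr] [ptr @C], filter [2 x ptr] [ptr @A, ptr @B]
// so that the shorter filter is tried first.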
4147 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4148 unsigned j;
4149 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4150 for (j = i; j != e; ++j)
4151 if (!isa<ArrayType>(Val: NewClauses[j]->getType()))
4152 break;
4153
4154 // Check whether the filters are already sorted by length. We need to know
4155 // if sorting them is actually going to do anything so that we only make a
4156 // new landingpad instruction if it does.
4157 for (unsigned k = i; k + 1 < j; ++k)
4158 if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) {
4159 // Not sorted, so sort the filters now. Doing an unstable sort would be
4160 // correct too but reordering filters pointlessly might confuse users.
4161 std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j,
4162 comp: shorter_filter);
4163 MakeNewInstruction = true;
4164 break;
4165 }
4166
4167 // Look for the next batch of filters.
4168 i = j + 1;
4169 }
4170
4171 // If typeinfos matched if and only if equal, then the elements of a filter L
4172 // that occurs later than a filter F could be replaced by the intersection of
4173 // the elements of F and L. In reality two typeinfos can match without being
4174 // equal (for example if one represents a C++ class, and the other some class
4175 // derived from it) so it would be wrong to perform this transform in general.
4176 // However the transform is correct and useful if F is a subset of L. In that
4177 // case L can be replaced by F, and thus removed altogether since repeating a
4178 // filter is pointless. So here we look at all pairs of filters F and L where
4179 // L follows F in the list of clauses, and remove L if every element of F is
4180 // an element of L. This can occur when inlining C++ functions with exception
4181 // specifications.
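// Illustrative sketch (hypothetical typeinfos, not from a real test case):
//   filter [1 x ptr] [ptr @A]           ; F
//   ...
//   filter [2 x ptr] [ptr @A, ptr @B]   ; L, every element of F is in L
// Since every element of F is also an element of L, L is redundant once F has
// been seen; the loop below erases L and only the landingpad is rebuilt.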
4182 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4183 // Examine each filter in turn.
4184 Value *Filter = NewClauses[i];
4185 ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType());
4186 if (!FTy)
4187 // Not a filter - skip it.
4188 continue;
4189 unsigned FElts = FTy->getNumElements();
4190 // Examine each filter following this one. Doing this backwards means that
4191 // we don't have to worry about filters disappearing under us when removed.
4192 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4193 Value *LFilter = NewClauses[j];
4194 ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType());
4195 if (!LTy)
4196 // Not a filter - skip it.
4197 continue;
4198 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4199 // an element of LFilter, then discard LFilter.
4200 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4201 // If Filter is empty then it is a subset of LFilter.
4202 if (!FElts) {
4203 // Discard LFilter.
4204 NewClauses.erase(CI: J);
4205 MakeNewInstruction = true;
4206 // Move on to the next filter.
4207 continue;
4208 }
4209 unsigned LElts = LTy->getNumElements();
4210 // If Filter is longer than LFilter then it cannot be a subset of it.
4211 if (FElts > LElts)
4212 // Move on to the next filter.
4213 continue;
4214 // At this point we know that LFilter has at least one element.
4215 if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros.
4216 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4217 // already know that Filter is not longer than LFilter).
4218 if (isa<ConstantAggregateZero>(Val: Filter)) {
4219 assert(FElts <= LElts && "Should have handled this case earlier!");
4220 // Discard LFilter.
4221 NewClauses.erase(CI: J);
4222 MakeNewInstruction = true;
4223 }
4224 // Move on to the next filter.
4225 continue;
4226 }
4227 ConstantArray *LArray = cast<ConstantArray>(Val: LFilter);
4228 if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros.
4229 // Since Filter is non-empty and contains only zeros, it is a subset of
4230 // LFilter iff LFilter contains a zero.
4231 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4232 for (unsigned l = 0; l != LElts; ++l)
4233 if (LArray->getOperand(i_nocapture: l)->isNullValue()) {
4234 // LFilter contains a zero - discard it.
4235 NewClauses.erase(CI: J);
4236 MakeNewInstruction = true;
4237 break;
4238 }
4239 // Move on to the next filter.
4240 continue;
4241 }
4242 // At this point we know that both filters are ConstantArrays. Loop over
4243 // operands to see whether every element of Filter is also an element of
4244 // LFilter. Since filters tend to be short this is probably faster than
4245 // using a method that scales nicely.
4246 ConstantArray *FArray = cast<ConstantArray>(Val: Filter);
4247 bool AllFound = true;
4248 for (unsigned f = 0; f != FElts; ++f) {
4249 Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts();
4250 AllFound = false;
4251 for (unsigned l = 0; l != LElts; ++l) {
4252 Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts();
4253 if (LTypeInfo == FTypeInfo) {
4254 AllFound = true;
4255 break;
4256 }
4257 }
4258 if (!AllFound)
4259 break;
4260 }
4261 if (AllFound) {
4262 // Discard LFilter.
4263 NewClauses.erase(CI: J);
4264 MakeNewInstruction = true;
4265 }
4266 // Move on to the next filter.
4267 }
4268 }
4269
4270 // If we changed any of the clauses, replace the old landingpad instruction
4271 // with a new one.
4272 if (MakeNewInstruction) {
4273 LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(),
4274 NumReservedClauses: NewClauses.size());
4275 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
4276 NLI->addClause(ClauseVal: NewClauses[i]);
4277 // A landing pad with no clauses must have the cleanup flag set. It is
4278 // theoretically possible, though highly unlikely, that we eliminated all
4279 // clauses. If so, force the cleanup flag to true.
4280 if (NewClauses.empty())
4281 CleanupFlag = true;
4282 NLI->setCleanup(CleanupFlag);
4283 return NLI;
4284 }
4285
4286 // Even if none of the clauses changed, we may nonetheless have understood
4287 // that the cleanup flag is pointless. Clear it if so.
4288 if (LI.isCleanup() != CleanupFlag) {
4289 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4290 LI.setCleanup(CleanupFlag);
4291 return &LI;
4292 }
4293
4294 return nullptr;
4295}
4296
4297Value *
4298InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
4299 // Try to push freeze through instructions that propagate but don't produce
4300 // poison as far as possible. If an operand of the freeze satisfies three
4301 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
4302 // but one of its operands are guaranteed non-poison, then push the freeze
4303 // through to the one operand that is not guaranteed non-poison. The actual
4304 // transform is as follows.
4305 // Op1 = ... ; Op1 can be poison
4306 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its
4307 // ; only operand that may be poison
4308 // ... = Freeze(Op0)
4309 // =>
4310 // Op1 = ...
4311 // Op1.fr = Freeze(Op1)
4312 // ... = Inst(Op1.fr, NonPoisonOps...)
4313 auto *OrigOp = OrigFI.getOperand(i_nocapture: 0);
4314 auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp);
4315
4316 // While we could change the other users of OrigOp to use freeze(OrigOp), that
4317 // potentially reduces their optimization potential, so only do this if
4318 // OrigOp is used solely by the freeze.
4319 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp))
4320 return nullptr;
4321
4322 // We can't push the freeze through an instruction which can itself create
4323 // poison. If the only source of new poison is flags, we can simply
4324 // strip them (since we know the only use is the freeze and nothing can
4325 // benefit from them.)
4326 if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp),
4327 /*ConsiderFlagsAndMetadata*/ false))
4328 return nullptr;
4329
4330 // If an operand is guaranteed not to be poison, there is no need to add a
4331 // freeze to it. So we first look for the single operand that is not
4332 // guaranteed to be non-poison.
4333 Use *MaybePoisonOperand = nullptr;
4334 for (Use &U : OrigOpInst->operands()) {
4335 if (isa<MetadataAsValue>(Val: U.get()) ||
4336 isGuaranteedNotToBeUndefOrPoison(V: U.get()))
4337 continue;
4338 if (!MaybePoisonOperand)
4339 MaybePoisonOperand = &U;
4340 else
4341 return nullptr;
4342 }
4343
4344 OrigOpInst->dropPoisonGeneratingAnnotations();
4345
4346 // If all operands are guaranteed to be non-poison, we can drop freeze.
4347 if (!MaybePoisonOperand)
4348 return OrigOp;
4349
4350 Builder.SetInsertPoint(OrigOpInst);
4351 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
4352 V: MaybePoisonOperand->get(), Name: MaybePoisonOperand->get()->getName() + ".fr");
4353
4354 replaceUse(U&: *MaybePoisonOperand, NewValue: FrozenMaybePoisonOperand);
4355 return OrigOp;
4356}
4357
4358Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
4359 PHINode *PN) {
4360 // Detect whether this is a recurrence with a start value and some number of
4361 // backedge values. We'll check whether we can push the freeze through the
4362 // backedge values (possibly dropping poison flags along the way) until we
4363 // reach the phi again. In that case, we can move the freeze to the start
4364 // value.
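// An illustrative sketch of the transform (the IR names are hypothetical):
//   loop:
//     %iv      = phi i64 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.next = add nuw i64 %iv, 1
//     %fr      = freeze i64 %iv
// becomes, after dropping the poison-generating 'nuw' flag:
//   entry:
//     %start.fr = freeze i64 %start
//   loop:
//     %iv      = phi i64 [ %start.fr, %entry ], [ %iv.next, %loop ]
//     %iv.next = add i64 %iv, 1
// with all uses of %fr replaced by %iv.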
4365 Use *StartU = nullptr;
4366 SmallVector<Value *> Worklist;
4367 for (Use &U : PN->incoming_values()) {
4368 if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) {
4369 // Add backedge value to worklist.
4370 Worklist.push_back(Elt: U.get());
4371 continue;
4372 }
4373
4374 // Don't bother handling multiple start values.
4375 if (StartU)
4376 return nullptr;
4377 StartU = &U;
4378 }
4379
4380 if (!StartU || Worklist.empty())
4381 return nullptr; // Not a recurrence.
4382
4383 Value *StartV = StartU->get();
4384 BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU);
4385 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV);
4386 // We can't insert freeze if the start value is the result of the
4387 // terminator (e.g. an invoke).
4388 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
4389 return nullptr;
4390
4391 SmallPtrSet<Value *, 32> Visited;
4392 SmallVector<Instruction *> DropFlags;
4393 while (!Worklist.empty()) {
4394 Value *V = Worklist.pop_back_val();
4395 if (!Visited.insert(Ptr: V).second)
4396 continue;
4397
4398 if (Visited.size() > 32)
4399 return nullptr; // Limit the total number of values we inspect.
4400
4401 // Assume that PN is non-poison, because it will be after the transform.
4402 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
4403 continue;
4404
4405 Instruction *I = dyn_cast<Instruction>(Val: V);
4406 if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
4407 /*ConsiderFlagsAndMetadata*/ false))
4408 return nullptr;
4409
4410 DropFlags.push_back(Elt: I);
4411 append_range(C&: Worklist, R: I->operands());
4412 }
4413
4414 for (Instruction *I : DropFlags)
4415 I->dropPoisonGeneratingAnnotations();
4416
4417 if (StartNeedsFreeze) {
4418 Builder.SetInsertPoint(StartBB->getTerminator());
4419 Value *FrozenStartV = Builder.CreateFreeze(V: StartV,
4420 Name: StartV->getName() + ".fr");
4421 replaceUse(U&: *StartU, NewValue: FrozenStartV);
4422 }
4423 return replaceInstUsesWith(I&: FI, V: PN);
4424}
4425
4426bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
4427 Value *Op = FI.getOperand(i_nocapture: 0);
4428
4429 if (isa<Constant>(Val: Op) || Op->hasOneUse())
4430 return false;
4431
4432 // Move the freeze directly after the definition of its operand, so that
4433 // it dominates the maximum number of uses. Note that it may not dominate
4434 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
4435 // the normal/default destination. This is why the domination check in the
4436 // replacement below is still necessary.
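// Illustrative sketch (hypothetical IR): given
//   %x  = load i32, ptr %p
//   %a  = add i32 %x, 1
//   %fr = freeze i32 %x
// the freeze is moved directly after the definition of %x, and the dominated
// use in %a is rewritten to go through it, so a single freeze of %x remains:
//   %x  = load i32, ptr %p
//   %fr = freeze i32 %x
//   %a  = add i32 %fr, 1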
4437 BasicBlock::iterator MoveBefore;
4438 if (isa<Argument>(Val: Op)) {
4439 MoveBefore =
4440 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
4441 } else {
4442 auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef();
4443 if (!MoveBeforeOpt)
4444 return false;
4445 MoveBefore = *MoveBeforeOpt;
4446 }
4447
4448 // Don't move to the position of a debug intrinsic.
4449 if (isa<DbgInfoIntrinsic>(Val: MoveBefore))
4450 MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
4451 // Re-point the iterator to come after any debug-info records, if we're
4452 // running in "RemoveDIs" mode.
4453 MoveBefore.setHeadBit(false);
4454
4455 bool Changed = false;
4456 if (&FI != &*MoveBefore) {
4457 FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore);
4458 Changed = true;
4459 }
4460
4461 Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool {
4462 bool Dominates = DT.dominates(Def: &FI, U);
4463 Changed |= Dominates;
4464 return Dominates;
4465 });
4466
4467 return Changed;
4468}
4469
4470// Check if any direct or bitcast user of this value is a shuffle instruction.
4471static bool isUsedWithinShuffleVector(Value *V) {
4472 for (auto *U : V->users()) {
4473 if (isa<ShuffleVectorInst>(Val: U))
4474 return true;
4475 else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U))
4476 return true;
4477 }
4478 return false;
4479}
4480
4481Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
4482 Value *Op0 = I.getOperand(i_nocapture: 0);
4483
4484 if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I)))
4485 return replaceInstUsesWith(I, V);
4486
4487 // freeze (phi const, x) --> phi const, (freeze x)
4488 if (auto *PN = dyn_cast<PHINode>(Val: Op0)) {
4489 if (Instruction *NV = foldOpIntoPhi(I, PN))
4490 return NV;
4491 if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN))
4492 return NV;
4493 }
4494
4495 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I))
4496 return replaceInstUsesWith(I, V: NI);
4497
4498 // If I is freeze(undef), check its uses and fold it to a fixed constant.
4499 // - or: pick -1
4500 // - select's condition: if the true value is constant, choose it by making
4501 // the condition true.
4502 // - default: pick 0
4503 //
4504 // Note that this transform is intentionally done here rather than
4505 // via an analysis in InstSimplify or at individual user sites. That is
4506 // because we must produce the same value for all uses of the freeze -
4507 // it's the reason "freeze" exists!
4508 //
4509 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
4510 // duplicating logic for binops at least.
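// For example (illustrative IR): with
//   %f = freeze i32 undef
//   %o = or i32 %f, %y
// the only user is an 'or', so the loop below picks -1, and the uses of %f are
// replaced with that constant (letting the 'or' later fold to -1). If the
// users disagree on a preferred constant, it falls back to zero.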
4511 auto getUndefReplacement = [&I](Type *Ty) {
4512 Constant *BestValue = nullptr;
4513 Constant *NullValue = Constant::getNullValue(Ty);
4514 for (const auto *U : I.users()) {
4515 Constant *C = NullValue;
4516 if (match(V: U, P: m_Or(L: m_Value(), R: m_Value())))
4517 C = ConstantInt::getAllOnesValue(Ty);
4518 else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value())))
4519 C = ConstantInt::getTrue(Ty);
4520
4521 if (!BestValue)
4522 BestValue = C;
4523 else if (BestValue != C)
4524 BestValue = NullValue;
4525 }
4526 assert(BestValue && "Must have at least one use");
4527 return BestValue;
4528 };
4529
4530 if (match(V: Op0, P: m_Undef())) {
4531 // Don't fold freeze(undef/poison) if it's used as a vector operand in
4532 // a shuffle. This may improve codegen for shuffles that allow
4533 // unspecified inputs.
4534 if (isUsedWithinShuffleVector(V: &I))
4535 return nullptr;
4536 return replaceInstUsesWith(I, V: getUndefReplacement(I.getType()));
4537 }
4538
4539 Constant *C;
4540 if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4541 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4542 return replaceInstUsesWith(I, V: Constant::replaceUndefsWith(C, Replacement: ReplaceC));
4543 }
4544
4545 // Replace uses of Op with freeze(Op).
4546 if (freezeOtherUses(FI&: I))
4547 return &I;
4548
4549 return nullptr;
4550}
4551
4552/// Check for case where the call writes to an otherwise dead alloca. This
4553 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
4554 /// helper *only* analyzes the write; it doesn't check any other legality aspect.
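/// A typical (illustrative) C++ source pattern this is aimed at, assuming the
/// callee's only memory effect is the store through its out-parameter:
///
///   int unused;              // otherwise-dead local
///   getWidgetCount(&unused); // sole write into it; the value is never read
///
/// Here the call's write goes into a dead alloca, so sinking the call past
/// other instructions cannot change observable memory state.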
4555static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4556 auto *CB = dyn_cast<CallBase>(Val: I);
4557 if (!CB)
4558 // TODO: handle e.g. store to alloca here - only worth doing if we extend
4559 // to allow reload along used path as described below. Otherwise, this
4560 // is simply a store to a dead allocation which will be removed.
4561 return false;
4562 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI);
4563 if (!Dest)
4564 return false;
4565 auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr));
4566 if (!AI)
4567 // TODO: allow malloc?
4568 return false;
4569 // TODO: allow memory access dominated by move point? Note that since AI
4570 // could have a reference to itself captured by the call, we would need to
4571 // account for cycles in doing so.
4572 SmallVector<const User *> AllocaUsers;
4573 SmallPtrSet<const User *, 4> Visited;
4574 auto pushUsers = [&](const Instruction &I) {
4575 for (const User *U : I.users()) {
4576 if (Visited.insert(Ptr: U).second)
4577 AllocaUsers.push_back(Elt: U);
4578 }
4579 };
4580 pushUsers(*AI);
4581 while (!AllocaUsers.empty()) {
4582 auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val());
4583 if (isa<BitCastInst>(Val: UserI) || isa<GetElementPtrInst>(Val: UserI) ||
4584 isa<AddrSpaceCastInst>(Val: UserI)) {
4585 pushUsers(*UserI);
4586 continue;
4587 }
4588 if (UserI == CB)
4589 continue;
4590 // TODO: support lifetime.start/end here
4591 return false;
4592 }
4593 return true;
4594}
4595
4596/// Try to move the specified instruction from its current block into the
4597/// beginning of DestBlock, which can only happen if it's safe to move the
4598/// instruction past all of the instructions between it and the end of its
4599/// block.
4600bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
4601 BasicBlock *DestBlock) {
4602 BasicBlock *SrcBlock = I->getParent();
4603
4604 // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
4605 if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4606 I->isTerminator())
4607 return false;
4608
4609 // Do not sink static or dynamic alloca instructions. Static allocas must
4610 // remain in the entry block, and dynamic allocas must not be sunk in between
4611 // a stacksave / stackrestore pair, which would incorrectly shorten its
4612 // lifetime.
4613 if (isa<AllocaInst>(Val: I))
4614 return false;
4615
4616 // Do not sink into catchswitch blocks.
4617 if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator()))
4618 return false;
4619
4620 // Do not sink convergent call instructions.
4621 if (auto *CI = dyn_cast<CallInst>(Val: I)) {
4622 if (CI->isConvergent())
4623 return false;
4624 }
4625
4626 // Unless we can prove that the memory write isn't visible except on the
4627 // path we're sinking to, we must bail.
4628 if (I->mayWriteToMemory()) {
4629 if (!SoleWriteToDeadLocal(I, TLI))
4630 return false;
4631 }
4632
4633 // We can only sink load instructions if there is nothing between the load and
4634 // the end of block that could change the value.
4635 if (I->mayReadFromMemory()) {
4636 // We don't want to do any sophisticated alias analysis, so we only check
4637 // the instructions after I in I's parent block if we try to sink to its
4638 // successor block.
4639 if (DestBlock->getUniquePredecessor() != I->getParent())
4640 return false;
4641 for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
4642 E = I->getParent()->end();
4643 Scan != E; ++Scan)
4644 if (Scan->mayWriteToMemory())
4645 return false;
4646 }
4647
4648 I->dropDroppableUses(ShouldDrop: [&](const Use *U) {
4649 auto *I = dyn_cast<Instruction>(Val: U->getUser());
4650 if (I && I->getParent() != DestBlock) {
4651 Worklist.add(I);
4652 return true;
4653 }
4654 return false;
4655 });
4656 /// FIXME: We could remove droppable uses that are not dominated by
4657 /// the new position.
4658
4659 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4660 I->moveBefore(BB&: *DestBlock, I: InsertPos);
4661 ++NumSunkInst;
4662
4663 // Also sink all related debug uses from the source basic block. Otherwise we
4664 // get a debug use before the def. Attempt to salvage debug uses first, to
4665 // maximise the range over which variables have a location. If we cannot
4666 // salvage, then mark the location undef: we know it was supposed to receive a
4667 // new location here, but that computation has been sunk.
4668 SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4669 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
4670 findDbgUsers(DbgInsts&: DbgUsers, V: I, DbgVariableRecords: &DbgVariableRecords);
4671 if (!DbgUsers.empty())
4672 tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
4673 if (!DbgVariableRecords.empty())
4674 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
4675 DPUsers&: DbgVariableRecords);
4676
4677 // PS: there are numerous flaws with this behaviour, not least that right now
4678 // assignments can be re-ordered past other assignments to the same variable
4679 // if they use different Values. Creating more undef assignments can never be
4680 // undone. And salvaging all users outside of this block can unnecessarily
4681 // alter the lifetime of the live value that the variable refers to.
4682 // Some of these things could be resolved by tolerating debug use-before-defs
4683 // in LLVM-IR, but that depends on the instruction-referencing CodeGen backend
4684 // being used for more architectures.
4685
4686 return true;
4687}
4688
4689void InstCombinerImpl::tryToSinkInstructionDbgValues(
4690 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4691 BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) {
4692 // For all debug values in the destination block, the sunk instruction
4693 // will still be available, so they do not need to be dropped.
4694 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
4695 for (auto &DbgUser : DbgUsers)
4696 if (DbgUser->getParent() != DestBlock)
4697 DbgUsersToSalvage.push_back(Elt: DbgUser);
4698
4699 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
4700 // to clone the last appearing debug intrinsic for each given variable.
4701 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4702 for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
4703 if (DVI->getParent() == SrcBlock)
4704 DbgUsersToSink.push_back(Elt: DVI);
4705 llvm::sort(C&: DbgUsersToSink,
4706 Comp: [](auto *A, auto *B) { return B->comesBefore(A); });
4707
4708 SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
4709 SmallSet<DebugVariable, 4> SunkVariables;
4710 for (auto *User : DbgUsersToSink) {
4711 // A dbg.declare instruction should not be cloned, since there can only be
4712 // one per variable fragment. It should be left in the original place
4713 // because the sunk instruction is not an alloca (otherwise we could not be
4714 // here).
4715 if (isa<DbgDeclareInst>(Val: User))
4716 continue;
4717
4718 DebugVariable DbgUserVariable =
4719 DebugVariable(User->getVariable(), User->getExpression(),
4720 User->getDebugLoc()->getInlinedAt());
4721
4722 if (!SunkVariables.insert(V: DbgUserVariable).second)
4723 continue;
4724
4725 // Leave dbg.assign intrinsics in their original positions and there should
4726 // be no need to insert a clone.
4727 if (isa<DbgAssignIntrinsic>(Val: User))
4728 continue;
4729
4730 DIIClones.emplace_back(Args: cast<DbgVariableIntrinsic>(Val: User->clone()));
4731 if (isa<DbgDeclareInst>(Val: User) && isa<CastInst>(Val: I))
4732 DIIClones.back()->replaceVariableLocationOp(OldValue: I, NewValue: I->getOperand(i: 0));
4733 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4734 }
4735
4736 // Perform salvaging without the clones, then sink the clones.
4737 if (!DIIClones.empty()) {
4738 salvageDebugInfoForDbgValues(I&: *I, Insns: DbgUsersToSalvage, DPInsns: {});
4739 // The clones are in reverse order of original appearance, reverse again to
4740 // maintain the original order.
4741 for (auto &DIIClone : llvm::reverse(C&: DIIClones)) {
4742 DIIClone->insertBefore(InsertPos: &*InsertPos);
4743 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4744 }
4745 }
4746}
4747
4748void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
4749 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4750 BasicBlock *DestBlock,
4751 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
4752 // Implementation of tryToSinkInstructionDbgValues, but for the
4753 // DbgVariableRecord of variable assignments rather than dbg.values.
4754
4755 // Fetch all DbgVariableRecords not already in the destination.
4756 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
4757 for (auto &DVR : DbgVariableRecords)
4758 if (DVR->getParent() != DestBlock)
4759 DbgVariableRecordsToSalvage.push_back(Elt: DVR);
4760
4761 // Fetch a second collection, of DbgVariableRecords in the source block that
4762 // we're going to sink.
4763 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
4764 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
4765 if (DVR->getParent() == SrcBlock)
4766 DbgVariableRecordsToSink.push_back(Elt: DVR);
4767
4768 // Sort DbgVariableRecords according to their position in the block. This is a
4769 // partial order: DbgVariableRecords attached to different instructions will
4770 // be ordered by the instruction order, but DbgVariableRecords attached to the
4771 // same instruction won't have an order.
4772 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
4773 return B->getInstruction()->comesBefore(Other: A->getInstruction());
4774 };
4775 llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order);
4776
4777 // If there are two assignments to the same variable attached to the same
4778 // instruction, the ordering between the two assignments is important. Scan
4779 // for this (rare) case and establish which is the last assignment.
4780 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
4781 SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
4782 if (DbgVariableRecordsToSink.size() > 1) {
4783 SmallDenseMap<InstVarPair, unsigned> CountMap;
4784 // Count how many assignments to each variable there is per instruction.
4785 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4786 DebugVariable DbgUserVariable =
4787 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4788 DVR->getDebugLoc()->getInlinedAt());
4789 CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1;
4790 }
4791
4792 // If there are any instructions with two assignments, add them to the
4793 // FilterOutMap to record that they need extra filtering.
4794 SmallPtrSet<const Instruction *, 4> DupSet;
4795 for (auto It : CountMap) {
4796 if (It.second > 1) {
4797 FilterOutMap[It.first] = nullptr;
4798 DupSet.insert(Ptr: It.first.first);
4799 }
4800 }
4801
4802 // For all instruction/variable pairs needing extra filtering, find the
4803 // latest assignment.
4804 for (const Instruction *Inst : DupSet) {
4805 for (DbgVariableRecord &DVR :
4806 llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) {
4807 DebugVariable DbgUserVariable =
4808 DebugVariable(DVR.getVariable(), DVR.getExpression(),
4809 DVR.getDebugLoc()->getInlinedAt());
4810 auto FilterIt =
4811 FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable));
4812 if (FilterIt == FilterOutMap.end())
4813 continue;
4814 if (FilterIt->second != nullptr)
4815 continue;
4816 FilterIt->second = &DVR;
4817 }
4818 }
4819 }
4820
4821 // Perform cloning of the DbgVariableRecords that we plan on sinking, filtering
4822 // out any duplicate assignments identified above.
4823 SmallVector<DbgVariableRecord *, 2> DVRClones;
4824 SmallSet<DebugVariable, 4> SunkVariables;
4825 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4826 if (DVR->Type == DbgVariableRecord::LocationType::Declare)
4827 continue;
4828
4829 DebugVariable DbgUserVariable =
4830 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4831 DVR->getDebugLoc()->getInlinedAt());
4832
4833 // For any variable where there were multiple assignments in the same place,
4834 // ignore all but the last assignment.
4835 if (!FilterOutMap.empty()) {
4836 InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable);
4837 auto It = FilterOutMap.find(Val: IVP);
4838
4839 // Filter out.
4840 if (It != FilterOutMap.end() && It->second != DVR)
4841 continue;
4842 }
4843
4844 if (!SunkVariables.insert(V: DbgUserVariable).second)
4845 continue;
4846
4847 if (DVR->isDbgAssign())
4848 continue;
4849
4850 DVRClones.emplace_back(Args: DVR->clone());
4851 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
4852 }
4853
4854 // Perform salvaging without the clones, then sink the clones.
4855 if (DVRClones.empty())
4856 return;
4857
4858 salvageDebugInfoForDbgValues(I&: *I, Insns: {}, DPInsns: DbgVariableRecordsToSalvage);
4859
4860 // The clones are in reverse order of original appearance. Assert that the
4861 // head bit is set on the iterator as we _should_ have received it via
4862 // getFirstInsertionPt. Inserting like this will reverse the clone order as
4863 // we'll repeatedly insert at the head, such as:
4864 // DVR-3 (third insertion goes here)
4865 // DVR-2 (second insertion goes here)
4866 // DVR-1 (first insertion goes here)
4867 // Any-Prior-DVRs
4868 // InsertPtInst
4869 assert(InsertPos.getHeadBit());
4870 for (DbgVariableRecord *DVRClone : DVRClones) {
4871 InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos);
4872 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
4873 }
4874}
4875
4876bool InstCombinerImpl::run() {
4877 while (!Worklist.isEmpty()) {
4878 // Walk deferred instructions in reverse order, and push them to the
4879 // worklist, which means they'll end up popped from the worklist in-order.
4880 while (Instruction *I = Worklist.popDeferred()) {
4881 // Check to see if we can DCE the instruction. We do this already here to
4882 // reduce the number of uses and thus allow other folds to trigger.
4883 // Note that eraseInstFromFunction() may push additional instructions on
4884 // the deferred worklist, so this will DCE whole instruction chains.
4885 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
4886 eraseInstFromFunction(I&: *I);
4887 ++NumDeadInst;
4888 continue;
4889 }
4890
4891 Worklist.push(I);
4892 }
4893
4894 Instruction *I = Worklist.removeOne();
4895 if (I == nullptr) continue; // skip null values.
4896
4897 // Check to see if we can DCE the instruction.
4898 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
4899 eraseInstFromFunction(I&: *I);
4900 ++NumDeadInst;
4901 continue;
4902 }
4903
4904 if (!DebugCounter::shouldExecute(CounterName: VisitCounter))
4905 continue;
4906
4907 // See if we can trivially sink this instruction to its user if we can
4908 // prove that the successor is not executed more frequently than our block.
4909 // Return the UserBlock if successful.
4910 auto getOptionalSinkBlockForInst =
4911 [this](Instruction *I) -> std::optional<BasicBlock *> {
4912 if (!EnableCodeSinking)
4913 return std::nullopt;
4914
4915 BasicBlock *BB = I->getParent();
4916 BasicBlock *UserParent = nullptr;
4917 unsigned NumUsers = 0;
4918
4919 for (auto *U : I->users()) {
4920 if (U->isDroppable())
4921 continue;
4922 if (NumUsers > MaxSinkNumUsers)
4923 return std::nullopt;
4924
4925 Instruction *UserInst = cast<Instruction>(Val: U);
4926 // Special handling for Phi nodes - get the block the use occurs in.
4927 if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) {
4928 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4929 if (PN->getIncomingValue(i) == I) {
4930 // Bail out if we have uses in different blocks. We don't do any
4931 // sophisticated analysis (i.e. finding the NearestCommonDominator of
4932 // these use blocks).
4933 if (UserParent && UserParent != PN->getIncomingBlock(i))
4934 return std::nullopt;
4935 UserParent = PN->getIncomingBlock(i);
4936 }
4937 }
4938 assert(UserParent && "expected to find user block!");
4939 } else {
4940 if (UserParent && UserParent != UserInst->getParent())
4941 return std::nullopt;
4942 UserParent = UserInst->getParent();
4943 }
4944
4945 // Make sure these checks are done only once; naturally, we do them the
4946 // first time we find the user parent, which saves compile time.
4947 if (NumUsers == 0) {
4948 // Try sinking to another block. If that block is unreachable, then do
4949 // not bother. SimplifyCFG should handle it.
4950 if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent))
4951 return std::nullopt;
4952
4953 auto *Term = UserParent->getTerminator();
4954 // See if the user is one of our successors that has only one
4955 // predecessor, so that we don't have to split the critical edge.
4956 // Another option is to sink into a block that ends with a
4957 // terminator that does not pass control to any other block (such as
4958 // return, unreachable or resume). In this case:
4959 // - I dominates the User (by SSA form);
4960 // - the User will be executed at most once.
4961 // So sinking I down to User is always profitable or neutral.
4962 if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term))
4963 return std::nullopt;
4964
4965 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4966 }
4967
4968 NumUsers++;
4969 }
4970
4971 // No user or only has droppable users.
4972 if (!UserParent)
4973 return std::nullopt;
4974
4975 return UserParent;
4976 };
4977
4978 auto OptBB = getOptionalSinkBlockForInst(I);
4979 if (OptBB) {
4980 auto *UserParent = *OptBB;
4981 // Okay, the CFG is simple enough, try to sink this instruction.
4982 if (tryToSinkInstruction(I, DestBlock: UserParent)) {
4983 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4984 MadeIRChange = true;
4985 // We'll add uses of the sunk instruction below, but since
4986 // sinking can expose opportunities for its *operands*, add
4987 // them to the worklist.
4988 for (Use &U : I->operands())
4989 if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get()))
4990 Worklist.push(I: OpI);
4991 }
4992 }
4993
4994 // Now that we have an instruction, try combining it to simplify it.
4995 Builder.SetInsertPoint(I);
4996 Builder.CollectMetadataToCopy(
4997 Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4998
4999#ifndef NDEBUG
5000 std::string OrigI;
5001#endif
5002 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
5003 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5004
5005 if (Instruction *Result = visit(I&: *I)) {
5006 ++NumCombined;
5007 // Should we replace the old instruction with a new one?
5008 if (Result != I) {
5009 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5010 << " New = " << *Result << '\n');
5011
5012 Result->copyMetadata(SrcInst: *I,
5013 WL: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5014 // Everything uses the new instruction now.
5015 I->replaceAllUsesWith(V: Result);
5016
5017 // Move the name to the new instruction first.
5018 Result->takeName(V: I);
5019
5020 // Insert the new instruction into the basic block...
5021 BasicBlock *InstParent = I->getParent();
5022 BasicBlock::iterator InsertPos = I->getIterator();
5023
5024 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5025 if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) {
5026 // We need to fix up the insertion point.
5027 if (isa<PHINode>(Val: I)) // PHI -> Non-PHI
5028 InsertPos = InstParent->getFirstInsertionPt();
5029 else // Non-PHI -> PHI
5030 InsertPos = InstParent->getFirstNonPHIIt();
5031 }
5032
5033 Result->insertInto(ParentBB: InstParent, It: InsertPos);
5034
5035 // Push the new instruction and any users onto the worklist.
5036 Worklist.pushUsersToWorkList(I&: *Result);
5037 Worklist.push(I: Result);
5038
5039 eraseInstFromFunction(I&: *I);
5040 } else {
5041 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5042 << " New = " << *I << '\n');
5043
5044 // If the instruction was modified, it's possible that it is now dead.
5045 // If so, remove it.
5046 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5047 eraseInstFromFunction(I&: *I);
5048 } else {
5049 Worklist.pushUsersToWorkList(I&: *I);
5050 Worklist.push(I);
5051 }
5052 }
5053 MadeIRChange = true;
5054 }
5055 }
5056
5057 Worklist.zap();
5058 return MadeIRChange;
5059}
5060
5061// Track the scopes used by !alias.scope and !noalias. In a function, a
5062// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5063// by both sets. If not, the declaration of the scope can be safely omitted.
5064// The MDNode of the scope can be omitted as well for the instructions that are
5065// part of this function. We do not do that at this point, as this might become
5066// too time consuming to do.
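// Illustrative example (hypothetical metadata): if a function contains
//   call void @llvm.experimental.noalias.scope.decl(metadata !4)
// but the scope it declares only ever appears in !noalias lists and never in
// any !alias.scope list (or vice versa), the declaration cannot influence any
// alias query inside this function, and isNoAliasScopeDeclDead() reports it
// as removable.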
5067class AliasScopeTracker {
5068 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5069 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5070
5071public:
5072 void analyse(Instruction *I) {
5073 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5074 if (!I->hasMetadataOtherThanDebugLoc())
5075 return;
5076
5077 auto Track = [](Metadata *ScopeList, auto &Container) {
5078 const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList);
5079 if (!MDScopeList || !Container.insert(MDScopeList).second)
5080 return;
5081 for (const auto &MDOperand : MDScopeList->operands())
5082 if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand))
5083 Container.insert(MDScope);
5084 };
5085
5086 Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5087 Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5088 }
5089
5090 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5091 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst);
5092 if (!Decl)
5093 return false;
5094
5095 assert(Decl->use_empty() &&
5096 "llvm.experimental.noalias.scope.decl in use ?");
5097 const MDNode *MDSL = Decl->getScopeList();
5098 assert(MDSL->getNumOperands() == 1 &&
5099 "llvm.experimental.noalias.scope should refer to a single scope");
5100 auto &MDOperand = MDSL->getOperand(I: 0);
5101 if (auto *MD = dyn_cast<MDNode>(Val: MDOperand))
5102 return !UsedAliasScopesAndLists.contains(Ptr: MD) ||
5103 !UsedNoAliasScopesAndLists.contains(Ptr: MD);
5104
5105 // Not an MDNode? Throw it away.
5106 return true;
5107 }
5108};
5109
5110/// Populate the IC worklist from a function, by walking it in reverse
5111/// post-order and adding all reachable code to the worklist.
5112///
5113/// This has a couple of tricks to make the code faster and more powerful. In
5114/// particular, we constant fold and DCE instructions as we go, to avoid adding
5115/// them to the worklist (this significantly speeds up instcombine on code where
5116/// many instructions are dead or constant). Additionally, if we find a branch
5117/// whose condition is a known constant, we only visit the reachable successors.
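/// For instance (illustrative IR), when a block ends in
///   br i1 true, label %live, label %dead
/// only %live is treated as reachable: the edge to %dead is recorded as dead
/// and any phi operands flowing along it are replaced with poison, so the main
/// combiner never has to reason about that unreachable path.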
5118bool InstCombinerImpl::prepareWorklist(
5119 Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
5120 bool MadeIRChange = false;
5121 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
5122 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5123 DenseMap<Constant *, Constant *> FoldedConstants;
5124 AliasScopeTracker SeenAliasScopes;
5125
5126 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5127 for (BasicBlock *Succ : successors(BB))
5128 if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second)
5129 for (PHINode &PN : Succ->phis())
5130 for (Use &U : PN.incoming_values())
5131 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) {
5132 U.set(PoisonValue::get(T: PN.getType()));
5133 MadeIRChange = true;
5134 }
5135 };
5136
5137 for (BasicBlock *BB : RPOT) {
5138 if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
5139 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
5140 })) {
5141 HandleOnlyLiveSuccessor(BB, nullptr);
5142 continue;
5143 }
5144 LiveBlocks.insert(Ptr: BB);
5145
5146 for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) {
5147 // ConstantProp instruction if trivially constant.
5148 if (!Inst.use_empty() &&
5149 (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0))))
5150 if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) {
5151 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5152 << '\n');
5153 Inst.replaceAllUsesWith(V: C);
5154 ++NumConstProp;
5155 if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI))
5156 Inst.eraseFromParent();
5157 MadeIRChange = true;
5158 continue;
5159 }
5160
5161 // See if we can constant fold its operands.
5162 for (Use &U : Inst.operands()) {
5163 if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U))
5164 continue;
5165
5166 auto *C = cast<Constant>(Val&: U);
5167 Constant *&FoldRes = FoldedConstants[C];
5168 if (!FoldRes)
5169 FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI);
5170
5171 if (FoldRes != C) {
5172 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5173 << "\n Old = " << *C
5174 << "\n New = " << *FoldRes << '\n');
5175 U = FoldRes;
5176 MadeIRChange = true;
5177 }
5178 }
5179
5180 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5181 // these call instructions consumes a non-trivial amount of time and
5182 // provides no value for the optimization.
5183 if (!Inst.isDebugOrPseudoInst()) {
5184 InstrsForInstructionWorklist.push_back(Elt: &Inst);
5185 SeenAliasScopes.analyse(I: &Inst);
5186 }
5187 }
5188
5189 // If this is a branch or switch on a constant, mark only the single
5190 // live successor. Otherwise assume all successors are live.
5191 Instruction *TI = BB->getTerminator();
5192 if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI); BI && BI->isConditional()) {
5193 if (isa<UndefValue>(Val: BI->getCondition())) {
5194 // Branch on undef is UB.
5195 HandleOnlyLiveSuccessor(BB, nullptr);
5196 continue;
5197 }
5198 if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
5199 bool CondVal = Cond->getZExtValue();
5200 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal));
5201 continue;
5202 }
5203 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
5204 if (isa<UndefValue>(Val: SI->getCondition())) {
5205 // Switch on undef is UB.
5206 HandleOnlyLiveSuccessor(BB, nullptr);
5207 continue;
5208 }
5209 if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) {
5210 HandleOnlyLiveSuccessor(BB,
5211 SI->findCaseValue(C: Cond)->getCaseSuccessor());
5212 continue;
5213 }
5214 }
5215 }
5216
5217 // Remove instructions inside unreachable blocks. This prevents the
5218 // instcombine code from having to deal with some bad special cases, and
5219 // reduces use counts of instructions.
5220 for (BasicBlock &BB : F) {
5221 if (LiveBlocks.count(Ptr: &BB))
5222 continue;
5223
5224 unsigned NumDeadInstInBB;
5225 unsigned NumDeadDbgInstInBB;
5226 std::tie(args&: NumDeadInstInBB, args&: NumDeadDbgInstInBB) =
5227 removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
5228
5229 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
5230 NumDeadInst += NumDeadInstInBB;
5231 }
5232
5233 // Once we've found all of the instructions to add to instcombine's worklist,
5234 // add them in reverse order. This way instcombine will visit from the top
5235 // of the function down. This jives well with the way that it adds all uses
5236 // of instructions to the worklist after doing a transformation, thus avoiding
5237 // some N^2 behavior in pathological cases.
5238 Worklist.reserve(Size: InstrsForInstructionWorklist.size());
5239 for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) {
5240 // DCE instruction if trivially dead. As we iterate in reverse program
5241 // order here, we will clean up whole chains of dead instructions.
5242 if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) ||
5243 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5244 ++NumDeadInst;
5245 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5246 salvageDebugInfo(I&: *Inst);
5247 Inst->eraseFromParent();
5248 MadeIRChange = true;
5249 continue;
5250 }
5251
5252 Worklist.push(I: Inst);
5253 }
5254
5255 return MadeIRChange;
5256}
5257
5258static bool combineInstructionsOverFunction(
5259 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
5260 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
5261 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
5262 BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI,
5263 const InstCombineOptions &Opts) {
5264 auto &DL = F.getParent()->getDataLayout();
5265
5266 /// Builder - This is an IRBuilder that automatically inserts new
5267 /// instructions into the worklist when they are created.
5268 IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
5269 F.getContext(), TargetFolder(DL),
5270 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5271 Worklist.add(I);
5272 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
5273 AC.registerAssumption(CI: Assume);
5274 }));
5275
5276 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
5277
5278 // Lower dbg.declare intrinsics, otherwise their value may be clobbered
5279 // by the instcombiner.
5280 bool MadeIRChange = false;
5281 if (ShouldLowerDbgDeclare)
5282 MadeIRChange = LowerDbgDeclare(F);
5283
5284 // Iterate while there is work to do.
5285 unsigned Iteration = 0;
5286 while (true) {
5287 ++Iteration;
5288
5289 if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
5290 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5291 << " on " << F.getName()
5292 << " reached; stopping without verifying fixpoint\n");
5293 break;
5294 }
5295
5296 ++NumWorklistIterations;
5297 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5298 << F.getName() << "\n");
5299
5300 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
5301 ORE, BFI, BPI, PSI, DL, LI);
5302 IC.MaxArraySizeForCombine = MaxArraySize;
5303 bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
5304 MadeChangeInThisIteration |= IC.run();
5305 if (!MadeChangeInThisIteration)
5306 break;
5307
5308 MadeIRChange = true;
5309 if (Iteration > Opts.MaxIterations) {
5310 report_fatal_error(
5311 reason: "Instruction Combining did not reach a fixpoint after " +
5312 Twine(Opts.MaxIterations) + " iterations",
5313 /*GenCrashDiag=*/gen_crash_diag: false);
5314 }
5315 }
5316
5317 if (Iteration == 1)
5318 ++NumOneIteration;
5319 else if (Iteration == 2)
5320 ++NumTwoIterations;
5321 else if (Iteration == 3)
5322 ++NumThreeIterations;
5323 else
5324 ++NumFourOrMoreIterations;
5325
5326 return MadeIRChange;
5327}
5328
5329InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
5330
5331void InstCombinePass::printPipeline(
5332 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5333 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
5334 OS, MapClassName2PassName);
5335 OS << '<';
5336 OS << "max-iterations=" << Options.MaxIterations << ";";
5337 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
5338 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
5339 OS << '>';
5340}
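// With hypothetical option values (MaxIterations = 2, loop info disabled,
// fixpoint verification enabled), the textual pipeline element printed above
// would look like:
//   instcombine<max-iterations=2;no-use-loop-info;verify-fixpoint>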
5341
5342PreservedAnalyses InstCombinePass::run(Function &F,
5343 FunctionAnalysisManager &AM) {
5344 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
5345 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
5346 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
5347 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
5348 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
5349
5350 // TODO: Only use LoopInfo when the option is set. This requires that the
5351 // callers in the pass pipeline explicitly set the option.
5352 auto *LI = AM.getCachedResult<LoopAnalysis>(IR&: F);
5353 if (!LI && Options.UseLoopInfo)
5354 LI = &AM.getResult<LoopAnalysis>(IR&: F);
5355
5356 auto *AA = &AM.getResult<AAManager>(IR&: F);
5357 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
5358 ProfileSummaryInfo *PSI =
5359 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
5360 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
5361 &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
5362 auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F);
5363
5364 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5365 BFI, BPI, PSI, LI, Opts: Options))
5366 // No changes, all analyses are preserved.
5367 return PreservedAnalyses::all();
5368
5369 // Mark all the analyses that instcombine updates as preserved.
5370 PreservedAnalyses PA;
5371 PA.preserveSet<CFGAnalyses>();
5372 return PA;
5373}
5374
5375void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
5376 AU.setPreservesCFG();
5377 AU.addRequired<AAResultsWrapperPass>();
5378 AU.addRequired<AssumptionCacheTracker>();
5379 AU.addRequired<TargetLibraryInfoWrapperPass>();
5380 AU.addRequired<TargetTransformInfoWrapperPass>();
5381 AU.addRequired<DominatorTreeWrapperPass>();
5382 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5383 AU.addPreserved<DominatorTreeWrapperPass>();
5384 AU.addPreserved<AAResultsWrapperPass>();
5385 AU.addPreserved<BasicAAWrapperPass>();
5386 AU.addPreserved<GlobalsAAWrapperPass>();
5387 AU.addRequired<ProfileSummaryInfoWrapperPass>();
5388 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
5389}
5390
5391bool InstructionCombiningPass::runOnFunction(Function &F) {
5392 if (skipFunction(F))
5393 return false;
5394
5395 // Required analyses.
5396 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
5397 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5398 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
5399 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
5400 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5401 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5402
5403 // Optional analyses.
5404 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
5405 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
5406 ProfileSummaryInfo *PSI =
5407 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
5408 BlockFrequencyInfo *BFI =
5409 (PSI && PSI->hasProfileSummary()) ?
5410 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
5411 nullptr;
5412 BranchProbabilityInfo *BPI = nullptr;
5413 if (auto *WrapperPass =
5414 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
5415 BPI = &WrapperPass->getBPI();
5416
5417 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5418 BFI, BPI, PSI, LI,
5419 Opts: InstCombineOptions());
5420}
5421
5422char InstructionCombiningPass::ID = 0;
5423
5424InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
5425 initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
5426}
5427
5428INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
5429 "Combine redundant instructions", false, false)
5430INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
5431INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
5432INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
5433INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5434INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
5435INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
5436INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
5437INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
5438INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
5439INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
5440 "Combine redundant instructions", false, false)
5441
5442// Initialization Routines
5443void llvm::initializeInstCombine(PassRegistry &Registry) {
5444 initializeInstructionCombiningPassPass(Registry);
5445}
5446
5447FunctionPass *llvm::createInstructionCombiningPass() {
5448 return new InstructionCombiningPass();
5449}
5450
