1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
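//
// For example (illustrative), canonicalizations 1 and 6 together rewrite:
//    %R = mul i32 8, %X
// into:
//    %R = shl i32 %X, 3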
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
39#include "llvm/ADT/SmallPtrSet.h"
40#include "llvm/ADT/SmallVector.h"
41#include "llvm/ADT/Statistic.h"
42#include "llvm/Analysis/AliasAnalysis.h"
43#include "llvm/Analysis/AssumptionCache.h"
44#include "llvm/Analysis/BasicAliasAnalysis.h"
45#include "llvm/Analysis/BlockFrequencyInfo.h"
46#include "llvm/Analysis/CFG.h"
47#include "llvm/Analysis/ConstantFolding.h"
48#include "llvm/Analysis/GlobalsModRef.h"
49#include "llvm/Analysis/InstructionSimplify.h"
50#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/MemoryBuiltins.h"
53#include "llvm/Analysis/OptimizationRemarkEmitter.h"
54#include "llvm/Analysis/ProfileSummaryInfo.h"
55#include "llvm/Analysis/TargetFolder.h"
56#include "llvm/Analysis/TargetLibraryInfo.h"
57#include "llvm/Analysis/TargetTransformInfo.h"
58#include "llvm/Analysis/Utils/Local.h"
59#include "llvm/Analysis/ValueTracking.h"
60#include "llvm/Analysis/VectorUtils.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
68#include "llvm/IR/DerivedTypes.h"
69#include "llvm/IR/Dominators.h"
70#include "llvm/IR/EHPersonalities.h"
71#include "llvm/IR/Function.h"
72#include "llvm/IR/GetElementPtrTypeIterator.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
76#include "llvm/IR/Instructions.h"
77#include "llvm/IR/IntrinsicInst.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
82#include "llvm/IR/PatternMatch.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
88#include "llvm/InitializePasses.h"
89#include "llvm/Support/Casting.h"
90#include "llvm/Support/CommandLine.h"
91#include "llvm/Support/Compiler.h"
92#include "llvm/Support/Debug.h"
93#include "llvm/Support/DebugCounter.h"
94#include "llvm/Support/ErrorHandling.h"
95#include "llvm/Support/KnownBits.h"
96#include "llvm/Support/raw_ostream.h"
97#include "llvm/Transforms/InstCombine/InstCombine.h"
98#include "llvm/Transforms/Utils/BasicBlockUtils.h"
99#include "llvm/Transforms/Utils/Local.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
109#include "llvm/Transforms/Utils/InstructionWorklist.h"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(Val: true));
136
137static cl::opt<unsigned> MaxSinkNumUsers(
138 "instcombine-max-sink-users", cl::init(Val: 32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
141static cl::opt<unsigned>
142MaxArraySize("instcombine-maxarray-size", cl::init(Val: 1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(Val: true));
154
155std::optional<Instruction *>
156InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
157 // Handle target specific intrinsics
158 if (II.getCalledFunction()->isTargetIntrinsic()) {
159 return TTI.instCombineIntrinsic(IC&: *this, II);
160 }
161 return std::nullopt;
162}
163
164std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
165 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167 // Handle target specific intrinsics
168 if (II.getCalledFunction()->isTargetIntrinsic()) {
169 return TTI.simplifyDemandedUseBitsIntrinsic(IC&: *this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
175std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
176 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
177 APInt &PoisonElts2, APInt &PoisonElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
181 if (II.getCalledFunction()->isTargetIntrinsic()) {
182 return TTI.simplifyDemandedVectorEltsIntrinsic(
183 IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
194 return llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
195}
196
197/// Legal integers and common types are considered desirable. This is used to
/// avoid creating instructions with types that may not be supported well by
/// the backend.
200/// NOTE: This treats i8, i16 and i32 specially because they are common
201/// types in frontend languages.
202bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
203 switch (BitWidth) {
204 case 8:
205 case 16:
206 case 32:
207 return true;
208 default:
209 return DL.isLegalInteger(Width: BitWidth);
210 }
211}
212
213/// Return true if it is desirable to convert an integer computation from a
214/// given bit width to a new bit width.
215/// We don't want to convert from a legal or desirable type (like i8) to an
216/// illegal type or from a smaller to a larger illegal type. A width of '1'
217/// is always treated as a desirable type because i1 is a fundamental type in
218/// IR, and there are many specialized optimizations for i1 types.
219/// Common/desirable widths are equally treated as legal to convert to, in
220/// order to open up more combining opportunities.
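/// For example (illustrative, assuming a DataLayout in which i33 is illegal
/// and i32 is legal or desirable): converting i33 -> i32 is allowed because we
/// shrink to a desirable width, while i32 -> i33 is rejected because it would
/// go from a desirable type to an illegal one.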
221bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
222 unsigned ToWidth) const {
223 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth);
224 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth);
225
226 // Convert to desirable widths even if they are not legal types.
227 // Only shrink types, to prevent infinite loops.
228 if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth))
229 return true;
230
  // If this is a legal or desirable integer source type, and the result would
  // be an illegal type, don't do the transformation.
233 if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal)
234 return false;
235
236 // Otherwise, if both are illegal, do not increase the size of the result. We
237 // do allow things like i160 -> i64, but not i64 -> i160.
238 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
239 return false;
240
241 return true;
242}
243
244/// Return true if it is desirable to convert a computation from 'From' to 'To'.
245/// We don't want to convert from a legal to an illegal type or from a smaller
246/// to a larger illegal type. i1 is always treated as a legal type because it is
247/// a fundamental type in IR, and there are many specialized optimizations for
248/// i1 types.
249bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
250 // TODO: This could be extended to allow vectors. Datalayout changes might be
251 // needed to properly support that.
252 if (!From->isIntegerTy() || !To->isIntegerTy())
253 return false;
254
255 unsigned FromWidth = From->getPrimitiveSizeInBits();
256 unsigned ToWidth = To->getPrimitiveSizeInBits();
257 return shouldChangeType(FromWidth, ToWidth);
258}
259
// Return true if No Signed Wrap should be maintained for I.
261// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
262// where both B and C should be ConstantInts, results in a constant that does
263// not overflow. This function only handles the Add and Sub opcodes. For
264// all other opcodes, the function conservatively returns false.
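// For example (illustrative, i8 add): nsw can be kept when folding the
// constants 100 and 27 (100 + 27 = 127 still fits in i8), but not when folding
// 100 and 50 (100 + 50 = 150 overflows the signed i8 range).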
265static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
266 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
267 if (!OBO || !OBO->hasNoSignedWrap())
268 return false;
269
270 // We reason about Add and Sub Only.
271 Instruction::BinaryOps Opcode = I.getOpcode();
272 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
273 return false;
274
275 const APInt *BVal, *CVal;
276 if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal)))
277 return false;
278
279 bool Overflow = false;
280 if (Opcode == Instruction::Add)
281 (void)BVal->sadd_ov(RHS: *CVal, Overflow);
282 else
283 (void)BVal->ssub_ov(RHS: *CVal, Overflow);
284
285 return !Overflow;
286}
287
288static bool hasNoUnsignedWrap(BinaryOperator &I) {
289 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
290 return OBO && OBO->hasNoUnsignedWrap();
291}
292
293static bool hasNoSignedWrap(BinaryOperator &I) {
294 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
295 return OBO && OBO->hasNoSignedWrap();
296}
297
298/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable, as they can be
/// kept safely across these transforms.
301static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
302 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I);
303 if (!FPMO) {
304 I.clearSubclassOptionalData();
305 return;
306 }
307
308 FastMathFlags FMF = I.getFastMathFlags();
309 I.clearSubclassOptionalData();
310 I.setFastMathFlags(FMF);
311}
312
313/// Combine constant operands of associative operations either before or after a
314/// cast to eliminate one of the associative operations:
315/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
316/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
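/// For example (illustrative IR):
///    %a = xor i8 %x, 5
///    %z = zext i8 %a to i32
///    %r = xor i32 %z, 10
/// becomes:
///    %z = zext i8 %x to i32
///    %r = xor i32 %z, 15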
317static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
318 InstCombinerImpl &IC) {
319 auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0));
320 if (!Cast || !Cast->hasOneUse())
321 return false;
322
323 // TODO: Enhance logic for other casts and remove this check.
324 auto CastOpcode = Cast->getOpcode();
325 if (CastOpcode != Instruction::ZExt)
326 return false;
327
328 // TODO: Enhance logic for other BinOps and remove this check.
329 if (!BinOp1->isBitwiseLogicOp())
330 return false;
331
332 auto AssocOpcode = BinOp1->getOpcode();
333 auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0));
334 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
335 return false;
336
337 Constant *C1, *C2;
338 if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) ||
339 !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2)))
340 return false;
341
342 // TODO: This assumes a zext cast.
343 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
344 // to the destination type might lose bits.
345
346 // Fold the constants together in the destination type:
347 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
348 const DataLayout &DL = IC.getDataLayout();
349 Type *DestTy = C1->getType();
350 Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL);
351 if (!CastC2)
352 return false;
353 Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL);
354 if (!FoldedC)
355 return false;
356
357 IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0));
358 IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC);
359 BinOp1->dropPoisonGeneratingFlags();
360 Cast->dropPoisonGeneratingFlags();
361 return true;
362}
363
364// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
365// inttoptr ( ptrtoint (x) ) --> x
366Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
367 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
368 if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) ==
369 DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) {
370 auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0));
371 Type *CastTy = IntToPtr->getDestTy();
372 if (PtrToInt &&
373 CastTy->getPointerAddressSpace() ==
374 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
375 DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) ==
376 DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy()))
377 return PtrToInt->getOperand(i_nocapture: 0);
378 }
379 return nullptr;
380}
381
382/// This performs a few simplifications for operators that are associative or
383/// commutative:
384///
385/// Commutative operators:
386///
387/// 1. Order operands such that they are listed from right (least complex) to
388/// left (most complex). This puts constants before unary operators before
389/// binary operators.
390///
391/// Associative operators:
392///
393/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
394/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
395///
396/// Associative and commutative operators:
397///
398/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
399/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
400/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
401/// if C1 and C2 are constants.
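///
/// For example (illustrative), transform 6 with integer adds rewrites
/// "(X + 3) + (Y + 7)" into "(X + Y) + 10".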
402bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
403 Instruction::BinaryOps Opcode = I.getOpcode();
404 bool Changed = false;
405
406 do {
407 // Order operands such that they are listed from right (least complex) to
408 // left (most complex). This puts constants before unary operators before
409 // binary operators.
410 if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) <
411 getComplexity(V: I.getOperand(i_nocapture: 1)))
412 Changed = !I.swapOperands();
413
414 if (I.isCommutative()) {
415 if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) {
416 replaceOperand(I, OpNum: 0, V: Pair->first);
417 replaceOperand(I, OpNum: 1, V: Pair->second);
418 Changed = true;
419 }
420 }
421
422 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0));
423 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1));
424
425 if (I.isAssociative()) {
426 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
427 if (Op0 && Op0->getOpcode() == Opcode) {
428 Value *A = Op0->getOperand(i_nocapture: 0);
429 Value *B = Op0->getOperand(i_nocapture: 1);
430 Value *C = I.getOperand(i_nocapture: 1);
431
432 // Does "B op C" simplify?
433 if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) {
434 // It simplifies to V. Form "A op V".
435 replaceOperand(I, OpNum: 0, V: A);
436 replaceOperand(I, OpNum: 1, V);
437 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0);
438 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0);
439
440 // Conservatively clear all optional flags since they may not be
441 // preserved by the reassociation. Reset nsw/nuw based on the above
442 // analysis.
443 ClearSubclassDataAfterReassociation(I);
444
445 // Note: this is only valid because SimplifyBinOp doesn't look at
446 // the operands to Op0.
447 if (IsNUW)
448 I.setHasNoUnsignedWrap(true);
449
450 if (IsNSW)
451 I.setHasNoSignedWrap(true);
452
453 Changed = true;
454 ++NumReassoc;
455 continue;
456 }
457 }
458
459 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
460 if (Op1 && Op1->getOpcode() == Opcode) {
461 Value *A = I.getOperand(i_nocapture: 0);
462 Value *B = Op1->getOperand(i_nocapture: 0);
463 Value *C = Op1->getOperand(i_nocapture: 1);
464
465 // Does "A op B" simplify?
466 if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) {
467 // It simplifies to V. Form "V op C".
468 replaceOperand(I, OpNum: 0, V);
469 replaceOperand(I, OpNum: 1, V: C);
470 // Conservatively clear the optional flags, since they may not be
471 // preserved by the reassociation.
472 ClearSubclassDataAfterReassociation(I);
473 Changed = true;
474 ++NumReassoc;
475 continue;
476 }
477 }
478 }
479
480 if (I.isAssociative() && I.isCommutative()) {
481 if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) {
482 Changed = true;
483 ++NumReassoc;
484 continue;
485 }
486
487 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
488 if (Op0 && Op0->getOpcode() == Opcode) {
489 Value *A = Op0->getOperand(i_nocapture: 0);
490 Value *B = Op0->getOperand(i_nocapture: 1);
491 Value *C = I.getOperand(i_nocapture: 1);
492
493 // Does "C op A" simplify?
494 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
495 // It simplifies to V. Form "V op B".
496 replaceOperand(I, OpNum: 0, V);
497 replaceOperand(I, OpNum: 1, V: B);
498 // Conservatively clear the optional flags, since they may not be
499 // preserved by the reassociation.
500 ClearSubclassDataAfterReassociation(I);
501 Changed = true;
502 ++NumReassoc;
503 continue;
504 }
505 }
506
507 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
508 if (Op1 && Op1->getOpcode() == Opcode) {
509 Value *A = I.getOperand(i_nocapture: 0);
510 Value *B = Op1->getOperand(i_nocapture: 0);
511 Value *C = Op1->getOperand(i_nocapture: 1);
512
513 // Does "C op A" simplify?
514 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
515 // It simplifies to V. Form "B op V".
516 replaceOperand(I, OpNum: 0, V: B);
517 replaceOperand(I, OpNum: 1, V);
518 // Conservatively clear the optional flags, since they may not be
519 // preserved by the reassociation.
520 ClearSubclassDataAfterReassociation(I);
521 Changed = true;
522 ++NumReassoc;
523 continue;
524 }
525 }
526
527 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
528 // if C1 and C2 are constants.
529 Value *A, *B;
530 Constant *C1, *C2, *CRes;
531 if (Op0 && Op1 &&
532 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
533 match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) &&
534 match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) &&
535 (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) {
536 bool IsNUW = hasNoUnsignedWrap(I) &&
537 hasNoUnsignedWrap(I&: *Op0) &&
538 hasNoUnsignedWrap(I&: *Op1);
539 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
540 BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) :
541 BinaryOperator::Create(Op: Opcode, S1: A, S2: B);
542
543 if (isa<FPMathOperator>(Val: NewBO)) {
544 FastMathFlags Flags = I.getFastMathFlags() &
545 Op0->getFastMathFlags() &
546 Op1->getFastMathFlags();
547 NewBO->setFastMathFlags(Flags);
548 }
549 InsertNewInstWith(New: NewBO, Old: I.getIterator());
550 NewBO->takeName(V: Op1);
551 replaceOperand(I, OpNum: 0, V: NewBO);
552 replaceOperand(I, OpNum: 1, V: CRes);
553 // Conservatively clear the optional flags, since they may not be
554 // preserved by the reassociation.
555 ClearSubclassDataAfterReassociation(I);
556 if (IsNUW)
557 I.setHasNoUnsignedWrap(true);
558
559 Changed = true;
560 continue;
561 }
562 }
563
564 // No further simplifications.
565 return Changed;
566 } while (true);
567}
568
569/// Return whether "X LOp (Y ROp Z)" is always equal to
570/// "(X LOp Y) ROp (X LOp Z)".
571static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
572 Instruction::BinaryOps ROp) {
573 // X & (Y | Z) <--> (X & Y) | (X & Z)
574 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
575 if (LOp == Instruction::And)
576 return ROp == Instruction::Or || ROp == Instruction::Xor;
577
578 // X | (Y & Z) <--> (X | Y) & (X | Z)
579 if (LOp == Instruction::Or)
580 return ROp == Instruction::And;
581
582 // X * (Y + Z) <--> (X * Y) + (X * Z)
583 // X * (Y - Z) <--> (X * Y) - (X * Z)
584 if (LOp == Instruction::Mul)
585 return ROp == Instruction::Add || ROp == Instruction::Sub;
586
587 return false;
588}
589
590/// Return whether "(X LOp Y) ROp Z" is always equal to
591/// "(X ROp Z) LOp (Y ROp Z)".
592static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
593 Instruction::BinaryOps ROp) {
594 if (Instruction::isCommutative(Opcode: ROp))
595 return leftDistributesOverRight(LOp: ROp, ROp: LOp);
596
597 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
598 return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp);
599
600 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
601 // but this requires knowing that the addition does not overflow and other
602 // such subtleties.
603}
604
/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
607static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
608 if (isa<Constant>(Val: V))
609 return nullptr;
610
611 return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType());
612}
613
614/// This function predicates factorization using distributive laws. By default,
615/// it just returns the 'Op' inputs. But for special-cases like
616/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
617/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
618/// allow more factorization opportunities.
619static Instruction::BinaryOps
620getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
621 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
622 assert(Op && "Expected a binary operator");
623 LHS = Op->getOperand(i_nocapture: 0);
624 RHS = Op->getOperand(i_nocapture: 1);
625 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
626 Constant *C;
627 if (match(V: Op, P: m_Shl(L: m_Value(), R: m_Constant(C)))) {
628 // X << C --> X * (1 << C)
629 RHS = ConstantExpr::getShl(C1: ConstantInt::get(Ty: Op->getType(), V: 1), C2: C);
630 return Instruction::Mul;
631 }
632 // TODO: We can add other conversions e.g. shr => div etc.
633 }
634 if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) {
635 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
636 match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) {
637 // lshr nneg C, X --> ashr nneg C, X
638 return Instruction::AShr;
639 }
640 }
641 return Op->getOpcode();
642}
643
644/// This tries to simplify binary operations by factorizing out common terms
645/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
646static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
647 InstCombiner::BuilderTy &Builder,
648 Instruction::BinaryOps InnerOpcode, Value *A,
649 Value *B, Value *C, Value *D) {
650 assert(A && B && C && D && "All values must be provided");
651
652 Value *V = nullptr;
653 Value *RetVal = nullptr;
654 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
655 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
656
657 // Does "X op' Y" always equal "Y op' X"?
658 bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode);
659
660 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
661 if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) {
662 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
663 // commutative case, "(A op' B) op (C op' A)"?
664 if (A == C || (InnerCommutative && A == D)) {
665 if (A != C)
666 std::swap(a&: C, b&: D);
667 // Consider forming "A op' (B op D)".
668 // If "B op D" simplifies then it can be formed with no cost.
669 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I));
670
671 // If "B op D" doesn't simplify then only go on if one of the existing
672 // operations "A op' B" and "C op' D" will be zapped as no longer used.
673 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
674 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName());
675 if (V)
676 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V);
677 }
678 }
679
680 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
681 if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) {
682 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
683 // commutative case, "(A op' B) op (B op' D)"?
684 if (B == D || (InnerCommutative && B == C)) {
685 if (B != D)
686 std::swap(a&: C, b&: D);
687 // Consider forming "(A op C) op' B".
688 // If "A op C" simplifies then it can be formed with no cost.
689 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I));
690
691 // If "A op C" doesn't simplify then only go on if one of the existing
692 // operations "A op' B" and "C op' D" will be zapped as no longer used.
693 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
694 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName());
695 if (V)
696 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B);
697 }
698 }
699
700 if (!RetVal)
701 return nullptr;
702
703 ++NumFactor;
704 RetVal->takeName(V: &I);
705
706 // Try to add no-overflow flags to the final value.
707 if (isa<OverflowingBinaryOperator>(Val: RetVal)) {
708 bool HasNSW = false;
709 bool HasNUW = false;
710 if (isa<OverflowingBinaryOperator>(Val: &I)) {
711 HasNSW = I.hasNoSignedWrap();
712 HasNUW = I.hasNoUnsignedWrap();
713 }
714 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) {
715 HasNSW &= LOBO->hasNoSignedWrap();
716 HasNUW &= LOBO->hasNoUnsignedWrap();
717 }
718
719 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) {
720 HasNSW &= ROBO->hasNoSignedWrap();
721 HasNUW &= ROBO->hasNoUnsignedWrap();
722 }
723
724 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
725 // We can propagate 'nsw' if we know that
726 // %Y = mul nsw i16 %X, C
727 // %Z = add nsw i16 %Y, %X
728 // =>
729 // %Z = mul nsw i16 %X, C+1
730 //
731 // iff C+1 isn't INT_MIN
732 const APInt *CInt;
733 if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue())
734 cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW);
735
736 // nuw can be propagated with any constant or nuw value.
737 cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW);
738 }
739 }
740 return RetVal;
741}
742
// If `I` has one Const operand and the other matches `(ctpop (not x))`,
// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
// This is only useful if the new subtract can fold, so we only handle the
// following cases:
//    1) (add/sub/disjoint_or C, (ctpop (not x)))
//        -> (add/sub/disjoint_or C', (ctpop x))
//    2) (cmp pred C, (ctpop (not x)))
//        -> (cmp pred C', (ctpop x))
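// For example (illustrative, i8): "add i8 5, (ctpop (xor i8 %x, -1))" becomes
// "sub i8 13, (ctpop i8 %x)", since ctpop(~x) == 8 - ctpop(x) and
// 5 + (8 - ctpop(x)) == 13 - ctpop(x).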
751Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
752 unsigned Opc = I->getOpcode();
753 unsigned ConstIdx = 1;
754 switch (Opc) {
755 default:
756 return nullptr;
  // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x), (ctpop x))
  // We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
  // is constant.
760 case Instruction::Sub:
761 ConstIdx = 0;
762 break;
763 case Instruction::ICmp:
    // Signed predicates aren't correct in some edge cases like for i2 types;
    // also, since (ctpop x) fits in log2(BitWidth(x)) + 1 bits, almost all
    // signed comparisons against it are simplified to unsigned ones.
767 if (cast<ICmpInst>(Val: I)->isSigned())
768 return nullptr;
769 break;
770 case Instruction::Or:
771 if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value())))
772 return nullptr;
773 [[fallthrough]];
774 case Instruction::Add:
775 break;
776 }
777
778 Value *Op;
779 // Find ctpop.
780 if (!match(I->getOperand(1 - ConstIdx),
781 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
782 return nullptr;
783
784 Constant *C;
785 // Check other operand is ImmConstant.
786 if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C)))
787 return nullptr;
788
789 Type *Ty = Op->getType();
790 Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits());
791 // Need extra check for icmp. Note if this check is true, it generally means
792 // the icmp will simplify to true/false.
793 if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality() &&
794 !ConstantExpr::getICmp(pred: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC)->isZeroValue())
795 return nullptr;
796
797 // Check we can invert `(not x)` for free.
798 bool Consumes = false;
799 if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes)
800 return nullptr;
801 Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder);
802 assert(NotOp != nullptr &&
803 "Desync between isFreeToInvert and getFreelyInverted");
804
805 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
806
807 Value *R = nullptr;
808
809 // Do the transformation here to avoid potentially introducing an infinite
810 // loop.
811 switch (Opc) {
812 case Instruction::Sub:
813 R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC));
814 break;
815 case Instruction::Or:
816 case Instruction::Add:
817 R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp);
818 break;
819 case Instruction::ICmp:
820 R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(),
821 LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C));
822 break;
823 default:
824 llvm_unreachable("Unhandled Opcode");
825 }
826 assert(R != nullptr);
827 return replaceInstUsesWith(I&: *I, V: R);
828}
829
830// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
831// IFF
832// 1) the logic_shifts match
//    2) either BinOp1 is `and`, or the binop/shift pair is completely
//       distributable (anything but `add` + `lshr`) and either BinOp2 is `and`
//       or (logic_shift (inv_logic_shift C1, C), C) == C1
836//
837// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
838//
839// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
840// IFF
841// 1) the logic_shifts match
842// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
843//
844// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
845//
846// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
847// IFF
848// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
849// 2) Binop2 is `not`
850//
851// -> (arithmetic_shift Binop1((not X), Y), Amt)
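//
// For example (illustrative), the second form with matching `or`s:
//    (or (or (lshr X, 4), Mask), (lshr Y, 4))
//      -> (or (lshr (or X, Y), 4), Mask)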
852
853Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
854 const DataLayout &DL = I.getModule()->getDataLayout();
855 auto IsValidBinOpc = [](unsigned Opc) {
856 switch (Opc) {
857 default:
858 return false;
859 case Instruction::And:
860 case Instruction::Or:
861 case Instruction::Xor:
862 case Instruction::Add:
863 // Skip Sub as we only match constant masks which will canonicalize to use
864 // add.
865 return true;
866 }
867 };
868
869 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
870 // constraints.
871 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
872 unsigned ShOpc) {
873 assert(ShOpc != Instruction::AShr);
874 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
875 ShOpc == Instruction::Shl;
876 };
877
878 auto GetInvShift = [](unsigned ShOpc) {
879 assert(ShOpc != Instruction::AShr);
880 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
881 };
882
883 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
884 unsigned ShOpc, Constant *CMask,
885 Constant *CShift) {
886 // If the BinOp1 is `and` we don't need to check the mask.
887 if (BinOpc1 == Instruction::And)
888 return true;
889
    // For all other possible transforms we need a completely distributable
    // binop/shift (anything but `add` + `lshr`).
892 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
893 return false;
894
895 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
896 // vecs, otherwise the mask will be simplified and the following check will
897 // handle it).
898 if (BinOpc2 == Instruction::And)
899 return true;
900
901 // Otherwise, need mask that meets the below requirement.
902 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
903 Constant *MaskInvShift =
904 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
905 return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) ==
906 CMask;
907 };
908
909 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
910 Constant *CMask, *CShift;
911 Value *X, *Y, *ShiftedX, *Mask, *Shift;
912 if (!match(V: I.getOperand(i_nocapture: ShOpnum),
913 P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift)))))
914 return nullptr;
915 if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum),
916 P: m_BinOp(L: m_Value(V&: ShiftedX), R: m_Value(V&: Mask))))
917 return nullptr;
918
919 if (!match(V: ShiftedX, P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift)))))
920 return nullptr;
921
922 // Make sure we are matching instruction shifts and not ConstantExpr
923 auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum));
924 auto *IX = dyn_cast<Instruction>(Val: ShiftedX);
925 if (!IY || !IX)
926 return nullptr;
927
928 // LHS and RHS need same shift opcode
929 unsigned ShOpc = IY->getOpcode();
930 if (ShOpc != IX->getOpcode())
931 return nullptr;
932
933 // Make sure binop is real instruction and not ConstantExpr
934 auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum));
935 if (!BO2)
936 return nullptr;
937
938 unsigned BinOpc = BO2->getOpcode();
939 // Make sure we have valid binops.
940 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
941 return nullptr;
942
943 if (ShOpc == Instruction::AShr) {
944 if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) &&
945 BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) {
946 Value *NotX = Builder.CreateNot(V: X);
947 Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX);
948 return BinaryOperator::Create(
949 Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift);
950 }
951
952 return nullptr;
953 }
954
    // If BinOp1 == BinOp2 and it's either a bitwise op or `shl` with `add`,
    // then just distribute to drop the shift irrespective of the constants.
957 if (BinOpc == I.getOpcode() &&
958 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
959 Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y);
960 Value *NewBinOp1 = Builder.CreateBinOp(
961 Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift);
962 return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask);
963 }
964
965 // Otherwise we can only distribute by constant shifting the mask, so
966 // ensure we have constants.
967 if (!match(V: Shift, P: m_ImmConstant(C&: CShift)))
968 return nullptr;
969 if (!match(V: Mask, P: m_ImmConstant(C&: CMask)))
970 return nullptr;
971
972 // Check if we can distribute the binops.
973 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
974 return nullptr;
975
976 Constant *NewCMask =
977 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
978 Value *NewBinOp2 = Builder.CreateBinOp(
979 Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask);
980 Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2);
981 return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc),
982 S1: NewBinOp1, S2: CShift);
983 };
984
985 if (Instruction *R = MatchBinOp(0))
986 return R;
987 return MatchBinOp(1);
988}
989
990// (Binop (zext C), (select C, T, F))
991// -> (select C, (binop 1, T), (binop 0, F))
992//
993// (Binop (sext C), (select C, T, F))
994// -> (select C, (binop -1, T), (binop 0, F))
995//
// Attempt to simplify binary operations into a select with folded args, when
// one operand of the binop is a select instruction and the other operand is a
// zext/sext extension whose source value is the select condition.
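//
// For example (illustrative):
//    (add (zext i1 %c to i32), (select i1 %c, i32 %t, i32 %f))
//      -> (select i1 %c, i32 (add 1, %t), i32 (add 0, %f))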
999Instruction *
1000InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
1001 // TODO: this simplification may be extended to any speculatable instruction,
1002 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1003 Instruction::BinaryOps Opc = I.getOpcode();
1004 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1005 Value *A, *CondVal, *TrueVal, *FalseVal;
1006 Value *CastOp;
1007
1008 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1009 return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) &&
1010 A->getType()->getScalarSizeInBits() == 1 &&
1011 match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal),
1012 R: m_Value(V&: FalseVal)));
1013 };
1014
  // Make sure one side of the binop is a select instruction, and the other is
  // a zero/sign extension operating on an i1.
1017 if (MatchSelectAndCast(LHS, RHS))
1018 CastOp = LHS;
1019 else if (MatchSelectAndCast(RHS, LHS))
1020 CastOp = RHS;
1021 else
1022 return nullptr;
1023
1024 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1025 bool IsCastOpRHS = (CastOp == RHS);
1026 bool IsZExt = isa<ZExtInst>(Val: CastOp);
1027 Constant *C;
1028
1029 if (IsTrueArm) {
1030 C = Constant::getNullValue(Ty: V->getType());
1031 } else if (IsZExt) {
1032 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1033 C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1));
1034 } else {
1035 C = Constant::getAllOnesValue(Ty: V->getType());
1036 }
1037
1038 return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C)
1039 : Builder.CreateBinOp(Opc, LHS: C, RHS: V);
1040 };
1041
  // If the value used in the zext/sext is the select condition, or the
  // negation of the select condition, the binop can be simplified.
1044 if (CondVal == A) {
1045 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1046 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1047 S2: NewFoldedConst(true, FalseVal));
1048 }
1049
1050 if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) {
1051 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1052 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1053 S2: NewFoldedConst(false, FalseVal));
1054 }
1055
1056 return nullptr;
1057}
1058
1059Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1060 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1061 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1062 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1063 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1064 Value *A, *B, *C, *D;
1065 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1066
1067 if (Op0)
1068 LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1);
1069 if (Op1)
1070 RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0);
1071
1072 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1073 // a common term.
1074 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1075 if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D))
1076 return V;
1077
1078 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1079 // term.
1080 if (Op0)
1081 if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS))
1082 if (Value *V =
1083 tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident))
1084 return V;
1085
1086 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1087 // term.
1088 if (Op1)
1089 if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS))
1090 if (Value *V =
1091 tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D))
1092 return V;
1093
1094 return nullptr;
1095}
1096
1097/// This tries to simplify binary operations which some other binary operation
1098/// distributes over either by factorizing out common terms
1099/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1100/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1101/// Returns the simplified value, or null if it didn't simplify.
1102Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1103 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1104 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1105 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1106 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1107
1108 // Factorization.
1109 if (Value *R = tryFactorizationFolds(I))
1110 return R;
1111
1112 // Expansion.
1113 if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) {
1114 // The instruction has the form "(A op' B) op C". See if expanding it out
1115 // to "(A op C) op' (B op C)" results in simplifications.
1116 Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS;
1117 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1118
1119 // Disable the use of undef because it's not safe to distribute undef.
1120 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1121 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1122 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive);
1123
1124 // Do "A op C" and "B op C" both simplify?
1125 if (L && R) {
1126 // They do! Return "L op' R".
1127 ++NumExpand;
1128 C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1129 C->takeName(V: &I);
1130 return C;
1131 }
1132
1133 // Does "A op C" simplify to the identity value for the inner opcode?
1134 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1135 // They do! Return "B op C".
1136 ++NumExpand;
1137 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C);
1138 C->takeName(V: &I);
1139 return C;
1140 }
1141
1142 // Does "B op C" simplify to the identity value for the inner opcode?
1143 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1144 // They do! Return "A op C".
1145 ++NumExpand;
1146 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1147 C->takeName(V: &I);
1148 return C;
1149 }
1150 }
1151
1152 if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) {
1153 // The instruction has the form "A op (B op' C)". See if expanding it out
1154 // to "(A op B) op' (A op C)" results in simplifications.
1155 Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1);
1156 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1157
1158 // Disable the use of undef because it's not safe to distribute undef.
1159 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1160 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive);
1161 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1162
1163 // Do "A op B" and "A op C" both simplify?
1164 if (L && R) {
1165 // They do! Return "L op' R".
1166 ++NumExpand;
1167 A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1168 A->takeName(V: &I);
1169 return A;
1170 }
1171
1172 // Does "A op B" simplify to the identity value for the inner opcode?
1173 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1174 // They do! Return "A op C".
1175 ++NumExpand;
1176 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1177 A->takeName(V: &I);
1178 return A;
1179 }
1180
1181 // Does "A op C" simplify to the identity value for the inner opcode?
1182 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1183 // They do! Return "A op B".
1184 ++NumExpand;
1185 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B);
1186 A->takeName(V: &I);
1187 return A;
1188 }
1189 }
1190
1191 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1192}
1193
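/// Match a pair of PHI nodes that live in the same block, have the same
/// incoming blocks, and whose incoming values are the same two values
/// (possibly swapped on some predecessors). Returns that value pair.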
1194static std::optional<std::pair<Value *, Value *>>
1195matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1196 if (LHS->getParent() != RHS->getParent())
1197 return std::nullopt;
1198
1199 if (LHS->getNumIncomingValues() < 2)
1200 return std::nullopt;
1201
1202 if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks()))
1203 return std::nullopt;
1204
1205 Value *L0 = LHS->getIncomingValue(i: 0);
1206 Value *R0 = RHS->getIncomingValue(i: 0);
1207
1208 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1209 Value *L1 = LHS->getIncomingValue(i: I);
1210 Value *R1 = RHS->getIncomingValue(i: I);
1211
1212 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1213 continue;
1214
1215 return std::nullopt;
1216 }
1217
1218 return std::optional(std::pair(L0, R0));
1219}
1220
1221std::optional<std::pair<Value *, Value *>>
1222InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1223 Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS);
1224 Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS);
1225 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1226 return std::nullopt;
1227 switch (LHSInst->getOpcode()) {
1228 case Instruction::PHI:
1229 return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS));
1230 case Instruction::Select: {
1231 Value *Cond = LHSInst->getOperand(i: 0);
1232 Value *TrueVal = LHSInst->getOperand(i: 1);
1233 Value *FalseVal = LHSInst->getOperand(i: 2);
1234 if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) &&
1235 FalseVal == RHSInst->getOperand(i: 1))
1236 return std::pair(TrueVal, FalseVal);
1237 return std::nullopt;
1238 }
1239 case Instruction::Call: {
1240 // Match min(a, b) and max(a, b)
1241 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst);
1242 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst);
1243 if (LHSMinMax && RHSMinMax &&
1244 LHSMinMax->getPredicate() ==
1245 ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) &&
1246 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1247 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1248 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1249 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1250 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1251 return std::nullopt;
1252 }
1253 default:
1254 return std::nullopt;
1255 }
1256}
1257
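/// Try to push a binary operator into the arms of one or two select operands:
///   (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
///   (A ? B : C) op Y           -> A ? (B op Y) : (C op Y)
/// when the per-arm operations simplify (or can be formed cheaply).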
1258Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
1259 Value *LHS,
1260 Value *RHS) {
1261 Value *A, *B, *C, *D, *E, *F;
1262 bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C)));
1263 bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F)));
1264 if (!LHSIsSelect && !RHSIsSelect)
1265 return nullptr;
1266
1267 FastMathFlags FMF;
1268 BuilderTy::FastMathFlagGuard Guard(Builder);
1269 if (isa<FPMathOperator>(Val: &I)) {
1270 FMF = I.getFastMathFlags();
1271 Builder.setFastMathFlags(FMF);
1272 }
1273
1274 Instruction::BinaryOps Opcode = I.getOpcode();
1275 SimplifyQuery Q = SQ.getWithInstruction(I: &I);
1276
1277 Value *Cond, *True = nullptr, *False = nullptr;
1278
1279 // Special-case for add/negate combination. Replace the zero in the negation
1280 // with the trailing add operand:
1281 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1282 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1283 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1284 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1285 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1286 return nullptr;
1287
1288 Value *N;
1289 if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) {
1290 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1291 return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName());
1292 }
1293 if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) {
1294 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1295 return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName());
1296 }
1297 return nullptr;
1298 };
1299
1300 if (LHSIsSelect && RHSIsSelect && A == D) {
1301 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1302 Cond = A;
1303 True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q);
1304 False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q);
1305
1306 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1307 if (False && !True)
1308 True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E);
1309 else if (True && !False)
1310 False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F);
1311 }
1312 } else if (LHSIsSelect && LHS->hasOneUse()) {
1313 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1314 Cond = A;
1315 True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q);
1316 False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q);
1317 if (Value *NewSel = foldAddNegate(B, C, RHS))
1318 return NewSel;
1319 } else if (RHSIsSelect && RHS->hasOneUse()) {
1320 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1321 Cond = D;
1322 True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q);
1323 False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q);
1324 if (Value *NewSel = foldAddNegate(E, F, LHS))
1325 return NewSel;
1326 }
1327
1328 if (!True || !False)
1329 return nullptr;
1330
1331 Value *SI = Builder.CreateSelect(C: Cond, True, False);
1332 SI->takeName(V: &I);
1333 return SI;
1334}
1335
1336/// Freely adapt every user of V as-if V was changed to !V.
1337/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1338void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1339 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1340 for (User *U : make_early_inc_range(Range: I->users())) {
1341 if (U == IgnoredUser)
1342 continue; // Don't consider this user.
1343 switch (cast<Instruction>(Val: U)->getOpcode()) {
1344 case Instruction::Select: {
1345 auto *SI = cast<SelectInst>(Val: U);
1346 SI->swapValues();
1347 SI->swapProfMetadata();
1348 break;
1349 }
1350 case Instruction::Br: {
1351 BranchInst *BI = cast<BranchInst>(Val: U);
1352 BI->swapSuccessors(); // swaps prof metadata too
1353 if (BPI)
1354 BPI->swapSuccEdgesProbabilities(Src: BI->getParent());
1355 break;
1356 }
1357 case Instruction::Xor:
1358 replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I);
1359 // Add to worklist for DCE.
1360 addToWorklist(I: cast<Instruction>(Val: U));
1361 break;
1362 default:
1363 llvm_unreachable("Got unexpected user - out of sync with "
1364 "canFreelyInvertAllUsersOf() ?");
1365 }
1366 }
1367}
1368
1369/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1370/// constant zero (which is the 'negate' form).
1371Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1372 Value *NegV;
1373 if (match(V, P: m_Neg(V: m_Value(V&: NegV))))
1374 return NegV;
1375
1376 // Constants can be considered to be negated values if they can be folded.
1377 if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V))
1378 return ConstantExpr::getNeg(C);
1379
1380 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V))
1381 if (C->getType()->getElementType()->isIntegerTy())
1382 return ConstantExpr::getNeg(C);
1383
1384 if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) {
1385 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1386 Constant *Elt = CV->getAggregateElement(Elt: i);
1387 if (!Elt)
1388 return nullptr;
1389
1390 if (isa<UndefValue>(Val: Elt))
1391 continue;
1392
1393 if (!isa<ConstantInt>(Val: Elt))
1394 return nullptr;
1395 }
1396 return ConstantExpr::getNeg(C: CV);
1397 }
1398
1399 // Negate integer vector splats.
1400 if (auto *CV = dyn_cast<Constant>(Val: V))
1401 if (CV->getType()->isVectorTy() &&
1402 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1403 return ConstantExpr::getNeg(C: CV);
1404
1405 return nullptr;
1406}
1407
1408// Try to fold:
1409// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1410// -> ({s|u}itofp (int_binop x, y))
1411// 2) (fp_binop ({s|u}itofp x), FpC)
1412// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1413//
1414// Assuming the sign of the cast for x/y is `OpsFromSigned`.
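//
// For example (illustrative, when the checks below prove the casts and the
// integer op to be exact):
//    (fadd (sitofp i8 %x to float), (sitofp i8 %y to float))
//      -> (sitofp (add i8 %x, %y) to float)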
1415Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1416 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1417 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1418
1419 Type *FPTy = BO.getType();
1420 Type *IntTy = IntOps[0]->getType();
1421
1422 unsigned IntSz = IntTy->getScalarSizeInBits();
  // This is the maximum number of in-use bits in the integer for which the
  // int -> fp casts are exact.
1425 unsigned MaxRepresentableBits =
1426 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1427
  // Preserve the known number of leading bits. This can allow us to trivially
  // satisfy the nsw/nuw checks later on.
1430 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1431
  // NB: This only comes up if OpsFromSigned is true, so there is no need to
  // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1434 auto IsNonZero = [&](unsigned OpNo) -> bool {
1435 if (OpsKnown[OpNo].hasKnownBits() &&
1436 OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero())
1437 return true;
1438 return isKnownNonZero(V: IntOps[OpNo], Q: SQ);
1439 };
1440
1441 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1442 // NB: This matches the impl in ValueTracking, we just try to use cached
1443 // knownbits here. If we ever start supporting WithCache for
1444 // `isKnownNonNegative`, change this to an explicit call.
1445 return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative();
1446 };
1447
1448 // Check if we know for certain that ({s|u}itofp op) is exact.
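// For example (illustrative): a float, with 24 bits of precision, represents
// every i16 value exactly, but not every i32 value; in the latter case
// exactness depends on the leading sign/zero bits computed below.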
1449 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1450 // Can we treat this operand as the desired sign?
1451 if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) &&
1452 !IsNonNeg(OpNo))
1453 return false;
1454
// If fp precision >= bitwidth(op) then it's exact.
1456 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1457 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1458 // handled specially. We can't, however, increase the bound arbitrarily for
1459 // `sitofp` as for larger sizes, it won't sign extend.
1460 if (MaxRepresentableBits < IntSz) {
// Otherwise, if it's a signed cast, check that fp precision >=
// bitwidth(op) - numSignBits(op).
1463 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1464 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1465 if (OpsFromSigned)
1466 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]);
1467 // Finally for unsigned check that fp precision >= bitwidth(op) -
1468 // numLeadingZeros(op).
1469 else {
1470 NumUsedLeadingBits[OpNo] =
1471 IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros();
1472 }
1473 }
// NB: We could also check if op is known to be a power of 2 or zero (which
// will always be representable). It's unlikely, however, that if we are
// unable to bound op in any way, we will be able to pass the overflow
// checks later on.
1478
1479 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1480 return false;
1481 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1482 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1483 IsNonZero(OpNo);
1484 };
1485
1486 // If we have a constant rhs, see if we can losslessly convert it to an int.
1487 if (Op1FpC != nullptr) {
// Signed + Mul requires the constant to be non-zero.
1489 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1490 !match(V: Op1FpC, P: m_NonZeroFP()))
1491 return nullptr;
1492
1493 Constant *Op1IntC = ConstantFoldCastOperand(
1494 Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC,
1495 DestTy: IntTy, DL);
1496 if (Op1IntC == nullptr)
1497 return nullptr;
1498 if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP
1499 : Instruction::UIToFP,
1500 C: Op1IntC, DestTy: FPTy, DL) != Op1FpC)
1501 return nullptr;
1502
1503 // First try to keep sign of cast the same.
1504 IntOps[1] = Op1IntC;
1505 }
1506
1507 // Ensure lhs/rhs integer types match.
1508 if (IntTy != IntOps[1]->getType())
1509 return nullptr;
1510
1511 if (Op1FpC == nullptr) {
1512 if (!IsValidPromotion(1))
1513 return nullptr;
1514 }
1515 if (!IsValidPromotion(0))
1516 return nullptr;
1517
// Finally, check that the integer version of the binop will not overflow.
1519 BinaryOperator::BinaryOps IntOpc;
1520 // Because of the precision check, we can often rule out overflows.
1521 bool NeedsOverflowCheck = true;
// Try to conservatively rule out overflow based on the precision checks
// already done above.
1524 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1525 unsigned OverflowMaxCurBits =
1526 std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]);
1527 bool OutputSigned = OpsFromSigned;
1528 switch (BO.getOpcode()) {
1529 case Instruction::FAdd:
1530 IntOpc = Instruction::Add;
1531 OverflowMaxOutputBits += OverflowMaxCurBits;
1532 break;
1533 case Instruction::FSub:
1534 IntOpc = Instruction::Sub;
1535 OverflowMaxOutputBits += OverflowMaxCurBits;
1536 break;
1537 case Instruction::FMul:
1538 IntOpc = Instruction::Mul;
1539 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1540 break;
1541 default:
1542 llvm_unreachable("Unsupported binop");
1543 }
1544 // The precision check may have already ruled out overflow.
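// Illustrative bound: if each operand is known to use at most 10 significant
// bits, an unsigned add needs at most 11 bits and an unsigned multiply at
// most 20 bits, so with a 32-bit IntSz the comparison below already proves
// the integer op cannot overflow.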
1545 if (OverflowMaxOutputBits < IntSz) {
1546 NeedsOverflowCheck = false;
// The subtraction may go negative (wrapping as unsigned), but the result is
// guaranteed to fit in the signed range; treating the output as signed is
// what allows us to avoid the overflow check for sub.
1549 if (IntOpc == Instruction::Sub)
1550 OutputSigned = true;
1551 }
1552
// If the precision check did not rule out overflow, check explicitly.
1554 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1555 // `IntOps[...]` arguments to `KnownOps[...]`.
1556 if (NeedsOverflowCheck &&
1557 !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned))
1558 return nullptr;
1559
1560 Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]);
1561 if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) {
1562 IntBO->setHasNoSignedWrap(OutputSigned);
1563 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1564 }
1565 if (OutputSigned)
1566 return new SIToFPInst(IntBinOp, FPTy);
1567 return new UIToFPInst(IntBinOp, FPTy);
1568}
1569
1570// Try to fold:
1571// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1572// -> ({s|u}itofp (int_binop x, y))
1573// 2) (fp_binop ({s|u}itofp x), FpC)
1574// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1575Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1576 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1577 Constant *Op1FpC = nullptr;
1578 // Check for:
1579 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1580 // 2) (binop ({s|u}itofp x), FpC)
1581 if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) &&
1582 !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0]))))
1583 return nullptr;
1584
1585 if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) &&
1586 !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) &&
1587 !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1]))))
1588 return nullptr;
1589
// Cache KnownBits to potentially save repeated analysis.
1591 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1592
1593 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1594 // different constraints depending on the sign of the cast.
1595 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1596 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1597 IntOps, Op1FpC, OpsKnown))
1598 return R;
1599 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1600 Op1FpC, OpsKnown);
1601}
1602
1603/// A binop with a constant operand and a sign-extended boolean operand may be
1604/// converted into a select of constants by applying the binary operation to
1605/// the constant with the two possible values of the extended boolean (0 or -1).
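/// For example (illustrative):
///   %s = sext i1 %b to i32
///   %r = add i32 %s, 10
/// becomes
///   %r = select i1 %b, i32 9, i32 10
/// because add -1, 10 is 9 and add 0, 10 is 10.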
1606Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1607 // TODO: Handle non-commutative binop (constant is operand 0).
1608 // TODO: Handle zext.
1609 // TODO: Peek through 'not' of cast.
1610 Value *BO0 = BO.getOperand(i_nocapture: 0);
1611 Value *BO1 = BO.getOperand(i_nocapture: 1);
1612 Value *X;
1613 Constant *C;
1614 if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) ||
1615 !X->getType()->isIntOrIntVectorTy(BitWidth: 1))
1616 return nullptr;
1617
1618 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1619 Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType());
1620 Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType());
1621 Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C);
1622 Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C);
1623 return SelectInst::Create(C: X, S1: TVal, S2: FVal);
1624}
1625
1626static Constant *constantFoldOperationIntoSelectOperand(Instruction &I,
1627 SelectInst *SI,
1628 bool IsTrueArm) {
1629 SmallVector<Constant *> ConstOps;
1630 for (Value *Op : I.operands()) {
1631 CmpInst::Predicate Pred;
1632 Constant *C = nullptr;
1633 if (Op == SI) {
1634 C = dyn_cast<Constant>(Val: IsTrueArm ? SI->getTrueValue()
1635 : SI->getFalseValue());
1636 } else if (match(V: SI->getCondition(),
1637 P: m_ICmp(Pred, L: m_Specific(V: Op), R: m_Constant(C))) &&
1638 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1639 isGuaranteedNotToBeUndefOrPoison(V: C)) {
1640 // Pass
1641 } else {
1642 C = dyn_cast<Constant>(Val: Op);
1643 }
1644 if (C == nullptr)
1645 return nullptr;
1646
1647 ConstOps.push_back(Elt: C);
1648 }
1649
1650 return ConstantFoldInstOperands(I: &I, Ops: ConstOps, DL: I.getModule()->getDataLayout());
1651}
1652
1653static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1654 Value *NewOp, InstCombiner &IC) {
1655 Instruction *Clone = I.clone();
1656 Clone->replaceUsesOfWith(From: SI, To: NewOp);
1657 Clone->dropUBImplyingAttrsAndMetadata();
1658 IC.InsertNewInstBefore(New: Clone, Old: SI->getIterator());
1659 return Clone;
1660}
1661
1662Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1663 bool FoldWithMultiUse) {
// Don't modify shared select instructions unless FoldWithMultiUse is set.
1665 if (!SI->hasOneUse() && !FoldWithMultiUse)
1666 return nullptr;
1667
1668 Value *TV = SI->getTrueValue();
1669 Value *FV = SI->getFalseValue();
1670 if (!(isa<Constant>(Val: TV) || isa<Constant>(Val: FV)))
1671 return nullptr;
1672
1673 // Bool selects with constant operands can be folded to logical ops.
1674 if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1))
1675 return nullptr;
1676
1677 // Test if a FCmpInst instruction is used exclusively by a select as
1678 // part of a minimum or maximum operation. If so, refrain from doing
1679 // any other folding. This helps out other analyses which understand
1680 // non-obfuscated minimum and maximum idioms. And in this case, at
1681 // least one of the comparison operands has at least one user besides
1682 // the compare (the select), which would often largely negate the
1683 // benefit of folding anyway.
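// For example (illustrative), an fmin idiom such as
//   %c = fcmp olt float %a, %b
//   %m = select i1 %c, float %a, float %b
// is left alone here so that later analyses can still recognize it.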
1684 if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) {
1685 if (CI->hasOneUse()) {
1686 Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1);
1687 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1688 return nullptr;
1689 }
1690 }
1691
1692 // Make sure that one of the select arms constant folds successfully.
1693 Value *NewTV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ true);
1694 Value *NewFV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ false);
1695 if (!NewTV && !NewFV)
1696 return nullptr;
1697
1698 // Create an instruction for the arm that did not fold.
1699 if (!NewTV)
1700 NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this);
1701 if (!NewFV)
1702 NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this);
1703 return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1704}
1705
1706static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1707 Value *InValue, BasicBlock *InBB,
1708 const DataLayout &DL,
1709 const SimplifyQuery SQ) {
1710 // NB: It is a precondition of this transform that the operands be
1711 // phi translatable! This is usually trivially satisfied by limiting it
1712 // to constant ops, and for selects we do a more sophisticated check.
1713 SmallVector<Value *> Ops;
1714 for (Value *Op : I.operands()) {
1715 if (Op == PN)
1716 Ops.push_back(Elt: InValue);
1717 else
1718 Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB));
1719 }
1720
1721 // Don't consider the simplification successful if we get back a constant
1722 // expression. That's just an instruction in hiding.
1723 // Also reject the case where we simplify back to the phi node. We wouldn't
1724 // be able to remove it in that case.
1725 Value *NewVal = simplifyInstructionWithOperands(
1726 I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator()));
1727 if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr()))
1728 return NewVal;
1729
1730 // Check if incoming PHI value can be replaced with constant
1731 // based on implied condition.
1732 BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator());
1733 const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I);
1734 if (TerminatorBI && TerminatorBI->isConditional() &&
1735 TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) {
1736 bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent();
1737 std::optional<bool> ImpliedCond =
1738 isImpliedCondition(LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getPredicate(),
1739 RHSOp0: Ops[0], RHSOp1: Ops[1], DL, LHSIsTrue);
1740 if (ImpliedCond)
1741 return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value());
1742 }
1743
1744 return nullptr;
1745}
1746
1747Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1748 unsigned NumPHIValues = PN->getNumIncomingValues();
1749 if (NumPHIValues == 0)
1750 return nullptr;
1751
1752 // We normally only transform phis with a single use. However, if a PHI has
1753 // multiple uses and they are all the same operation, we can fold *all* of the
1754 // uses into the PHI.
1755 if (!PN->hasOneUse()) {
1756 // Walk the use list for the instruction, comparing them to I.
1757 for (User *U : PN->users()) {
1758 Instruction *UI = cast<Instruction>(Val: U);
1759 if (UI != &I && !I.isIdenticalTo(I: UI))
1760 return nullptr;
1761 }
1762 // Otherwise, we can replace *all* users with the new PHI we form.
1763 }
1764
1765 // Check to see whether the instruction can be folded into each phi operand.
1766 // If there is one operand that does not fold, remember the BB it is in.
1767 // If there is more than one or if *it* is a PHI, bail out.
1768 SmallVector<Value *> NewPhiValues;
1769 BasicBlock *NonSimplifiedBB = nullptr;
1770 Value *NonSimplifiedInVal = nullptr;
1771 for (unsigned i = 0; i != NumPHIValues; ++i) {
1772 Value *InVal = PN->getIncomingValue(i);
1773 BasicBlock *InBB = PN->getIncomingBlock(i);
1774
1775 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) {
1776 NewPhiValues.push_back(Elt: NewVal);
1777 continue;
1778 }
1779
1780 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1781
1782 NonSimplifiedBB = InBB;
1783 NonSimplifiedInVal = InVal;
1784 NewPhiValues.push_back(Elt: nullptr);
1785
1786 // If the InVal is an invoke at the end of the pred block, then we can't
1787 // insert a computation after it without breaking the edge.
1788 if (isa<InvokeInst>(Val: InVal))
1789 if (cast<Instruction>(Val: InVal)->getParent() == NonSimplifiedBB)
1790 return nullptr;
1791
1792 // If the incoming non-constant value is reachable from the phis block,
1793 // we'll push the operation across a loop backedge. This could result in
1794 // an infinite combine loop, and is generally non-profitable (especially
1795 // if the operation was originally outside the loop).
1796 if (isPotentiallyReachable(From: PN->getParent(), To: NonSimplifiedBB, ExclusionSet: nullptr, DT: &DT,
1797 LI))
1798 return nullptr;
1799 }
1800
1801 // If there is exactly one non-simplified value, we can insert a copy of the
1802 // operation in that block. However, if this is a critical edge, we would be
1803 // inserting the computation on some other paths (e.g. inside a loop). Only
1804 // do this if the pred block is unconditionally branching into the phi block.
1805 // Also, make sure that the pred block is not dead code.
1806 if (NonSimplifiedBB != nullptr) {
1807 BranchInst *BI = dyn_cast<BranchInst>(Val: NonSimplifiedBB->getTerminator());
1808 if (!BI || !BI->isUnconditional() ||
1809 !DT.isReachableFromEntry(A: NonSimplifiedBB))
1810 return nullptr;
1811 }
1812
1813 // Okay, we can do the transformation: create the new PHI node.
1814 PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues());
1815 InsertNewInstBefore(New: NewPN, Old: PN->getIterator());
1816 NewPN->takeName(V: PN);
1817 NewPN->setDebugLoc(PN->getDebugLoc());
1818
1819 // If we are going to have to insert a new computation, do so right before the
1820 // predecessor's terminator.
1821 Instruction *Clone = nullptr;
1822 if (NonSimplifiedBB) {
1823 Clone = I.clone();
1824 for (Use &U : Clone->operands()) {
1825 if (U == PN)
1826 U = NonSimplifiedInVal;
1827 else
1828 U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: NonSimplifiedBB);
1829 }
1830 InsertNewInstBefore(New: Clone, Old: NonSimplifiedBB->getTerminator()->getIterator());
1831 }
1832
1833 for (unsigned i = 0; i != NumPHIValues; ++i) {
1834 if (NewPhiValues[i])
1835 NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i));
1836 else
1837 NewPN->addIncoming(V: Clone, BB: PN->getIncomingBlock(i));
1838 }
1839
1840 for (User *U : make_early_inc_range(Range: PN->users())) {
1841 Instruction *User = cast<Instruction>(Val: U);
1842 if (User == &I) continue;
1843 replaceInstUsesWith(I&: *User, V: NewPN);
1844 eraseInstFromFunction(I&: *User);
1845 }
1846
1847 replaceAllDbgUsesWith(From&: const_cast<PHINode &>(*PN),
1848 To&: const_cast<PHINode &>(*NewPN),
1849 DomPoint&: const_cast<PHINode &>(*PN), DT);
1850 return replaceInstUsesWith(I, V: NewPN);
1851}
1852
1853Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1854 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1855 // we are guarding against replicating the binop in >1 predecessor.
1856 // This could miss matching a phi with 2 constant incoming values.
1857 auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0));
1858 auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1));
1859 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1860 Phi0->getNumOperands() != Phi1->getNumOperands())
1861 return nullptr;
1862
1863 // TODO: Remove the restriction for binop being in the same block as the phis.
1864 if (BO.getParent() != Phi0->getParent() ||
1865 BO.getParent() != Phi1->getParent())
1866 return nullptr;
1867
// Fold if, for each predecessor block, one of phi0's and phi1's incoming
// values is the identity constant of the binary operator; the binop then
// reduces to a phi of the other incoming values.
1871 // For example:
1872 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1873 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1874 // %add = add i32 %phi0, %phi1
1875 // ==>
1876 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1877 Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(),
1878 /*AllowRHSConstant*/ false);
1879 if (C) {
1880 SmallVector<Value *, 4> NewIncomingValues;
1881 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1882 auto &Phi0Use = std::get<0>(t&: T);
1883 auto &Phi1Use = std::get<1>(t&: T);
1884 if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use))
1885 return false;
1886 Value *Phi0UseV = Phi0Use.get();
1887 Value *Phi1UseV = Phi1Use.get();
1888 if (Phi0UseV == C)
1889 NewIncomingValues.push_back(Elt: Phi1UseV);
1890 else if (Phi1UseV == C)
1891 NewIncomingValues.push_back(Elt: Phi0UseV);
1892 else
1893 return false;
1894 return true;
1895 };
1896
1897 if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()),
1898 P: CanFoldIncomingValuePair)) {
1899 PHINode *NewPhi =
1900 PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands());
1901 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1902 "The number of collected incoming values should equal the number "
1903 "of the original PHINode operands!");
1904 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1905 NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I));
1906 return NewPhi;
1907 }
1908 }
1909
1910 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1911 return nullptr;
1912
1913 // Match a pair of incoming constants for one of the predecessor blocks.
1914 BasicBlock *ConstBB, *OtherBB;
1915 Constant *C0, *C1;
1916 if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) {
1917 ConstBB = Phi0->getIncomingBlock(i: 0);
1918 OtherBB = Phi0->getIncomingBlock(i: 1);
1919 } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) {
1920 ConstBB = Phi0->getIncomingBlock(i: 1);
1921 OtherBB = Phi0->getIncomingBlock(i: 0);
1922 } else {
1923 return nullptr;
1924 }
1925 if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1)))
1926 return nullptr;
1927
// The block that we are hoisting to must reach here unconditionally.
// Otherwise, we could end up speculatively executing an expensive or
// non-speculatable op.
1931 auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator());
1932 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1933 !DT.isReachableFromEntry(A: OtherBB))
1934 return nullptr;
1935
1936 // TODO: This check could be tightened to only apply to binops (div/rem) that
1937 // are not safe to speculatively execute. But that could allow hoisting
1938 // potentially expensive instructions (fdiv for example).
1939 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1940 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter))
1941 return nullptr;
1942
1943 // Fold constants for the predecessor block with constant incoming values.
1944 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL);
1945 if (!NewC)
1946 return nullptr;
1947
1948 // Make a new binop in the predecessor block with the non-constant incoming
1949 // values.
1950 Builder.SetInsertPoint(PredBlockBranch);
1951 Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(),
1952 LHS: Phi0->getIncomingValueForBlock(BB: OtherBB),
1953 RHS: Phi1->getIncomingValueForBlock(BB: OtherBB));
1954 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO))
1955 NotFoldedNewBO->copyIRFlags(V: &BO);
1956
1957 // Replace the binop with a phi of the new values. The old phis are dead.
1958 PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2);
1959 NewPhi->addIncoming(V: NewBO, BB: OtherBB);
1960 NewPhi->addIncoming(V: NewC, BB: ConstBB);
1961 return NewPhi;
1962}
1963
1964Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1965 if (!isa<Constant>(Val: I.getOperand(i_nocapture: 1)))
1966 return nullptr;
1967
1968 if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) {
1969 if (Instruction *NewSel = FoldOpIntoSelect(Op&: I, SI: Sel))
1970 return NewSel;
1971 } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) {
1972 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1973 return NewPhi;
1974 }
1975 return nullptr;
1976}
1977
1978static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1979 // If this GEP has only 0 indices, it is the same pointer as
1980 // Src. If Src is not a trivial GEP too, don't combine
1981 // the indices.
1982 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1983 !Src.hasOneUse())
1984 return false;
1985 return true;
1986}
1987
1988Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1989 if (!isa<VectorType>(Val: Inst.getType()))
1990 return nullptr;
1991
1992 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1993 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
1994 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1995 cast<VectorType>(Inst.getType())->getElementCount());
1996 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1997 cast<VectorType>(Inst.getType())->getElementCount());
1998
1999 // If both operands of the binop are vector concatenations, then perform the
2000 // narrow binop on each pair of the source operands followed by concatenation
2001 // of the results.
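// For example (illustrative, using the names below):
//   binop (concat L0, L1), (concat R0, R1)
//     --> concat (binop L0, R0), (binop L1, R1)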
2002 Value *L0, *L1, *R0, *R1;
2003 ArrayRef<int> Mask;
2004 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) &&
2005 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) &&
2006 LHS->hasOneUse() && RHS->hasOneUse() &&
2007 cast<ShuffleVectorInst>(Val: LHS)->isConcat() &&
2008 cast<ShuffleVectorInst>(Val: RHS)->isConcat()) {
2009 // This transform does not have the speculative execution constraint as
2010 // below because the shuffle is a concatenation. The new binops are
2011 // operating on exactly the same elements as the existing binop.
2012 // TODO: We could ease the mask requirement to allow different undef lanes,
2013 // but that requires an analysis of the binop-with-undef output value.
2014 Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0);
2015 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0))
2016 BO->copyIRFlags(V: &Inst);
2017 Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1);
2018 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1))
2019 BO->copyIRFlags(V: &Inst);
2020 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2021 }
2022
2023 auto createBinOpReverse = [&](Value *X, Value *Y) {
2024 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2025 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2026 BO->copyIRFlags(V: &Inst);
2027 Module *M = Inst.getModule();
2028 Function *F = Intrinsic::getDeclaration(
M, Intrinsic::experimental_vector_reverse, Tys: V->getType());
2030 return CallInst::Create(F, V);
2031 };
2032
2033 // NOTE: Reverse shuffles don't require the speculative execution protection
2034 // below because they don't affect which lanes take part in the computation.
2035
2036 Value *V1, *V2;
2037 if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) {
2038 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2039 if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) &&
2040 (LHS->hasOneUse() || RHS->hasOneUse() ||
2041 (LHS == RHS && LHS->hasNUses(N: 2))))
2042 return createBinOpReverse(V1, V2);
2043
2044 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2045 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2046 return createBinOpReverse(V1, RHS);
2047 }
2048 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2049 else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2)))))
2050 return createBinOpReverse(LHS, V2);
2051
2052 // It may not be safe to reorder shuffles and things like div, urem, etc.
2053 // because we may trap when executing those ops on unknown vector elements.
2054 // See PR20059.
2055 if (!isSafeToSpeculativelyExecute(I: &Inst))
2056 return nullptr;
2057
2058 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2059 Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2060 if (auto *BO = dyn_cast<BinaryOperator>(Val: XY))
2061 BO->copyIRFlags(V: &Inst);
2062 return new ShuffleVectorInst(XY, M);
2063 };
2064
2065 // If both arguments of the binary operation are shuffles that use the same
2066 // mask and shuffle within a single vector, move the shuffle after the binop.
2067 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) &&
2068 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
2069 V1->getType() == V2->getType() &&
2070 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2071 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2072 return createBinOpShuffle(V1, V2, Mask);
2073 }
2074
2075 // If both arguments of a commutative binop are select-shuffles that use the
2076 // same mask with commuted operands, the shuffles are unnecessary.
2077 if (Inst.isCommutative() &&
2078 match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) &&
2079 match(V: RHS,
2080 P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) {
2081 auto *LShuf = cast<ShuffleVectorInst>(Val: LHS);
2082 auto *RShuf = cast<ShuffleVectorInst>(Val: RHS);
2083 // TODO: Allow shuffles that contain undefs in the mask?
2084 // That is legal, but it reduces undef knowledge.
2085 // TODO: Allow arbitrary shuffles by shuffling after binop?
2086 // That might be legal, but we have to deal with poison.
2087 if (LShuf->isSelect() &&
2088 !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) &&
2089 RShuf->isSelect() &&
2090 !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) {
2091 // Example:
2092 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2093 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2094 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2095 Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2);
2096 NewBO->copyIRFlags(V: &Inst);
2097 return NewBO;
2098 }
2099 }
2100
2101 // If one argument is a shuffle within one vector and the other is a constant,
2102 // try moving the shuffle after the binary operation. This canonicalization
2103 // intends to move shuffles closer to other shuffles and binops closer to
2104 // other binops, so they can be folded. It may also enable demanded elements
2105 // transforms.
2106 Constant *C;
2107 auto *InstVTy = dyn_cast<FixedVectorType>(Val: Inst.getType());
2108 if (InstVTy &&
2109 match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(),
2110 mask: m_Mask(Mask))),
2111 R: m_ImmConstant(C))) &&
2112 cast<FixedVectorType>(Val: V1->getType())->getNumElements() <=
2113 InstVTy->getNumElements()) {
2114 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
2115 "Shuffle should not change scalar type");
2116
2117 // Find constant NewC that has property:
2118 // shuffle(NewC, ShMask) = C
2119 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
2120 // reorder is not possible. A 1-to-1 mapping is not required. Example:
2121 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
2122 bool ConstOp1 = isa<Constant>(Val: RHS);
2123 ArrayRef<int> ShMask = Mask;
2124 unsigned SrcVecNumElts =
2125 cast<FixedVectorType>(Val: V1->getType())->getNumElements();
2126 PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType());
2127 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
2128 bool MayChange = true;
2129 unsigned NumElts = InstVTy->getNumElements();
2130 for (unsigned I = 0; I < NumElts; ++I) {
2131 Constant *CElt = C->getAggregateElement(Elt: I);
2132 if (ShMask[I] >= 0) {
2133 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2134 Constant *NewCElt = NewVecC[ShMask[I]];
2135 // Bail out if:
2136 // 1. The constant vector contains a constant expression.
2137 // 2. The shuffle needs an element of the constant vector that can't
2138 // be mapped to a new constant vector.
2139 // 3. This is a widening shuffle that copies elements of V1 into the
2140 // extended elements (extending with poison is allowed).
2141 if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) ||
2142 I >= SrcVecNumElts) {
2143 MayChange = false;
2144 break;
2145 }
2146 NewVecC[ShMask[I]] = CElt;
2147 }
2148 // If this is a widening shuffle, we must be able to extend with poison
2149 // elements. If the original binop does not produce a poison in the high
2150 // lanes, then this transform is not safe.
2151 // Similarly for poison lanes due to the shuffle mask, we can only
2152 // transform binops that preserve poison.
2153 // TODO: We could shuffle those non-poison constant values into the
// result by using a constant vector (rather than a poison vector)
2155 // as operand 1 of the new binop, but that might be too aggressive
2156 // for target-independent shuffle creation.
2157 if (I >= SrcVecNumElts || ShMask[I] < 0) {
2158 Constant *MaybePoison =
2159 ConstOp1
2160 ? ConstantFoldBinaryOpOperands(Opcode, LHS: PoisonScalar, RHS: CElt, DL)
2161 : ConstantFoldBinaryOpOperands(Opcode, LHS: CElt, RHS: PoisonScalar, DL);
2162 if (!MaybePoison || !isa<PoisonValue>(Val: MaybePoison)) {
2163 MayChange = false;
2164 break;
2165 }
2166 }
2167 }
2168 if (MayChange) {
2169 Constant *NewC = ConstantVector::get(V: NewVecC);
2170 // It may not be safe to execute a binop on a vector with poison elements
2171 // because the entire instruction can be folded to undef or create poison
2172 // that did not exist in the original code.
2173 // TODO: The shift case should not be necessary.
2174 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
2175 NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1);
2176
2177 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2178 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2179 Value *NewLHS = ConstOp1 ? V1 : NewC;
2180 Value *NewRHS = ConstOp1 ? NewC : V1;
2181 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2182 }
2183 }
2184
2185 // Try to reassociate to sink a splat shuffle after a binary operation.
2186 if (Inst.isAssociative() && Inst.isCommutative()) {
2187 // Canonicalize shuffle operand as LHS.
2188 if (isa<ShuffleVectorInst>(Val: RHS))
2189 std::swap(a&: LHS, b&: RHS);
2190
2191 Value *X;
2192 ArrayRef<int> MaskC;
2193 int SplatIndex;
2194 Value *Y, *OtherOp;
2195 if (!match(V: LHS,
2196 P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) ||
2197 !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) ||
2198 X->getType() != Inst.getType() ||
2199 !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp)))))
2200 return nullptr;
2201
2202 // FIXME: This may not be safe if the analysis allows undef elements. By
2203 // moving 'Y' before the splat shuffle, we are implicitly assuming
2204 // that it is not undef/poison at the splat index.
2205 if (isSplatValue(V: OtherOp, Index: SplatIndex)) {
2206 std::swap(a&: Y, b&: OtherOp);
2207 } else if (!isSplatValue(V: Y, Index: SplatIndex)) {
2208 return nullptr;
2209 }
2210
2211 // X and Y are splatted values, so perform the binary operation on those
2212 // values followed by a splat followed by the 2nd binary operation:
2213 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2214 Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2215 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2216 Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask);
2217 Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp);
2218
2219 // Intersect FMF on both new binops. Other (poison-generating) flags are
2220 // dropped to be safe.
2221 if (isa<FPMathOperator>(Val: R)) {
2222 R->copyFastMathFlags(I: &Inst);
2223 R->andIRFlags(V: RHS);
2224 }
2225 if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO))
2226 NewInstBO->copyIRFlags(V: R);
2227 return R;
2228 }
2229
2230 return nullptr;
2231}
2232
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
2235/// sure the narrow op does not overflow.
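/// For example (illustrative), when known bits prove the narrow add cannot
/// overflow:
///   %xw = zext i8 %x to i32
///   %yw = zext i8 %y to i32
///   %r = add i32 %xw, %yw
/// becomes
///   %n = add nuw i8 %x, %y
///   %r = zext i8 %n to i32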
2236Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2237 // We need at least one extended operand.
2238 Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1);
2239
2240 // If this is a sub, we swap the operands since we always want an extension
2241 // on the RHS. The LHS can be an extension or a constant.
2242 if (BO.getOpcode() == Instruction::Sub)
2243 std::swap(a&: Op0, b&: Op1);
2244
2245 Value *X;
2246 bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X)));
2247 if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X))))
2248 return nullptr;
2249
2250 // If both operands are the same extension from the same source type and we
2251 // can eliminate at least one (hasOneUse), this might work.
2252 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2253 Value *Y;
2254 if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() &&
2255 cast<Operator>(Val: Op1)->getOpcode() == CastOpc &&
2256 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2257 // If that did not match, see if we have a suitable constant operand.
2258 // Truncating and extending must produce the same constant.
2259 Constant *WideC;
2260 if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC)))
2261 return nullptr;
2262 Constant *NarrowC = getLosslessTrunc(C: WideC, TruncTy: X->getType(), ExtOp: CastOpc);
2263 if (!NarrowC)
2264 return nullptr;
2265 Y = NarrowC;
2266 }
2267
2268 // Swap back now that we found our operands.
2269 if (BO.getOpcode() == Instruction::Sub)
2270 std::swap(a&: X, b&: Y);
2271
2272 // Both operands have narrow versions. Last step: the math must not overflow
2273 // in the narrow width.
2274 if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext))
2275 return nullptr;
2276
2277 // bo (ext X), (ext Y) --> ext (bo X, Y)
2278 // bo (ext X), C --> ext (bo X, C')
2279 Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow");
2280 if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) {
2281 if (IsSext)
2282 NewBinOp->setHasNoSignedWrap();
2283 else
2284 NewBinOp->setHasNoUnsignedWrap();
2285 }
2286 return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType());
2287}
2288
2289static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
2290 // At least one GEP must be inbounds.
2291 if (!GEP1.isInBounds() && !GEP2.isInBounds())
2292 return false;
2293
2294 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
2295 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
2296}
2297
2298/// Thread a GEP operation with constant indices through the constant true/false
2299/// arms of a select.
2300static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2301 InstCombiner::BuilderTy &Builder) {
2302 if (!GEP.hasAllConstantIndices())
2303 return nullptr;
2304
2305 Instruction *Sel;
2306 Value *Cond;
2307 Constant *TrueC, *FalseC;
2308 if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) ||
2309 !match(V: Sel,
2310 P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC))))
2311 return nullptr;
2312
2313 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2314 // Propagate 'inbounds' and metadata from existing instructions.
2315 // Note: using IRBuilder to create the constants for efficiency.
2316 SmallVector<Value *, 4> IndexC(GEP.indices());
2317 bool IsInBounds = GEP.isInBounds();
2318 Type *Ty = GEP.getSourceElementType();
2319 Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "", IsInBounds);
2320 Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "", IsInBounds);
2321 return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2322}
2323
2324Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2325 GEPOperator *Src) {
2326 // Combine Indices - If the source pointer to this getelementptr instruction
2327 // is a getelementptr instruction with matching element type, combine the
2328 // indices of the two getelementptr instructions into a single instruction.
2329 if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src))
2330 return nullptr;
2331
2332 // For constant GEPs, use a more general offset-based folding approach.
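// For example (illustrative, assuming 4-byte i32 and a 64-bit index type):
//   %p1 = getelementptr inbounds [4 x i32], ptr %p, i64 0, i64 1
//   %p2 = getelementptr inbounds i32, ptr %p1, i64 2
// has a total constant offset of 12 bytes and can be rewritten as the single
//   getelementptr inbounds [4 x i32], ptr %p, i64 0, i64 3
// (or as a getelementptr of i8 when the offset cannot be re-expressed in
// terms of the source element type).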
2333 Type *PtrTy = Src->getType()->getScalarType();
2334 if (GEP.hasAllConstantIndices() &&
2335 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2336 // Split Src into a variable part and a constant suffix.
2337 gep_type_iterator GTI = gep_type_begin(GEP: *Src);
2338 Type *BaseType = GTI.getIndexedType();
2339 bool IsFirstType = true;
2340 unsigned NumVarIndices = 0;
2341 for (auto Pair : enumerate(First: Src->indices())) {
2342 if (!isa<ConstantInt>(Val: Pair.value())) {
2343 BaseType = GTI.getIndexedType();
2344 IsFirstType = false;
2345 NumVarIndices = Pair.index() + 1;
2346 }
2347 ++GTI;
2348 }
2349
2350 // Determine the offset for the constant suffix of Src.
2351 APInt Offset(DL.getIndexTypeSizeInBits(Ty: PtrTy), 0);
2352 if (NumVarIndices != Src->getNumIndices()) {
// FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2354 if (BaseType->isScalableTy())
2355 return nullptr;
2356
2357 SmallVector<Value *> ConstantIndices;
2358 if (!IsFirstType)
2359 ConstantIndices.push_back(
2360 Elt: Constant::getNullValue(Ty: Type::getInt32Ty(C&: GEP.getContext())));
2361 append_range(C&: ConstantIndices, R: drop_begin(RangeOrContainer: Src->indices(), N: NumVarIndices));
2362 Offset += DL.getIndexedOffsetInType(ElemTy: BaseType, Indices: ConstantIndices);
2363 }
2364
2365 // Add the offset for GEP (which is fully constant).
2366 if (!GEP.accumulateConstantOffset(DL, Offset))
2367 return nullptr;
2368
2369 APInt OffsetOld = Offset;
2370 // Convert the total offset back into indices.
2371 SmallVector<APInt> ConstIndices =
2372 DL.getGEPIndicesForOffset(ElemTy&: BaseType, Offset);
2373 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
// If both GEPs are constant-indexed and cannot be merged either way,
// convert them to a GEP of i8.
2376 if (Src->hasAllConstantIndices())
2377 return replaceInstUsesWith(
2378 I&: GEP, V: Builder.CreateGEP(
2379 Ty: Builder.getInt8Ty(), Ptr: Src->getOperand(i_nocapture: 0),
2380 IdxList: Builder.getInt(AI: OffsetOld), Name: "",
2381 IsInBounds: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
2382 return nullptr;
2383 }
2384
2385 bool IsInBounds = isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP));
2386 SmallVector<Value *> Indices;
2387 append_range(C&: Indices, R: drop_end(RangeOrContainer: Src->indices(),
2388 N: Src->getNumIndices() - NumVarIndices));
2389 for (const APInt &Idx : drop_begin(RangeOrContainer&: ConstIndices, N: !IsFirstType)) {
2390 Indices.push_back(Elt: ConstantInt::get(Context&: GEP.getContext(), V: Idx));
2391 // Even if the total offset is inbounds, we may end up representing it
2392 // by first performing a larger negative offset, and then a smaller
2393 // positive one. The large negative offset might go out of bounds. Only
2394 // preserve inbounds if all signs are the same.
2395 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2396 }
2397
2398 return replaceInstUsesWith(
2399 I&: GEP, V: Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0),
2400 IdxList: Indices, Name: "", IsInBounds));
2401 }
2402
2403 if (Src->getResultElementType() != GEP.getSourceElementType())
2404 return nullptr;
2405
2406 SmallVector<Value*, 8> Indices;
2407
2408 // Find out whether the last index in the source GEP is a sequential idx.
2409 bool EndsWithSequential = false;
2410 for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src);
2411 I != E; ++I)
2412 EndsWithSequential = I.isSequential();
2413
2414 // Can we combine the two pointer arithmetics offsets?
2415 if (EndsWithSequential) {
2416 // Replace: gep (gep %P, long B), long A, ...
2417 // With: T = long A+B; gep %P, T, ...
2418 Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands()-1);
2419 Value *GO1 = GEP.getOperand(i_nocapture: 1);
2420
2421 // If they aren't the same type, then the input hasn't been processed
2422 // by the loop above yet (which canonicalizes sequential index types to
2423 // intptr_t). Just avoid transforming this until the input has been
2424 // normalized.
2425 if (SO1->getType() != GO1->getType())
2426 return nullptr;
2427
2428 Value *Sum =
2429 simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP));
2430 // Only do the combine when we are sure the cost after the
2431 // merge is never more than that before the merge.
2432 if (Sum == nullptr)
2433 return nullptr;
2434
2435 // Update the GEP in place if possible.
2436 if (Src->getNumOperands() == 2) {
2437 GEP.setIsInBounds(isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)));
2438 replaceOperand(I&: GEP, OpNum: 0, V: Src->getOperand(i_nocapture: 0));
2439 replaceOperand(I&: GEP, OpNum: 1, V: Sum);
2440 return &GEP;
2441 }
2442 Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()-1);
2443 Indices.push_back(Elt: Sum);
2444 Indices.append(in_start: GEP.op_begin()+2, in_end: GEP.op_end());
2445 } else if (isa<Constant>(Val: *GEP.idx_begin()) &&
2446 cast<Constant>(Val&: *GEP.idx_begin())->isNullValue() &&
2447 Src->getNumOperands() != 1) {
// Otherwise we can do the fold if the first index of the GEP is zero.
2449 Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end());
2450 Indices.append(in_start: GEP.idx_begin()+1, in_end: GEP.idx_end());
2451 }
2452
2453 if (!Indices.empty())
2454 return replaceInstUsesWith(
2455 I&: GEP, V: Builder.CreateGEP(
2456 Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "",
2457 IsInBounds: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
2458
2459 return nullptr;
2460}
2461
2462Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2463 BuilderTy *Builder,
2464 bool &DoesConsume, unsigned Depth) {
2465 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2466 // ~(~(X)) -> X.
2467 Value *A, *B;
2468 if (match(V, P: m_Not(V: m_Value(V&: A)))) {
2469 DoesConsume = true;
2470 return A;
2471 }
2472
2473 Constant *C;
2474 // Constants can be considered to be not'ed values.
2475 if (match(V, P: m_ImmConstant(C)))
2476 return ConstantExpr::getNot(C);
2477
2478 if (Depth++ >= MaxAnalysisRecursionDepth)
2479 return nullptr;
2480
2481 // The rest of the cases require that we invert all uses so don't bother
2482 // doing the analysis if we know we can't use the result.
2483 if (!WillInvertAllUses)
2484 return nullptr;
2485
2486 // Compares can be inverted if all of their uses are being modified to use
2487 // the ~V.
2488 if (auto *I = dyn_cast<CmpInst>(Val: V)) {
2489 if (Builder != nullptr)
2490 return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0),
2491 RHS: I->getOperand(i_nocapture: 1));
2492 return NonNull;
2493 }
2494
2495 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2496 // `(-1 - B) - A` if we are willing to invert all of the uses.
2497 if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2498 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2499 DoesConsume, Depth))
2500 return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull;
2501 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2502 DoesConsume, Depth))
2503 return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull;
2504 return nullptr;
2505 }
2506
2507 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2508 // into `A ^ B` if we are willing to invert all of the uses.
2509 if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2510 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2511 DoesConsume, Depth))
2512 return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull;
2513 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2514 DoesConsume, Depth))
2515 return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull;
2516 return nullptr;
2517 }
2518
// If `V` is of the form `A - B` then `-1 - V` can be folded into
// `(-1 - A) + B` if we are willing to invert all of the uses.
2521 if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2522 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2523 DoesConsume, Depth))
2524 return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull;
2525 return nullptr;
2526 }
2527
2528 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2529 // into `A s>> B` if we are willing to invert all of the uses.
2530 if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2531 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2532 DoesConsume, Depth))
2533 return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull;
2534 return nullptr;
2535 }
2536
2537 Value *Cond;
2538 // LogicOps are special in that we canonicalize them at the cost of an
2539 // instruction.
2540 bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) &&
2541 !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V));
2542 // Selects/min/max with invertible operands are freely invertible
2543 if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2544 bool LocalDoesConsume = DoesConsume;
2545 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr,
2546 DoesConsume&: LocalDoesConsume, Depth))
2547 return nullptr;
2548 if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2549 DoesConsume&: LocalDoesConsume, Depth)) {
2550 DoesConsume = LocalDoesConsume;
2551 if (Builder != nullptr) {
2552 Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2553 DoesConsume, Depth);
2554 assert(NotB != nullptr &&
2555 "Unable to build inverted value for known freely invertable op");
2556 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
2557 return Builder->CreateBinaryIntrinsic(
2558 ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB);
2559 return Builder->CreateSelect(C: Cond, True: NotA, False: NotB);
2560 }
2561 return NonNull;
2562 }
2563 }
2564
2565 if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
2566 bool LocalDoesConsume = DoesConsume;
2567 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
2568 for (Use &U : PN->operands()) {
2569 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2570 Value *NewIncomingVal = getFreelyInvertedImpl(
2571 V: U.get(), /*WillInvertAllUses=*/false,
2572 /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1);
2573 if (NewIncomingVal == nullptr)
2574 return nullptr;
2575 // Make sure that we can safely erase the original PHI node.
2576 if (NewIncomingVal == V)
2577 return nullptr;
2578 if (Builder != nullptr)
2579 IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock);
2580 }
2581
2582 DoesConsume = LocalDoesConsume;
2583 if (Builder != nullptr) {
2584 IRBuilderBase::InsertPointGuard Guard(*Builder);
2585 Builder->SetInsertPoint(PN);
2586 PHINode *NewPN =
2587 Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues());
2588 for (auto [Val, Pred] : IncomingValues)
2589 NewPN->addIncoming(V: Val, BB: Pred);
2590 return NewPN;
2591 }
2592 return NonNull;
2593 }
2594
2595 if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) {
2596 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2597 DoesConsume, Depth))
2598 return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull;
2599 return nullptr;
2600 }
2601
2602 if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) {
2603 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2604 DoesConsume, Depth))
2605 return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull;
2606 return nullptr;
2607 }
2608
2609 // De Morgan's Laws:
2610 // (~(A | B)) -> (~A & ~B)
2611 // (~(A & B)) -> (~A | ~B)
2612 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2613 bool IsLogical, Value *A,
2614 Value *B) -> Value * {
2615 bool LocalDoesConsume = DoesConsume;
2616 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr,
2617 DoesConsume&: LocalDoesConsume, Depth))
2618 return nullptr;
2619 if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2620 DoesConsume&: LocalDoesConsume, Depth)) {
2621 auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2622 DoesConsume&: LocalDoesConsume, Depth);
2623 DoesConsume = LocalDoesConsume;
2624 if (IsLogical)
2625 return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull;
2626 return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull;
2627 }
2628
2629 return nullptr;
2630 };
2631
2632 if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))))
2633 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2634 B);
2635
2636 if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))))
2637 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2638 B);
2639
2640 if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))
2641 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2642 B);
2643
2644 if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B))))
2645 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2646 B);
2647
2648 return nullptr;
2649}
2650
2651Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2652 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
2653 SmallVector<Value *, 8> Indices(GEP.indices());
2654 Type *GEPType = GEP.getType();
2655 Type *GEPEltType = GEP.getSourceElementType();
2656 bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
2657 if (Value *V = simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, InBounds: GEP.isInBounds(),
2658 Q: SQ.getWithInstruction(I: &GEP)))
2659 return replaceInstUsesWith(I&: GEP, V);
2660
2661 // For vector geps, use the generic demanded vector support.
2662 // Skip if GEP return type is scalable. The number of elements is unknown at
2663 // compile-time.
2664 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) {
2665 auto VWidth = GEPFVTy->getNumElements();
2666 APInt PoisonElts(VWidth, 0);
2667 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
2668 if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask,
2669 PoisonElts)) {
2670 if (V != &GEP)
2671 return replaceInstUsesWith(I&: GEP, V);
2672 return &GEP;
2673 }
2674
2675 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2676 // possible (decide on canonical form for pointer broadcast), 3) exploit
2677 // undef elements to decrease demanded bits
2678 }
2679
// Eliminate unneeded casts for indices, and replace any index over a
// zero-size type with zero, since such an index displaces by nothing.
2682 bool MadeChange = false;
2683
2684 // Index width may not be the same width as pointer width.
2685 // Data layout chooses the right type based on supported integer types.
2686 Type *NewScalarIndexTy =
2687 DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType());
2688
2689 gep_type_iterator GTI = gep_type_begin(GEP);
2690 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2691 ++I, ++GTI) {
2692 // Skip indices into struct types.
2693 if (GTI.isStruct())
2694 continue;
2695
2696 Type *IndexTy = (*I)->getType();
2697 Type *NewIndexType =
2698 IndexTy->isVectorTy()
2699 ? VectorType::get(ElementType: NewScalarIndexTy,
2700 EC: cast<VectorType>(Val: IndexTy)->getElementCount())
2701 : NewScalarIndexTy;
2702
2703 // If the element type has zero size then any index over it is equivalent
2704 // to an index of zero, so replace it with zero if it is not zero already.
2705 Type *EltTy = GTI.getIndexedType();
2706 if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero())
2707 if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) {
2708 *I = Constant::getNullValue(Ty: NewIndexType);
2709 MadeChange = true;
2710 }
2711
2712 if (IndexTy != NewIndexType) {
2713 // If we are using a wider index than needed for this platform, shrink
2714 // it to what we need. If narrower, sign-extend it to what we need.
2715 // This explicit cast can make subsequent optimizations more obvious.
2716 *I = Builder.CreateIntCast(V: *I, DestTy: NewIndexType, isSigned: true);
2717 MadeChange = true;
2718 }
2719 }
2720 if (MadeChange)
2721 return &GEP;
2722
2723 // Canonicalize constant GEPs to i8 type.
2724 if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) {
2725 APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0);
2726 if (GEP.accumulateConstantOffset(DL, Offset))
2727 return replaceInstUsesWith(
2728 I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "",
2729 IsInBounds: GEP.isInBounds()));
2730 }
2731
2732 // Check to see if the inputs to the PHI node are getelementptr instructions.
2733 if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) {
2734 auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0));
2735 if (!Op1)
2736 return nullptr;
2737
2738 // Don't fold a GEP into itself through a PHI node. This can only happen
2739 // through the back-edge of a loop. Folding a GEP into itself means that
2740 // the value of the previous iteration needs to be stored in the meantime,
2741 // thus requiring an additional register variable to be live, but not
2742 // actually achieving anything (the GEP still needs to be executed once per
2743 // loop iteration).
2744 if (Op1 == &GEP)
2745 return nullptr;
2746
2747 int DI = -1;
2748
2749 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2750 auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I);
2751 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2752 Op1->getSourceElementType() != Op2->getSourceElementType())
2753 return nullptr;
2754
2755 // As for Op1 above, don't try to fold a GEP into itself.
2756 if (Op2 == &GEP)
2757 return nullptr;
2758
2759 // Keep track of the type as we walk the GEP.
2760 Type *CurTy = nullptr;
2761
2762 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2763 if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType())
2764 return nullptr;
2765
2766 if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) {
2767 if (DI == -1) {
            // We have not seen any differences in the GEPs feeding the PHI
            // so far, so record this one if it is allowed to be a variable.
2771
            // The first two arguments can vary for any GEP; the rest have to
            // be static for struct slots.
2774 if (J > 1) {
2775 assert(CurTy && "No current type?");
2776 if (CurTy->isStructTy())
2777 return nullptr;
2778 }
2779
2780 DI = J;
2781 } else {
            // The GEPs differ in more than one operand. While this could be
            // extended to support GEPs that vary in more than one position,
            // it doesn't make sense: it greatly increases the complexity and
            // would result in an R+R+R addressing mode, which no backend
            // supports directly and which would need to be broken into
            // several simpler instructions anyway.
2788 return nullptr;
2789 }
2790 }
2791
2792 // Sink down a layer of the type for the next iteration.
2793 if (J > 0) {
2794 if (J == 1) {
2795 CurTy = Op1->getSourceElementType();
2796 } else {
2797 CurTy =
2798 GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J));
2799 }
2800 }
2801 }
2802 }
2803
2804 // If not all GEPs are identical we'll have to create a new PHI node.
2805 // Check that the old PHI node has only one use so that it will get
2806 // removed.
2807 if (DI != -1 && !PN->hasOneUse())
2808 return nullptr;
2809
2810 auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone());
2811 if (DI == -1) {
2812 // All the GEPs feeding the PHI are identical. Clone one down into our
2813 // BB so that it can be merged with the current GEP.
2814 } else {
2815 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2816 // into the current block so it can be merged, and create a new PHI to
2817 // set that index.
2818 PHINode *NewPN;
2819 {
2820 IRBuilderBase::InsertPointGuard Guard(Builder);
2821 Builder.SetInsertPoint(PN);
2822 NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(),
2823 NumReservedValues: PN->getNumOperands());
2824 }
2825
2826 for (auto &I : PN->operands())
2827 NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI),
2828 BB: PN->getIncomingBlock(U: I));
2829
2830 NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN);
2831 }
2832
2833 NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt());
2834 return replaceOperand(I&: GEP, OpNum: 0, V: NewGEP);
2835 }
2836
2837 if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp))
2838 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2839 return I;
2840
2841 // Skip if GEP source element type is scalable. The type alloc size is unknown
2842 // at compile-time.
2843 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2844 unsigned AS = GEP.getPointerAddressSpace();
2845 if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() ==
2846 DL.getIndexSizeInBits(AS)) {
2847 uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue();
2848
2849 if (TyAllocSize == 1) {
2850 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
2851 // but only if the result pointer is only used as if it were an integer,
2852 // or both point to the same underlying object (otherwise provenance is
2853 // not necessarily retained).
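        // Illustrative IR (names are arbitrary):
        //   %d = sub i64 %y.int, %x.int   ; ptrtoints of Y and X
        //   %g = getelementptr i8, ptr %x, i64 %d
        // Uses of %g that only care about the address (icmp, ptrtoint), or
        // all uses when X and Y share an underlying object, can be rewritten
        // to use %y directly.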
2854 Value *X = GEP.getPointerOperand();
2855 Value *Y;
2856 if (match(V: GEP.getOperand(i_nocapture: 1),
2857 P: m_Sub(L: m_PtrToInt(Op: m_Value(V&: Y)), R: m_PtrToInt(Op: m_Specific(V: X)))) &&
2858 GEPType == Y->getType()) {
2859 bool HasSameUnderlyingObject =
2860 getUnderlyingObject(V: X) == getUnderlyingObject(V: Y);
2861 bool Changed = false;
2862 GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) {
2863 bool ShouldReplace = HasSameUnderlyingObject ||
2864 isa<ICmpInst>(Val: U.getUser()) ||
2865 isa<PtrToIntInst>(Val: U.getUser());
2866 Changed |= ShouldReplace;
2867 return ShouldReplace;
2868 });
2869 return Changed ? &GEP : nullptr;
2870 }
2871 } else {
2872 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
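        // For example (illustrative, with T = i32, so sizeof(T) == 4):
        //   %i = udiv exact i64 %n, 4
        //   %g = getelementptr i32, ptr %x, i64 %i
        // becomes
        //   %g = getelementptr i8, ptr %x, i64 %n
        // The division (or exact lshr) must be exact for this to hold.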
2873 Value *V;
2874 if ((has_single_bit(Value: TyAllocSize) &&
2875 match(V: GEP.getOperand(i_nocapture: 1),
2876 P: m_Exact(SubPattern: m_Shr(L: m_Value(V),
2877 R: m_SpecificInt(V: countr_zero(Val: TyAllocSize)))))) ||
2878 match(V: GEP.getOperand(i_nocapture: 1),
2879 P: m_Exact(SubPattern: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize))))) {
2880 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2881 PointeeType: Builder.getInt8Ty(), Ptr: GEP.getPointerOperand(), IdxList: V);
2882 NewGEP->setIsInBounds(GEP.isInBounds());
2883 return NewGEP;
2884 }
2885 }
2886 }
2887 }
2888 // We do not handle pointer-vector geps here.
2889 if (GEPType->isVectorTy())
2890 return nullptr;
2891
2892 if (GEP.getNumIndices() == 1) {
2893 // Try to replace ADD + GEP with GEP + GEP.
2894 Value *Idx1, *Idx2;
2895 if (match(V: GEP.getOperand(i_nocapture: 1),
2896 P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) {
2897 // %idx = add i64 %idx1, %idx2
2898 // %gep = getelementptr i32, ptr %ptr, i64 %idx
2899 // as:
2900 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
2901 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
2902 auto *NewPtr = Builder.CreateGEP(Ty: GEP.getResultElementType(),
2903 Ptr: GEP.getPointerOperand(), IdxList: Idx1);
2904 return GetElementPtrInst::Create(PointeeType: GEP.getResultElementType(), Ptr: NewPtr,
2905 IdxList: Idx2);
2906 }
2907 ConstantInt *C;
2908 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAdd(
2909 L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) {
2910 // %add = add nsw i32 %idx1, idx2
2911 // %sidx = sext i32 %add to i64
2912 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
2913 // as:
2914 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
2915 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
2916 auto *NewPtr = Builder.CreateGEP(
2917 Ty: GEP.getResultElementType(), Ptr: GEP.getPointerOperand(),
2918 IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()));
2919 return GetElementPtrInst::Create(
2920 PointeeType: GEP.getResultElementType(), Ptr: NewPtr,
2921 IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()));
2922 }
2923 }
2924
2925 if (!GEP.isInBounds()) {
2926 unsigned IdxWidth =
2927 DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace());
2928 APInt BasePtrOffset(IdxWidth, 0);
2929 Value *UnderlyingPtrOp =
2930 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2931 Offset&: BasePtrOffset);
2932 bool CanBeNull, CanBeFreed;
2933 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
2934 DL, CanBeNull, CanBeFreed);
2935 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
2936 if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) &&
2937 BasePtrOffset.isNonNegative()) {
2938 APInt AllocSize(IdxWidth, DerefBytes);
2939 if (BasePtrOffset.ule(RHS: AllocSize)) {
2940 return GetElementPtrInst::CreateInBounds(
2941 PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName());
2942 }
2943 }
2944 }
2945 }
2946
2947 if (Instruction *R = foldSelectGEP(GEP, Builder))
2948 return R;
2949
2950 return nullptr;
2951}
2952
2953static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2954 Instruction *AI) {
2955 if (isa<ConstantPointerNull>(Val: V))
2956 return true;
2957 if (auto *LI = dyn_cast<LoadInst>(Val: V))
2958 return isa<GlobalVariable>(Val: LI->getPointerOperand());
2959 // Two distinct allocations will never be equal.
2960 return isAllocLikeFn(V, TLI: &TLI) && V != AI;
2961}
2962
/// Given a call CB which uses an address UsedV, return true if we can prove
/// the call's only possible effect is storing to UsedV.
2965static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2966 const TargetLibraryInfo &TLI) {
2967 if (!CB.use_empty())
2968 // TODO: add recursion if returned attribute is present
2969 return false;
2970
2971 if (CB.isTerminator())
2972 // TODO: remove implementation restriction
2973 return false;
2974
2975 if (!CB.willReturn() || !CB.doesNotThrow())
2976 return false;
2977
2978 // If the only possible side effect of the call is writing to the alloca,
2979 // and the result isn't used, we can safely remove any reads implied by the
2980 // call including those which might read the alloca itself.
2981 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI);
2982 return Dest && Dest->Ptr == UsedV;
2983}
2984
2985static bool isAllocSiteRemovable(Instruction *AI,
2986 SmallVectorImpl<WeakTrackingVH> &Users,
2987 const TargetLibraryInfo &TLI) {
2988 SmallVector<Instruction*, 4> Worklist;
2989 const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI);
2990 Worklist.push_back(Elt: AI);
2991
2992 do {
2993 Instruction *PI = Worklist.pop_back_val();
2994 for (User *U : PI->users()) {
2995 Instruction *I = cast<Instruction>(Val: U);
2996 switch (I->getOpcode()) {
2997 default:
2998 // Give up the moment we see something we can't handle.
2999 return false;
3000
3001 case Instruction::AddrSpaceCast:
3002 case Instruction::BitCast:
3003 case Instruction::GetElementPtr:
3004 Users.emplace_back(Args&: I);
3005 Worklist.push_back(Elt: I);
3006 continue;
3007
3008 case Instruction::ICmp: {
3009 ICmpInst *ICI = cast<ICmpInst>(Val: I);
3010 // We can fold eq/ne comparisons with null to false/true, respectively.
3011 // We also fold comparisons in some conditions provided the alloc has
3012 // not escaped (see isNeverEqualToUnescapedAlloc).
3013 if (!ICI->isEquality())
3014 return false;
3015 unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0;
3016 if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI))
3017 return false;
3018
3019 // Do not fold compares to aligned_alloc calls, as they may have to
3020 // return null in case the required alignment cannot be satisfied,
3021 // unless we can prove that both alignment and size are valid.
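        // For example, a null compare of aligned_alloc(8, 24) may still be
        // folded: the alignment is a power of 2 and the size is a multiple of
        // it, so the call is not required to fail for that reason.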
3022 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
          // Check if the alignment and size of a call to aligned_alloc are
          // valid, that is, the alignment is a power of 2 and the size is a
          // multiple of the alignment.
3026 const APInt *Alignment;
3027 const APInt *Size;
3028 return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) &&
3029 match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) &&
3030 Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero();
3031 };
3032 auto *CB = dyn_cast<CallBase>(Val: AI);
3033 LibFunc TheLibFunc;
3034 if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) &&
3035 TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3036 !AlignmentAndSizeKnownValid(CB))
3037 return false;
3038 Users.emplace_back(Args&: I);
3039 continue;
3040 }
3041
3042 case Instruction::Call:
3043 // Ignore no-op and store intrinsics.
3044 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3045 switch (II->getIntrinsicID()) {
3046 default:
3047 return false;
3048
3049 case Intrinsic::memmove:
3050 case Intrinsic::memcpy:
3051 case Intrinsic::memset: {
3052 MemIntrinsic *MI = cast<MemIntrinsic>(Val: II);
3053 if (MI->isVolatile() || MI->getRawDest() != PI)
3054 return false;
3055 [[fallthrough]];
3056 }
3057 case Intrinsic::assume:
3058 case Intrinsic::invariant_start:
3059 case Intrinsic::invariant_end:
3060 case Intrinsic::lifetime_start:
3061 case Intrinsic::lifetime_end:
3062 case Intrinsic::objectsize:
3063 Users.emplace_back(Args&: I);
3064 continue;
3065 case Intrinsic::launder_invariant_group:
3066 case Intrinsic::strip_invariant_group:
3067 Users.emplace_back(Args&: I);
3068 Worklist.push_back(Elt: I);
3069 continue;
3070 }
3071 }
3072
3073 if (isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) {
3074 Users.emplace_back(Args&: I);
3075 continue;
3076 }
3077
3078 if (getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI &&
3079 getAllocationFamily(I, TLI: &TLI) == Family) {
3080 assert(Family);
3081 Users.emplace_back(Args&: I);
3082 continue;
3083 }
3084
3085 if (getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI &&
3086 getAllocationFamily(I, TLI: &TLI) == Family) {
3087 assert(Family);
3088 Users.emplace_back(Args&: I);
3089 Worklist.push_back(Elt: I);
3090 continue;
3091 }
3092
3093 return false;
3094
3095 case Instruction::Store: {
3096 StoreInst *SI = cast<StoreInst>(Val: I);
3097 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3098 return false;
3099 Users.emplace_back(Args&: I);
3100 continue;
3101 }
3102 }
3103 llvm_unreachable("missing a return?");
3104 }
3105 } while (!Worklist.empty());
3106 return true;
3107}
3108
3109Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3110 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3111
  // If we have a malloc call whose only uses are comparisons to null and free
  // calls, delete the calls and replace the comparisons with true or false as
  // appropriate.
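  // Illustrative IR for a removable allocation site (names are arbitrary):
  //   %p = call ptr @malloc(i64 16)
  //   %c = icmp eq ptr %p, null
  //   call void @free(ptr %p)
  // All three instructions can be removed, with %c replaced by false.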
3115
3116 // This is based on the principle that we can substitute our own allocation
3117 // function (which will never return null) rather than knowledge of the
3118 // specific function being called. In some sense this can change the permitted
3119 // outputs of a program (when we convert a malloc to an alloca, the fact that
3120 // the allocation is now on the stack is potentially visible, for example),
  // but we believe it does so in a permissible manner.
3122 SmallVector<WeakTrackingVH, 64> Users;
3123
3124 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3125 // before each store.
3126 SmallVector<DbgVariableIntrinsic *, 8> DVIs;
3127 SmallVector<DbgVariableRecord *, 8> DVRs;
3128 std::unique_ptr<DIBuilder> DIB;
3129 if (isa<AllocaInst>(Val: MI)) {
3130 findDbgUsers(DbgInsts&: DVIs, V: &MI, DbgVariableRecords: &DVRs);
3131 DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3132 }
3133
3134 if (isAllocSiteRemovable(AI: &MI, Users, TLI)) {
3135 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
      // Lower all @llvm.objectsize calls first because they may use a
      // bitcast/GEP of the alloca we are removing.
3138 if (!Users[i])
3139 continue;
3140
3141 Instruction *I = cast<Instruction>(Val: &*Users[i]);
3142
3143 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3144 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3145 SmallVector<Instruction *> InsertedInstructions;
3146 Value *Result = lowerObjectSizeCall(
3147 ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions);
3148 for (Instruction *Inserted : InsertedInstructions)
3149 Worklist.add(I: Inserted);
3150 replaceInstUsesWith(I&: *I, V: Result);
3151 eraseInstFromFunction(I&: *I);
3152 Users[i] = nullptr; // Skip examining in the next loop.
3153 }
3154 }
3155 }
3156 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3157 if (!Users[i])
3158 continue;
3159
3160 Instruction *I = cast<Instruction>(Val: &*Users[i]);
3161
3162 if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) {
3163 replaceInstUsesWith(I&: *C,
3164 V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()),
3165 V: C->isFalseWhenEqual()));
3166 } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
3167 for (auto *DVI : DVIs)
3168 if (DVI->isAddressOfVariable())
3169 ConvertDebugDeclareToDebugValue(DII: DVI, SI, Builder&: *DIB);
3170 for (auto *DVR : DVRs)
3171 if (DVR->isAddressOfVariable())
3172 ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB);
3173 } else {
        // Casts, GEPs, or anything else: we're about to delete this
        // instruction, so it cannot have any valid uses.
3176 replaceInstUsesWith(I&: *I, V: PoisonValue::get(T: I->getType()));
3177 }
3178 eraseInstFromFunction(I&: *I);
3179 }
3180
3181 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) {
3182 // Replace invoke with a NOP intrinsic to maintain the original CFG
3183 Module *M = II->getModule();
      Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
3185 InvokeInst::Create(Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(),
3186 Args: std::nullopt, NameStr: "", InsertAtEnd: II->getParent());
3187 }
3188
3189 // Remove debug intrinsics which describe the value contained within the
3190 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3191 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3192 //
3193 // ```
3194 // define void @foo(i32 %0) {
3195 // %a = alloca i32 ; Deleted.
3196 // store i32 %0, i32* %a
3197 // dbg.value(i32 %0, "arg0") ; Not deleted.
3198 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3199 // call void @trivially_inlinable_no_op(i32* %a)
3200 // ret void
3201 // }
3202 // ```
3203 //
3204 // This may not be required if we stop describing the contents of allocas
3205 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3206 // the LowerDbgDeclare utility.
3207 //
3208 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3209 // "arg0" dbg.value may be stale after the call. However, failing to remove
3210 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3211 //
3212 // FIXME: the Assignment Tracking project has now likely made this
3213 // redundant (and it's sometimes harmful).
3214 for (auto *DVI : DVIs)
3215 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
3216 DVI->eraseFromParent();
3217 for (auto *DVR : DVRs)
3218 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3219 DVR->eraseFromParent();
3220
3221 return eraseInstFromFunction(I&: MI);
3222 }
3223 return nullptr;
3224}
3225
3226/// Move the call to free before a NULL test.
3227///
/// Check if this free is accessed after its argument has been tested
/// against NULL (property 0).
/// If yes, it is legal to move this call into its predecessor block.
3231///
3232/// The move is performed only if the block containing the call to free
3233/// will be removed, i.e.:
3234/// 1. it has only one predecessor P, and P has two successors
3235/// 2. it contains the call, noops, and an unconditional branch
3236/// 3. its successor is the same as its predecessor's successor
3237///
/// Profitability is not a concern here; this function should be called only
/// if the caller knows this transformation would be profitable (e.g., for
/// code size).
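///
/// Illustrative CFG shape (a sketch; block and value names are arbitrary):
///   pred:
///     %c = icmp eq ptr %p, null
///     br i1 %c, label %cont, label %free.bb
///   free.bb:
///     call void @free(ptr %p)
///     br label %cont
/// The call to free is hoisted into 'pred' before the branch (free of a null
/// pointer is a no-op), leaving 'free.bb' with only its branch so that
/// SimplifyCFG can remove it.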
3241static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
3242 const DataLayout &DL) {
3243 Value *Op = FI.getArgOperand(i: 0);
3244 BasicBlock *FreeInstrBB = FI.getParent();
3245 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3246
3247 // Validate part of constraint #1: Only one predecessor
  // FIXME: We can extend this to multiple predecessors, but in that case we
  // would duplicate the call to free in each predecessor and it may not be
  // profitable even for code size.
3251 if (!PredBB)
3252 return nullptr;
3253
  // Validate constraint #2: Does this block contain only the call to
  // free, noops, and an unconditional branch?
3256 BasicBlock *SuccBB;
3257 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3258 if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB)))
3259 return nullptr;
3260
  // If there are only 2 instructions in the block, at this point they must be
  // the call to free and the unconditional branch.
  // If there are more than 2 instructions, check that the extra ones are
  // noops, i.e., they won't hurt the performance of the generated code.
3265 if (FreeInstrBB->size() != 2) {
3266 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3267 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3268 continue;
3269 auto *Cast = dyn_cast<CastInst>(Val: &Inst);
3270 if (!Cast || !Cast->isNoopCast(DL))
3271 return nullptr;
3272 }
3273 }
3274 // Validate the rest of constraint #1 by matching on the pred branch.
3275 Instruction *TI = PredBB->getTerminator();
3276 BasicBlock *TrueBB, *FalseBB;
3277 ICmpInst::Predicate Pred;
3278 if (!match(V: TI, P: m_Br(C: m_ICmp(Pred,
3279 L: m_CombineOr(L: m_Specific(V: Op),
3280 R: m_Specific(V: Op->stripPointerCasts())),
3281 R: m_Zero()),
3282 T&: TrueBB, F&: FalseBB)))
3283 return nullptr;
3284 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3285 return nullptr;
3286
3287 // Validate constraint #3: Ensure the null case just falls through.
3288 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3289 return nullptr;
3290 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3291 "Broken CFG: missing edge from predecessor to successor");
3292
3293 // At this point, we know that everything in FreeInstrBB can be moved
3294 // before TI.
3295 for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) {
3296 if (&Instr == FreeInstrBBTerminator)
3297 break;
3298 Instr.moveBeforePreserving(MovePos: TI);
3299 }
3300 assert(FreeInstrBB->size() == 1 &&
3301 "Only the branch instruction should remain");
3302
3303 // Now that we've moved the call to free before the NULL check, we have to
3304 // remove any attributes on its parameter that imply it's non-null, because
3305 // those attributes might have only been valid because of the NULL check, and
3306 // we can get miscompiles if we keep them. This is conservative if non-null is
3307 // also implied by something other than the NULL check, but it's guaranteed to
3308 // be correct, and the conservativeness won't matter in practice, since the
3309 // attributes are irrelevant for the call to free itself and the pointer
3310 // shouldn't be used after the call.
3311 AttributeList Attrs = FI.getAttributes();
3312 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3313 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3314 if (Dereferenceable.isValid()) {
3315 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3316 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3317 Attribute::Dereferenceable);
3318 Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes);
3319 }
3320 FI.setAttributes(Attrs);
3321
3322 return &FI;
3323}
3324
3325Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3326 // free undef -> unreachable.
3327 if (isa<UndefValue>(Val: Op)) {
3328 // Leave a marker since we can't modify the CFG here.
3329 CreateNonTerminatorUnreachable(InsertAt: &FI);
3330 return eraseInstFromFunction(I&: FI);
3331 }
3332
  // If we have 'free null', delete the instruction. This can happen in STL
  // code when lots of inlining happens.
3335 if (isa<ConstantPointerNull>(Val: Op))
3336 return eraseInstFromFunction(I&: FI);
3337
3338 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3339 // realloc() entirely.
3340 CallInst *CI = dyn_cast<CallInst>(Val: Op);
3341 if (CI && CI->hasOneUse())
3342 if (Value *ReallocatedOp = getReallocatedOperand(CB: CI))
3343 return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp));
3344
  // If we optimize for code size, try to move the call to free before the null
  // test so that SimplifyCFG can remove the empty block and dead code
  // elimination can remove the branch. I.e., this helps to turn something like:
3348 // if (foo) free(foo);
3349 // into
3350 // free(foo);
3351 //
3352 // Note that we can only do this for 'free' and not for any flavor of
3353 // 'operator delete'; there is no 'operator delete' symbol for which we are
3354 // permitted to invent a call, even if we're passing in a null pointer.
3355 if (MinimizeSize) {
3356 LibFunc Func;
3357 if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free)
3358 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3359 return I;
3360 }
3361
3362 return nullptr;
3363}
3364
3365Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3366 Value *RetVal = RI.getReturnValue();
3367 if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(Ty: RetVal->getType()))
3368 return nullptr;
3369
3370 Function *F = RI.getFunction();
3371 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3372 if (ReturnClass == fcNone)
3373 return nullptr;
3374
3375 KnownFPClass KnownClass;
3376 Value *Simplified =
3377 SimplifyDemandedUseFPClass(V: RetVal, DemandedMask: ~ReturnClass, Known&: KnownClass, Depth: 0, CxtI: &RI);
3378 if (!Simplified)
3379 return nullptr;
3380
3381 return ReturnInst::Create(C&: RI.getContext(), retVal: Simplified);
3382}
3383
3384// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3385bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
3386 // Try to remove the previous instruction if it must lead to unreachable.
3387 // This includes instructions like stores and "llvm.assume" that may not get
3388 // removed by simple dead code elimination.
3389 bool Changed = false;
3390 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3391 // While we theoretically can erase EH, that would result in a block that
3392 // used to start with an EH no longer starting with EH, which is invalid.
3393 // To make it valid, we'd need to fixup predecessors to no longer refer to
3394 // this block, but that changes CFG, which is not allowed in InstCombine.
3395 if (Prev->isEHPad())
3396 break; // Can not drop any more instructions. We're done here.
3397
3398 if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev))
3399 break; // Can not drop any more instructions. We're done here.
3400 // Otherwise, this instruction can be freely erased,
3401 // even if it is not side-effect free.
3402
3403 // A value may still have uses before we process it here (for example, in
3404 // another unreachable block), so convert those to poison.
3405 replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType()));
3406 eraseInstFromFunction(I&: *Prev);
3407 Changed = true;
3408 }
3409 return Changed;
3410}
3411
3412Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3413 removeInstructionsBeforeUnreachable(I);
3414 return nullptr;
3415}
3416
3417Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3418 assert(BI.isUnconditional() && "Only for unconditional branches.");
3419
  // If the second-to-last instruction in this basic block (excluding debug
  // info and bitcasts of pointers) is a store, try to move that store into
  // the successor block, since this block ends with an unconditional branch.
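  // For example (illustrative), if both predecessors of the successor end
  // with a store to the same pointer:
  //   then:  store i32 1, ptr %p        else:  store i32 2, ptr %p
  // the stores can be sunk into the successor as a single store of a phi of
  // the two values (see mergeStoreIntoSuccessor).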
3423
3424 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3425 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3426 return BBI->isDebugOrPseudoInst() ||
3427 (isa<BitCastInst>(Val: BBI) && BBI->getType()->isPointerTy());
3428 };
3429
3430 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3431 do {
3432 if (BBI != FirstInstr)
3433 --BBI;
3434 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3435
3436 return dyn_cast<StoreInst>(Val&: BBI);
3437 };
3438
3439 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3440 if (mergeStoreIntoSuccessor(SI&: *SI))
3441 return &BI;
3442
3443 return nullptr;
3444}
3445
3446void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
3447 SmallVectorImpl<BasicBlock *> &Worklist) {
3448 if (!DeadEdges.insert(V: {From, To}).second)
3449 return;
3450
3451 // Replace phi node operands in successor with poison.
3452 for (PHINode &PN : To->phis())
3453 for (Use &U : PN.incoming_values())
3454 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) {
3455 replaceUse(U, NewValue: PoisonValue::get(T: PN.getType()));
3456 addToWorklist(I: &PN);
3457 MadeIRChange = true;
3458 }
3459
3460 Worklist.push_back(Elt: To);
3461}
3462
// Under the assumption that I is unreachable, remove it and the instructions
// that follow it. Changes are reported directly to MadeIRChange.
3465void InstCombinerImpl::handleUnreachableFrom(
3466 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
3467 BasicBlock *BB = I->getParent();
3468 for (Instruction &Inst : make_early_inc_range(
3469 Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()),
3470 y: std::next(x: I->getReverseIterator())))) {
3471 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
3472 replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType()));
3473 MadeIRChange = true;
3474 }
3475 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
3476 continue;
3477 // RemoveDIs: erase debug-info on this instruction manually.
3478 Inst.dropDbgRecords();
3479 eraseInstFromFunction(I&: Inst);
3480 MadeIRChange = true;
3481 }
3482
3483 SmallVector<Value *> Changed;
3484 if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) {
3485 MadeIRChange = true;
3486 for (Value *V : Changed)
3487 addToWorklist(I: cast<Instruction>(Val: V));
3488 }
3489
3490 // Handle potentially dead successors.
3491 for (BasicBlock *Succ : successors(BB))
3492 addDeadEdge(From: BB, To: Succ, Worklist);
3493}
3494
3495void InstCombinerImpl::handlePotentiallyDeadBlocks(
3496 SmallVectorImpl<BasicBlock *> &Worklist) {
3497 while (!Worklist.empty()) {
3498 BasicBlock *BB = Worklist.pop_back_val();
3499 if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
3500 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
3501 }))
3502 continue;
3503
3504 handleUnreachableFrom(I: &BB->front(), Worklist);
3505 }
3506}
3507
3508void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
3509 BasicBlock *LiveSucc) {
3510 SmallVector<BasicBlock *> Worklist;
3511 for (BasicBlock *Succ : successors(BB)) {
3512 // The live successor isn't dead.
3513 if (Succ == LiveSucc)
3514 continue;
3515
3516 addDeadEdge(From: BB, To: Succ, Worklist);
3517 }
3518
3519 handlePotentiallyDeadBlocks(Worklist);
3520}
3521
3522Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3523 if (BI.isUnconditional())
3524 return visitUnconditionalBranchInst(BI);
3525
3526 // Change br (not X), label True, label False to: br X, label False, True
3527 Value *Cond = BI.getCondition();
3528 Value *X;
3529 if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) {
3530 // Swap Destinations and condition...
3531 BI.swapSuccessors();
3532 if (BPI)
3533 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
3534 return replaceOperand(I&: BI, OpNum: 0, V: X);
3535 }
3536
3537 // Canonicalize logical-and-with-invert as logical-or-with-invert.
3538 // This is done by inverting the condition and swapping successors:
3539 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3540 Value *Y;
3541 if (isa<SelectInst>(Val: Cond) &&
3542 match(V: Cond,
3543 P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) {
3544 Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName());
3545 Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y);
3546 BI.swapSuccessors();
3547 if (BPI)
3548 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
3549 return replaceOperand(I&: BI, OpNum: 0, V: Or);
3550 }
3551
3552 // If the condition is irrelevant, remove the use so that other
3553 // transforms on the condition become more effective.
3554 if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1))
3555 return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType()));
3556
3557 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3558 CmpInst::Predicate Pred;
3559 if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) &&
3560 !isCanonicalPredicate(Pred)) {
3561 // Swap destinations and condition.
3562 auto *Cmp = cast<CmpInst>(Val: Cond);
3563 Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred));
3564 BI.swapSuccessors();
3565 if (BPI)
3566 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
3567 Worklist.push(I: Cmp);
3568 return &BI;
3569 }
3570
3571 if (isa<UndefValue>(Val: Cond)) {
3572 handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr);
3573 return nullptr;
3574 }
3575 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
3576 handlePotentiallyDeadSuccessors(BB: BI.getParent(),
3577 LiveSucc: BI.getSuccessor(i: !CI->getZExtValue()));
3578 return nullptr;
3579 }
3580
3581 DC.registerBranch(BI: &BI);
3582 return nullptr;
3583}
3584
3585// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
3586// we can prove that both (switch C) and (switch X) go to the default when cond
3587// is false/true.
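//
// For example (illustrative):
//   %sel = select i1 (icmp ult i32 %x, 4), i32 %x, i32 7
//   switch i32 %sel, label %default [ cases 0, 1, 2, 3 ]
// The constant arm 7 maps to the default destination, and every case value
// lies in the range implied by the condition (ult 4), so whenever the select
// yields 7 the value %x matches no case either; the switch can use %x
// directly.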
3588static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
3589 SelectInst *Select,
3590 bool IsTrueArm) {
3591 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
3592 auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx));
3593 if (!C)
3594 return nullptr;
3595
3596 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
3597 if (CstBB != SI.getDefaultDest())
3598 return nullptr;
3599 Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx);
3600 ICmpInst::Predicate Pred;
3601 const APInt *RHSC;
3602 if (!match(V: Select->getCondition(),
3603 P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC))))
3604 return nullptr;
3605 if (IsTrueArm)
3606 Pred = ICmpInst::getInversePredicate(pred: Pred);
3607
3608 // See whether we can replace the select with X
3609 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
3610 for (auto Case : SI.cases())
3611 if (!CR.contains(Val: Case.getCaseValue()->getValue()))
3612 return nullptr;
3613
3614 return X;
3615}
3616
3617Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3618 Value *Cond = SI.getCondition();
3619 Value *Op0;
3620 ConstantInt *AddRHS;
3621 if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_ConstantInt(CI&: AddRHS)))) {
3622 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3623 for (auto Case : SI.cases()) {
3624 Constant *NewCase = ConstantExpr::getSub(C1: Case.getCaseValue(), C2: AddRHS);
3625 assert(isa<ConstantInt>(NewCase) &&
3626 "Result of expression should be constant");
3627 Case.setValue(cast<ConstantInt>(Val: NewCase));
3628 }
3629 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
3630 }
3631
3632 ConstantInt *SubLHS;
3633 if (match(V: Cond, P: m_Sub(L: m_ConstantInt(CI&: SubLHS), R: m_Value(V&: Op0)))) {
3634 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
3635 for (auto Case : SI.cases()) {
3636 Constant *NewCase = ConstantExpr::getSub(C1: SubLHS, C2: Case.getCaseValue());
3637 assert(isa<ConstantInt>(NewCase) &&
3638 "Result of expression should be constant");
3639 Case.setValue(cast<ConstantInt>(Val: NewCase));
3640 }
3641 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
3642 }
3643
3644 uint64_t ShiftAmt;
3645 if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) &&
3646 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
3647 all_of(Range: SI.cases(), P: [&](const auto &Case) {
3648 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
3649 })) {
3650 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
3651 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond);
3652 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
3653 Shl->hasOneUse()) {
3654 Value *NewCond = Op0;
3655 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
3656 // If the shift may wrap, we need to mask off the shifted bits.
3657 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
3658 NewCond = Builder.CreateAnd(
3659 LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt));
3660 }
3661 for (auto Case : SI.cases()) {
3662 const APInt &CaseVal = Case.getCaseValue()->getValue();
3663 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
3664 : CaseVal.lshr(shiftAmt: ShiftAmt);
3665 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase));
3666 }
3667 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
3668 }
3669 }
3670
3671 // Fold switch(zext/sext(X)) into switch(X) if possible.
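  // For example (illustrative):
  //   %w = zext i32 %x to i64
  //   switch i64 %w, label %d [ i64 0, ...  i64 5, ... ]
  // can switch directly on i32 %x with the case values truncated to i32,
  // since every case value fits in 32 bits.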
3672 if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) {
3673 bool IsZExt = isa<ZExtInst>(Val: Cond);
3674 Type *SrcTy = Op0->getType();
3675 unsigned NewWidth = SrcTy->getScalarSizeInBits();
3676
3677 if (all_of(Range: SI.cases(), P: [&](const auto &Case) {
3678 const APInt &CaseVal = Case.getCaseValue()->getValue();
3679 return IsZExt ? CaseVal.isIntN(N: NewWidth)
3680 : CaseVal.isSignedIntN(N: NewWidth);
3681 })) {
3682 for (auto &Case : SI.cases()) {
3683 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
3684 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
3685 }
3686 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
3687 }
3688 }
3689
3690 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
3691 if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) {
3692 if (Value *V =
3693 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
3694 return replaceOperand(I&: SI, OpNum: 0, V);
3695 if (Value *V =
3696 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
3697 return replaceOperand(I&: SI, OpNum: 0, V);
3698 }
3699
3700 KnownBits Known = computeKnownBits(V: Cond, Depth: 0, CxtI: &SI);
3701 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3702 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3703
3704 // Compute the number of leading bits we can ignore.
3705 // TODO: A better way to determine this would use ComputeNumSignBits().
3706 for (const auto &C : SI.cases()) {
3707 LeadingKnownZeros =
3708 std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero());
3709 LeadingKnownOnes =
3710 std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one());
3711 }
3712
3713 unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes);
3714
3715 // Shrink the condition operand if the new type is smaller than the old type.
3716 // But do not shrink to a non-standard type, because backend can't generate
3717 // good code for that yet.
3718 // TODO: We can make it aggressive again after fixing PR39569.
3719 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3720 shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) {
3721 IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth);
3722 Builder.SetInsertPoint(&SI);
3723 Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc");
3724
3725 for (auto Case : SI.cases()) {
3726 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
3727 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
3728 }
3729 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
3730 }
3731
3732 if (isa<UndefValue>(Val: Cond)) {
3733 handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr);
3734 return nullptr;
3735 }
3736 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
3737 handlePotentiallyDeadSuccessors(BB: SI.getParent(),
3738 LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor());
3739 return nullptr;
3740 }
3741
3742 return nullptr;
3743}
3744
3745Instruction *
3746InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3747 auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand());
3748 if (!WO)
3749 return nullptr;
3750
3751 Intrinsic::ID OvID = WO->getIntrinsicID();
3752 const APInt *C = nullptr;
3753 if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) {
3754 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3755 OvID == Intrinsic::umul_with_overflow)) {
3756 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3757 if (C->isAllOnes())
3758 return BinaryOperator::CreateNeg(Op: WO->getLHS());
3759 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3760 if (C->isPowerOf2()) {
3761 return BinaryOperator::CreateShl(
3762 V1: WO->getLHS(),
3763 V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2()));
3764 }
3765 }
3766 }
3767
3768 // We're extracting from an overflow intrinsic. See if we're the only user.
3769 // That allows us to simplify multiple result intrinsics to simpler things
3770 // that just get one value.
3771 if (!WO->hasOneUse())
3772 return nullptr;
3773
3774 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3775 // and replace it with a traditional binary instruction.
3776 if (*EV.idx_begin() == 0) {
3777 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3778 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3779 // Replace the old instruction's uses with poison.
3780 replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType()));
3781 eraseInstFromFunction(I&: *WO);
3782 return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS);
3783 }
3784
3785 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3786
3787 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
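  // That is: extractvalue (usub.with.overflow X, Y), 1 --> icmp ult X, Y.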
3788 if (OvID == Intrinsic::usub_with_overflow)
3789 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3790
3791 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3792 // +1 is not possible because we assume signed values.
3793 if (OvID == Intrinsic::smul_with_overflow &&
3794 WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1))
3795 return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS());
3796
3797 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
3798 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3799 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3800 // Only handle even bitwidths for performance reasons.
3801 if (BitWidth % 2 == 0)
3802 return new ICmpInst(
3803 ICmpInst::ICMP_UGT, WO->getLHS(),
3804 ConstantInt::get(Ty: WO->getLHS()->getType(),
3805 V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2)));
3806 }
3807
3808 // If only the overflow result is used, and the right hand side is a
3809 // constant (or constant splat), we can remove the intrinsic by directly
3810 // checking for overflow.
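  // For example (illustrative, i8 operands): the overflow bit of
  //   sadd.with.overflow(i8 %x, i8 100)
  // is set exactly when %x is signed-greater-than 27, so it can be checked
  // with a single icmp against that bound.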
3811 if (C) {
3812 // Compute the no-wrap range for LHS given RHS=C, then construct an
3813 // equivalent icmp, potentially using an offset.
3814 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3815 BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind());
3816
3817 CmpInst::Predicate Pred;
3818 APInt NewRHSC, Offset;
3819 NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset);
3820 auto *OpTy = WO->getRHS()->getType();
3821 auto *NewLHS = WO->getLHS();
3822 if (Offset != 0)
3823 NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset));
3824 return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS,
3825 ConstantInt::get(Ty: OpTy, V: NewRHSC));
3826 }
3827
3828 return nullptr;
3829}
3830
3831Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3832 Value *Agg = EV.getAggregateOperand();
3833
3834 if (!EV.hasIndices())
3835 return replaceInstUsesWith(I&: EV, V: Agg);
3836
3837 if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(),
3838 Q: SQ.getWithInstruction(I: &EV)))
3839 return replaceInstUsesWith(I&: EV, V);
3840
3841 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) {
3842 // We're extracting from an insertvalue instruction, compare the indices
3843 const unsigned *exti, *exte, *insi, *inse;
3844 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3845 exte = EV.idx_end(), inse = IV->idx_end();
3846 exti != exte && insi != inse;
3847 ++exti, ++insi) {
3848 if (*insi != *exti)
3849 // The insert and extract both reference distinctly different elements.
3850 // This means the extract is not influenced by the insert, and we can
3851 // replace the aggregate operand of the extract with the aggregate
3852 // operand of the insert. i.e., replace
3853 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3854 // %E = extractvalue { i32, { i32 } } %I, 0
3855 // with
3856 // %E = extractvalue { i32, { i32 } } %A, 0
3857 return ExtractValueInst::Create(Agg: IV->getAggregateOperand(),
3858 Idxs: EV.getIndices());
3859 }
3860 if (exti == exte && insi == inse)
3861 // Both iterators are at the end: Index lists are identical. Replace
3862 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3863 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3864 // with "i32 42"
3865 return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand());
3866 if (exti == exte) {
3867 // The extract list is a prefix of the insert list. i.e. replace
3868 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3869 // %E = extractvalue { i32, { i32 } } %I, 1
3870 // with
3871 // %X = extractvalue { i32, { i32 } } %A, 1
3872 // %E = insertvalue { i32 } %X, i32 42, 0
3873 // by switching the order of the insert and extract (though the
3874 // insertvalue should be left in, since it may have other uses).
3875 Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(),
3876 Idxs: EV.getIndices());
3877 return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(),
3878 Idxs: ArrayRef(insi, inse));
3879 }
3880 if (insi == inse)
3881 // The insert list is a prefix of the extract list
3882 // We can simply remove the common indices from the extract and make it
3883 // operate on the inserted value instead of the insertvalue result.
3884 // i.e., replace
3885 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3886 // %E = extractvalue { i32, { i32 } } %I, 1, 0
3887 // with
3888 // %E extractvalue { i32 } { i32 42 }, 0
3889 return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(),
3890 Idxs: ArrayRef(exti, exte));
3891 }
3892
3893 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3894 return R;
3895
3896 if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) {
3897 // Bail out if the aggregate contains scalable vector type
3898 if (auto *STy = dyn_cast<StructType>(Val: Agg->getType());
3899 STy && STy->containsScalableVectorType())
3900 return nullptr;
3901
3902 // If the (non-volatile) load only has one use, we can rewrite this to a
3903 // load from a GEP. This reduces the size of the load. If a load is used
3904 // only by extractvalue instructions then this either must have been
3905 // optimized before, or it is a struct with padding, in which case we
3906 // don't want to do the transformation as it loses padding knowledge.
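    // For example (illustrative):
    //   %agg = load { i32, i64 }, ptr %p
    //   %v   = extractvalue { i32, i64 } %agg, 1
    // becomes roughly
    //   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
    //   %v   = load i64, ptr %gep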
3907 if (L->isSimple() && L->hasOneUse()) {
3908 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3909 SmallVector<Value*, 4> Indices;
3910 // Prefix an i32 0 since we need the first element.
3911 Indices.push_back(Elt: Builder.getInt32(C: 0));
3912 for (unsigned Idx : EV.indices())
3913 Indices.push_back(Elt: Builder.getInt32(C: Idx));
3914
3915 // We need to insert these at the location of the old load, not at that of
3916 // the extractvalue.
3917 Builder.SetInsertPoint(L);
3918 Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(),
3919 Ptr: L->getPointerOperand(), IdxList: Indices);
3920 Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP);
      // Whatever aliasing information we had for the original load must also
      // hold for the smaller load, so propagate the annotations.
3923 NL->setAAMetadata(L->getAAMetadata());
3924 // Returning the load directly will cause the main loop to insert it in
3925 // the wrong spot, so use replaceInstUsesWith().
3926 return replaceInstUsesWith(I&: EV, V: NL);
3927 }
3928 }
3929
3930 if (auto *PN = dyn_cast<PHINode>(Val: Agg))
3931 if (Instruction *Res = foldOpIntoPhi(I&: EV, PN))
3932 return Res;
3933
3934 // Canonicalize extract (select Cond, TV, FV)
3935 // -> select cond, (extract TV), (extract FV)
3936 if (auto *SI = dyn_cast<SelectInst>(Val: Agg))
3937 if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true))
3938 return R;
3939
3940 // We could simplify extracts from other values. Note that nested extracts may
3941 // already be simplified implicitly by the above: extract (extract (insert) )
3942 // will be translated into extract ( insert ( extract ) ) first and then just
3943 // the value inserted, if appropriate. Similarly for extracts from single-use
3944 // loads: extract (extract (load)) will be translated to extract (load (gep))
3945 // and if again single-use then via load (gep (gep)) to load (gep).
3946 // However, double extracts from e.g. function arguments or return values
3947 // aren't handled yet.
3948 return nullptr;
3949}
3950
3951/// Return 'true' if the given typeinfo will match anything.
3952static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3953 switch (Personality) {
3954 case EHPersonality::GNU_C:
3955 case EHPersonality::GNU_C_SjLj:
3956 case EHPersonality::Rust:
    // The GCC C EH and Rust personalities only exist to support cleanups, so
    // it's not clear what the semantics of catch clauses are.
3959 return false;
3960 case EHPersonality::Unknown:
3961 return false;
3962 case EHPersonality::GNU_Ada:
3963 // While __gnat_all_others_value will match any Ada exception, it doesn't
3964 // match foreign exceptions (or didn't, before gcc-4.7).
3965 return false;
3966 case EHPersonality::GNU_CXX:
3967 case EHPersonality::GNU_CXX_SjLj:
3968 case EHPersonality::GNU_ObjC:
3969 case EHPersonality::MSVC_X86SEH:
3970 case EHPersonality::MSVC_TableSEH:
3971 case EHPersonality::MSVC_CXX:
3972 case EHPersonality::CoreCLR:
3973 case EHPersonality::Wasm_CXX:
3974 case EHPersonality::XL_CXX:
3975 case EHPersonality::ZOS_CXX:
3976 return TypeInfo->isNullValue();
3977 }
3978 llvm_unreachable("invalid enum");
3979}
3980
3981static bool shorter_filter(const Value *LHS, const Value *RHS) {
3982 return
3983 cast<ArrayType>(Val: LHS->getType())->getNumElements()
3984 <
3985 cast<ArrayType>(Val: RHS->getType())->getNumElements();
3986}
3987
3988Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3989 // The logic here should be correct for any real-world personality function.
3990 // However if that turns out not to be true, the offending logic can always
3991 // be conditioned on the personality function, like the catch-all logic is.
3992 EHPersonality Personality =
3993 classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn());
3994
3995 // Simplify the list of clauses, eg by removing repeated catch clauses
3996 // (these are often created by inlining).
3997 bool MakeNewInstruction = false; // If true, recreate using the following:
3998 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3999 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4000
4001 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4002 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4003 bool isLastClause = i + 1 == e;
4004 if (LI.isCatch(Idx: i)) {
4005 // A catch clause.
4006 Constant *CatchClause = LI.getClause(Idx: i);
4007 Constant *TypeInfo = CatchClause->stripPointerCasts();
4008
4009 // If we already saw this clause, there is no point in having a second
4010 // copy of it.
4011 if (AlreadyCaught.insert(Ptr: TypeInfo).second) {
4012 // This catch clause was not already seen.
4013 NewClauses.push_back(Elt: CatchClause);
4014 } else {
4015 // Repeated catch clause - drop the redundant copy.
4016 MakeNewInstruction = true;
4017 }
4018
4019 // If this is a catch-all then there is no point in keeping any following
4020 // clauses or marking the landingpad as having a cleanup.
4021 if (isCatchAll(Personality, TypeInfo)) {
4022 if (!isLastClause)
4023 MakeNewInstruction = true;
4024 CleanupFlag = false;
4025 break;
4026 }
4027 } else {
4028 // A filter clause. If any of the filter elements were already caught
4029 // then they can be dropped from the filter. It is tempting to try to
4030 // exploit the filter further by saying that any typeinfo that does not
4031 // occur in the filter can't be caught later (and thus can be dropped).
4032 // However this would be wrong, since typeinfos can match without being
4033 // equal (for example if one represents a C++ class, and the other some
4034 // class derived from it).
4035 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4036 Constant *FilterClause = LI.getClause(Idx: i);
4037 ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType());
4038 unsigned NumTypeInfos = FilterType->getNumElements();
4039
4040 // An empty filter catches everything, so there is no point in keeping any
4041 // following clauses or marking the landingpad as having a cleanup. By
4042 // dealing with this case here the following code is made a bit simpler.
4043 if (!NumTypeInfos) {
4044 NewClauses.push_back(Elt: FilterClause);
4045 if (!isLastClause)
4046 MakeNewInstruction = true;
4047 CleanupFlag = false;
4048 break;
4049 }
4050
4051 bool MakeNewFilter = false; // If true, make a new filter.
4052 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4053 if (isa<ConstantAggregateZero>(Val: FilterClause)) {
4054 // Not an empty filter - it contains at least one null typeinfo.
4055 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4056 Constant *TypeInfo =
4057 Constant::getNullValue(Ty: FilterType->getElementType());
4058 // If this typeinfo is a catch-all then the filter can never match.
4059 if (isCatchAll(Personality, TypeInfo)) {
4060 // Throw the filter away.
4061 MakeNewInstruction = true;
4062 continue;
4063 }
4064
4065 // There is no point in having multiple copies of this typeinfo, so
4066 // discard all but the first copy if there is more than one.
4067 NewFilterElts.push_back(Elt: TypeInfo);
4068 if (NumTypeInfos > 1)
4069 MakeNewFilter = true;
4070 } else {
4071 ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause);
4072 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4073 NewFilterElts.reserve(N: NumTypeInfos);
4074
4075 // Remove any filter elements that were already caught or that already
4076 // occurred in the filter. While there, see if any of the elements are
4077 // catch-alls. If so, the filter can be discarded.
4078 bool SawCatchAll = false;
4079 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4080 Constant *Elt = Filter->getOperand(i_nocapture: j);
4081 Constant *TypeInfo = Elt->stripPointerCasts();
4082 if (isCatchAll(Personality, TypeInfo)) {
4083 // This element is a catch-all. Bail out, noting this fact.
4084 SawCatchAll = true;
4085 break;
4086 }
4087
4088 // Even if we've seen a type in a catch clause, we don't want to
4089 // remove it from the filter. An unexpected type handler may be
4090 // set up for a call site which throws an exception of the same
4091 // type caught. In order for the exception thrown by the unexpected
4092 // handler to propagate correctly, the filter must be correctly
4093 // described for the call site.
4094 //
4095 // Example:
4096 //
4097 // void unexpected() { throw 1;}
4098 // void foo() throw (int) {
4099 // std::set_unexpected(unexpected);
4100 // try {
4101 // throw 2.0;
4102 // } catch (int i) {}
4103 // }
4104
4105 // There is no point in having multiple copies of the same typeinfo in
4106 // a filter, so only add it if we didn't already.
4107 if (SeenInFilter.insert(Ptr: TypeInfo).second)
4108 NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt));
4109 }
4110 // A filter containing a catch-all cannot match anything by definition.
4111 if (SawCatchAll) {
4112 // Throw the filter away.
4113 MakeNewInstruction = true;
4114 continue;
4115 }
4116
4117 // If we dropped something from the filter, make a new one.
4118 if (NewFilterElts.size() < NumTypeInfos)
4119 MakeNewFilter = true;
4120 }
4121 if (MakeNewFilter) {
4122 FilterType = ArrayType::get(ElementType: FilterType->getElementType(),
4123 NumElements: NewFilterElts.size());
4124 FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts);
4125 MakeNewInstruction = true;
4126 }
4127
4128 NewClauses.push_back(Elt: FilterClause);
4129
4130 // If the new filter is empty then it will catch everything so there is
4131 // no point in keeping any following clauses or marking the landingpad
4132 // as having a cleanup. The case of the original filter being empty was
4133 // already handled above.
4134 if (MakeNewFilter && !NewFilterElts.size()) {
4135 assert(MakeNewInstruction && "New filter but not a new instruction!");
4136 CleanupFlag = false;
4137 break;
4138 }
4139 }
4140 }
4141
4142 // If several filters occur in a row then reorder them so that the shortest
4143 // filters come first (those with the smallest number of elements). This is
4144 // advantageous because shorter filters are more likely to match, speeding up
4145 // unwinding, but mostly because it increases the effectiveness of the other
4146 // filter optimizations below.
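// As a quick illustrative sketch (the typeinfos @A, @B, @C are made up, not
// taken from a real test case), a clause list of the form
//   filter [2 x ptr] [ptr @A, ptr @B], filter [1 x ptr] [ptr @C]
// would be reordered to
//   filter [1 x ptr] [ptr @C], filter [2 x ptr] [ptr @A, ptr @B]
// so that the shorter filter is tried first.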
4147 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4148 unsigned j;
4149 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4150 for (j = i; j != e; ++j)
4151 if (!isa<ArrayType>(Val: NewClauses[j]->getType()))
4152 break;
4153
4154 // Check whether the filters are already sorted by length. We need to know
4155 // if sorting them is actually going to do anything so that we only make a
4156 // new landingpad instruction if it does.
4157 for (unsigned k = i; k + 1 < j; ++k)
4158 if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) {
4159 // Not sorted, so sort the filters now. Doing an unstable sort would be
4160 // correct too but reordering filters pointlessly might confuse users.
4161 std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j,
4162 comp: shorter_filter);
4163 MakeNewInstruction = true;
4164 break;
4165 }
4166
4167 // Look for the next batch of filters.
4168 i = j + 1;
4169 }
4170
4171 // If typeinfos matched if and only if equal, then the elements of a filter L
4172 // that occurs later than a filter F could be replaced by the intersection of
4173 // the elements of F and L. In reality two typeinfos can match without being
4174 // equal (for example if one represents a C++ class, and the other some class
4175 // derived from it) so it would be wrong to perform this transform in general.
4176 // However the transform is correct and useful if F is a subset of L. In that
4177 // case L can be replaced by F, and thus removed altogether since repeating a
4178 // filter is pointless. So here we look at all pairs of filters F and L where
4179 // L follows F in the list of clauses, and remove L if every element of F is
4180 // an element of L. This can occur when inlining C++ functions with exception
4181 // specifications.
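// Illustrative sketch (hypothetical typeinfos, not from a real test case):
//   filter [1 x ptr] [ptr @A]           ; F
//   ...
//   filter [2 x ptr] [ptr @A, ptr @B]   ; L, every element of F is in L
// Since every element of F is also an element of L, L is redundant once F has
// been seen; the loop below erases L and only the landingpad is rebuilt.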
4182 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4183 // Examine each filter in turn.
4184 Value *Filter = NewClauses[i];
4185 ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType());
4186 if (!FTy)
4187 // Not a filter - skip it.
4188 continue;
4189 unsigned FElts = FTy->getNumElements();
4190 // Examine each filter following this one. Doing this backwards means that
4191 // we don't have to worry about filters disappearing under us when removed.
4192 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4193 Value *LFilter = NewClauses[j];
4194 ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType());
4195 if (!LTy)
4196 // Not a filter - skip it.
4197 continue;
4198 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4199 // an element of LFilter, then discard LFilter.
4200 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4201 // If Filter is empty then it is a subset of LFilter.
4202 if (!FElts) {
4203 // Discard LFilter.
4204 NewClauses.erase(CI: J);
4205 MakeNewInstruction = true;
4206 // Move on to the next filter.
4207 continue;
4208 }
4209 unsigned LElts = LTy->getNumElements();
4210 // If Filter is longer than LFilter then it cannot be a subset of it.
4211 if (FElts > LElts)
4212 // Move on to the next filter.
4213 continue;
4214 // At this point we know that LFilter has at least one element.
4215 if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros.
4216 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4217 // already know that Filter is not longer than LFilter).
4218 if (isa<ConstantAggregateZero>(Val: Filter)) {
4219 assert(FElts <= LElts && "Should have handled this case earlier!");
4220 // Discard LFilter.
4221 NewClauses.erase(CI: J);
4222 MakeNewInstruction = true;
4223 }
4224 // Move on to the next filter.
4225 continue;
4226 }
4227 ConstantArray *LArray = cast<ConstantArray>(Val: LFilter);
4228 if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros.
4229 // Since Filter is non-empty and contains only zeros, it is a subset of
4230 // LFilter iff LFilter contains a zero.
4231 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4232 for (unsigned l = 0; l != LElts; ++l)
4233 if (LArray->getOperand(i_nocapture: l)->isNullValue()) {
4234 // LFilter contains a zero - discard it.
4235 NewClauses.erase(CI: J);
4236 MakeNewInstruction = true;
4237 break;
4238 }
4239 // Move on to the next filter.
4240 continue;
4241 }
4242 // At this point we know that both filters are ConstantArrays. Loop over
4243 // operands to see whether every element of Filter is also an element of
4244 // LFilter. Since filters tend to be short this is probably faster than
4245 // using a method that scales nicely.
4246 ConstantArray *FArray = cast<ConstantArray>(Val: Filter);
4247 bool AllFound = true;
4248 for (unsigned f = 0; f != FElts; ++f) {
4249 Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts();
4250 AllFound = false;
4251 for (unsigned l = 0; l != LElts; ++l) {
4252 Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts();
4253 if (LTypeInfo == FTypeInfo) {
4254 AllFound = true;
4255 break;
4256 }
4257 }
4258 if (!AllFound)
4259 break;
4260 }
4261 if (AllFound) {
4262 // Discard LFilter.
4263 NewClauses.erase(CI: J);
4264 MakeNewInstruction = true;
4265 }
4266 // Move on to the next filter.
4267 }
4268 }
4269
4270 // If we changed any of the clauses, replace the old landingpad instruction
4271 // with a new one.
4272 if (MakeNewInstruction) {
4273 LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(),
4274 NumReservedClauses: NewClauses.size());
4275 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
4276 NLI->addClause(ClauseVal: NewClauses[i]);
4277 // A landing pad with no clauses must have the cleanup flag set. It is
4278 // theoretically possible, though highly unlikely, that we eliminated all
4279 // clauses. If so, force the cleanup flag to true.
4280 if (NewClauses.empty())
4281 CleanupFlag = true;
4282 NLI->setCleanup(CleanupFlag);
4283 return NLI;
4284 }
4285
4286 // Even if none of the clauses changed, we may nonetheless have understood
4287 // that the cleanup flag is pointless. Clear it if so.
4288 if (LI.isCleanup() != CleanupFlag) {
4289 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4290 LI.setCleanup(CleanupFlag);
4291 return &LI;
4292 }
4293
4294 return nullptr;
4295}
4296
4297Value *
4298InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
4299 // Try to push freeze through instructions that propagate but don't produce
4300 // poison as far as possible. If an operand of the freeze satisfies three
4301 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
4302 // but one of its operands are guaranteed non-poison, then push the freeze
4303 // through to the one operand that is not guaranteed non-poison. The actual
4304 // transform is as follows.
4305 // Op1 = ... ; Op1 can be poison
4306 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its
4307 // ; only operand that may be poison
4308 // ... = Freeze(Op0)
4309 // =>
4310 // Op1 = ...
4311 // Op1.fr = Freeze(Op1)
4312 // ... = Inst(Op1.fr, NonPoisonOps...)
4313 auto *OrigOp = OrigFI.getOperand(i_nocapture: 0);
4314 auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp);
4315
4316 // While we could change the other users of OrigOp to use freeze(OrigOp), that
4317 // potentially reduces their optimization potential, so only do this if
4318 // OrigOp is used solely by the freeze.
4319 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp))
4320 return nullptr;
4321
4322 // We can't push the freeze through an instruction which can itself create
4323 // poison. If the only source of new poison is flags, we can simply
4324 // strip them (since we know the only use is the freeze and nothing can
4325 // benefit from them.)
4326 if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp),
4327 /*ConsiderFlagsAndMetadata*/ false))
4328 return nullptr;
4329
4330 // If an operand is guaranteed not to be poison, there is no need to add a
4331 // freeze to it. So we first look for the single operand that is not
4332 // guaranteed to be non-poison.
4333 Use *MaybePoisonOperand = nullptr;
4334 for (Use &U : OrigOpInst->operands()) {
4335 if (isa<MetadataAsValue>(Val: U.get()) ||
4336 isGuaranteedNotToBeUndefOrPoison(V: U.get()))
4337 continue;
4338 if (!MaybePoisonOperand)
4339 MaybePoisonOperand = &U;
4340 else
4341 return nullptr;
4342 }
4343
4344 OrigOpInst->dropPoisonGeneratingAnnotations();
4345
4346 // If all operands are guaranteed to be non-poison, we can drop freeze.
4347 if (!MaybePoisonOperand)
4348 return OrigOp;
4349
4350 Builder.SetInsertPoint(OrigOpInst);
4351 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
4352 V: MaybePoisonOperand->get(), Name: MaybePoisonOperand->get()->getName() + ".fr");
4353
4354 replaceUse(U&: *MaybePoisonOperand, NewValue: FrozenMaybePoisonOperand);
4355 return OrigOp;
4356}
4357
4358Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
4359 PHINode *PN) {
4360 // Detect whether this is a recurrence with a start value and some number of
4361 // backedge values. We'll check whether we can push the freeze through the
4362 // backedge values (possibly dropping poison flags along the way) until we
4363 // reach the phi again. In that case, we can move the freeze to the start
4364 // value.
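// An illustrative sketch of the transform (the IR names are hypothetical):
//   loop:
//     %iv      = phi i64 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.next = add nuw i64 %iv, 1
//     %fr      = freeze i64 %iv
// becomes, after dropping the poison-generating 'nuw' flag:
//   entry:
//     %start.fr = freeze i64 %start
//   loop:
//     %iv      = phi i64 [ %start.fr, %entry ], [ %iv.next, %loop ]
//     %iv.next = add i64 %iv, 1
// with all uses of %fr replaced by %iv.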
4365 Use *StartU = nullptr;
4366 SmallVector<Value *> Worklist;
4367 for (Use &U : PN->incoming_values()) {
4368 if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) {
4369 // Add backedge value to worklist.
4370 Worklist.push_back(Elt: U.get());
4371 continue;
4372 }
4373
4374 // Don't bother handling multiple start values.
4375 if (StartU)
4376 return nullptr;
4377 StartU = &U;
4378 }
4379
4380 if (!StartU || Worklist.empty())
4381 return nullptr; // Not a recurrence.
4382
4383 Value *StartV = StartU->get();
4384 BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU);
4385 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV);
4386 // We can't insert freeze if the start value is the result of the
4387 // terminator (e.g. an invoke).
4388 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
4389 return nullptr;
4390
4391 SmallPtrSet<Value *, 32> Visited;
4392 SmallVector<Instruction *> DropFlags;
4393 while (!Worklist.empty()) {
4394 Value *V = Worklist.pop_back_val();
4395 if (!Visited.insert(Ptr: V).second)
4396 continue;
4397
4398 if (Visited.size() > 32)
4399 return nullptr; // Limit the total number of values we inspect.
4400
4401 // Assume that PN is non-poison, because it will be after the transform.
4402 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
4403 continue;
4404
4405 Instruction *I = dyn_cast<Instruction>(Val: V);
4406 if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
4407 /*ConsiderFlagsAndMetadata*/ false))
4408 return nullptr;
4409
4410 DropFlags.push_back(Elt: I);
4411 append_range(C&: Worklist, R: I->operands());
4412 }
4413
4414 for (Instruction *I : DropFlags)
4415 I->dropPoisonGeneratingAnnotations();
4416
4417 if (StartNeedsFreeze) {
4418 Builder.SetInsertPoint(StartBB->getTerminator());
4419 Value *FrozenStartV = Builder.CreateFreeze(V: StartV,
4420 Name: StartV->getName() + ".fr");
4421 replaceUse(U&: *StartU, NewValue: FrozenStartV);
4422 }
4423 return replaceInstUsesWith(I&: FI, V: PN);
4424}
4425
4426bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
4427 Value *Op = FI.getOperand(i_nocapture: 0);
4428
4429 if (isa<Constant>(Val: Op) || Op->hasOneUse())
4430 return false;
4431
4432 // Move the freeze directly after the definition of its operand, so that
4433 // it dominates the maximum number of uses. Note that it may not dominate
4434 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
4435 // the normal/default destination. This is why the domination check in the
4436 // replacement below is still necessary.
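// Illustrative sketch (hypothetical IR): given
//   %x  = load i32, ptr %p
//   %a  = add i32 %x, 1
//   %fr = freeze i32 %x
// the freeze is moved directly after the definition of %x, and the dominated
// use in %a is rewritten to go through it, so a single freeze of %x remains:
//   %x  = load i32, ptr %p
//   %fr = freeze i32 %x
//   %a  = add i32 %fr, 1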
4437 BasicBlock::iterator MoveBefore;
4438 if (isa<Argument>(Val: Op)) {
4439 MoveBefore =
4440 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
4441 } else {
4442 auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef();
4443 if (!MoveBeforeOpt)
4444 return false;
4445 MoveBefore = *MoveBeforeOpt;
4446 }
4447
4448 // Don't move to the position of a debug intrinsic.
4449 if (isa<DbgInfoIntrinsic>(Val: MoveBefore))
4450 MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
4451 // Re-point the iterator to come after any debug-info records, if we're
4452 // running in "RemoveDIs" mode.
4453 MoveBefore.setHeadBit(false);
4454
4455 bool Changed = false;
4456 if (&FI != &*MoveBefore) {
4457 FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore);
4458 Changed = true;
4459 }
4460
4461 Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool {
4462 bool Dominates = DT.dominates(Def: &FI, U);
4463 Changed |= Dominates;
4464 return Dominates;
4465 });
4466
4467 return Changed;
4468}
4469
4470// Check if any direct or bitcast user of this value is a shuffle instruction.
4471static bool isUsedWithinShuffleVector(Value *V) {
4472 for (auto *U : V->users()) {
4473 if (isa<ShuffleVectorInst>(Val: U))
4474 return true;
4475 else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U))
4476 return true;
4477 }
4478 return false;
4479}
4480
4481Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
4482 Value *Op0 = I.getOperand(i_nocapture: 0);
4483
4484 if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I)))
4485 return replaceInstUsesWith(I, V);
4486
4487 // freeze (phi const, x) --> phi const, (freeze x)
4488 if (auto *PN = dyn_cast<PHINode>(Val: Op0)) {
4489 if (Instruction *NV = foldOpIntoPhi(I, PN))
4490 return NV;
4491 if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN))
4492 return NV;
4493 }
4494
4495 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I))
4496 return replaceInstUsesWith(I, V: NI);
4497
4498 // If I is freeze(undef), check its uses and fold it to a fixed constant.
4499 // - or: pick -1
4500 // - select's condition: if the true value is constant, choose it by making
4501 // the condition true.
4502 // - default: pick 0
4503 //
4504 // Note that this transform is intentionally done here rather than
4505 // via an analysis in InstSimplify or at individual user sites. That is
4506 // because we must produce the same value for all uses of the freeze -
4507 // it's the reason "freeze" exists!
4508 //
4509 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
4510 // duplicating logic for binops at least.
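// For example (illustrative IR): with
//   %f = freeze i32 undef
//   %o = or i32 %f, %y
// the only user is an 'or', so the loop below picks -1, and the uses of %f are
// replaced with that constant (letting the 'or' later fold to -1). If the
// users disagree on a preferred constant, it falls back to zero.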
4511 auto getUndefReplacement = [&I](Type *Ty) {
4512 Constant *BestValue = nullptr;
4513 Constant *NullValue = Constant::getNullValue(Ty);
4514 for (const auto *U : I.users()) {
4515 Constant *C = NullValue;
4516 if (match(V: U, P: m_Or(L: m_Value(), R: m_Value())))
4517 C = ConstantInt::getAllOnesValue(Ty);
4518 else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value())))
4519 C = ConstantInt::getTrue(Ty);
4520
4521 if (!BestValue)
4522 BestValue = C;
4523 else if (BestValue != C)
4524 BestValue = NullValue;
4525 }
4526 assert(BestValue && "Must have at least one use");
4527 return BestValue;
4528 };
4529
4530 if (match(V: Op0, P: m_Undef())) {
4531 // Don't fold freeze(undef/poison) if it's used as a vector operand in
4532 // a shuffle. This may improve codegen for shuffles that allow
4533 // unspecified inputs.
4534 if (isUsedWithinShuffleVector(V: &I))
4535 return nullptr;
4536 return replaceInstUsesWith(I, V: getUndefReplacement(I.getType()));
4537 }
4538
4539 Constant *C;
4540 if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4541 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4542 return replaceInstUsesWith(I, V: Constant::replaceUndefsWith(C, Replacement: ReplaceC));
4543 }
4544
4545 // Replace uses of Op with freeze(Op).
4546 if (freezeOtherUses(FI&: I))
4547 return &I;
4548
4549 return nullptr;
4550}
4551
4552/// Check for case where the call writes to an otherwise dead alloca. This
4553 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
4554 /// helper *only* analyzes the write; it doesn't check any other legality aspect.
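/// A typical (illustrative) C++ source pattern this is aimed at, assuming the
/// callee's only memory effect is the store through its out-parameter:
///
///   int unused;              // otherwise-dead local
///   getWidgetCount(&unused); // sole write into it; the value is never read
///
/// Here the call's write goes into a dead alloca, so sinking the call past
/// other instructions cannot change observable memory state.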
4555static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4556 auto *CB = dyn_cast<CallBase>(Val: I);
4557 if (!CB)
4558 // TODO: handle e.g. store to alloca here - only worth doing if we extend
4559 // to allow reload along used path as described below. Otherwise, this
4560 // is simply a store to a dead allocation which will be removed.
4561 return false;
4562 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI);
4563 if (!Dest)
4564 return false;
4565 auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr));
4566 if (!AI)
4567 // TODO: allow malloc?
4568 return false;
4569 // TODO: allow memory access dominated by move point? Note that since AI
4570 // could have a reference to itself captured by the call, we would need to
4571 // account for cycles in doing so.
4572 SmallVector<const User *> AllocaUsers;
4573 SmallPtrSet<const User *, 4> Visited;
4574 auto pushUsers = [&](const Instruction &I) {
4575 for (const User *U : I.users()) {
4576 if (Visited.insert(Ptr: U).second)
4577 AllocaUsers.push_back(Elt: U);
4578 }
4579 };
4580 pushUsers(*AI);
4581 while (!AllocaUsers.empty()) {
4582 auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val());
4583 if (isa<BitCastInst>(Val: UserI) || isa<GetElementPtrInst>(Val: UserI) ||
4584 isa<AddrSpaceCastInst>(Val: UserI)) {
4585 pushUsers(*UserI);
4586 continue;
4587 }
4588 if (UserI == CB)
4589 continue;
4590 // TODO: support lifetime.start/end here
4591 return false;
4592 }
4593 return true;
4594}
4595
4596/// Try to move the specified instruction from its current block into the
4597/// beginning of DestBlock, which can only happen if it's safe to move the
4598/// instruction past all of the instructions between it and the end of its
4599/// block.
4600bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
4601 BasicBlock *DestBlock) {
4602 BasicBlock *SrcBlock = I->getParent();
4603
4604 // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
4605 if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4606 I->isTerminator())
4607 return false;
4608
4609 // Do not sink static or dynamic alloca instructions. Static allocas must
4610 // remain in the entry block, and dynamic allocas must not be sunk in between
4611 // a stacksave / stackrestore pair, which would incorrectly shorten its
4612 // lifetime.
4613 if (isa<AllocaInst>(Val: I))
4614 return false;
4615
4616 // Do not sink into catchswitch blocks.
4617 if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator()))
4618 return false;
4619
4620 // Do not sink convergent call instructions.
4621 if (auto *CI = dyn_cast<CallInst>(Val: I)) {
4622 if (CI->isConvergent())
4623 return false;
4624 }
4625
4626 // Unless we can prove that the memory write isn't visible except on the
4627 // path we're sinking to, we must bail.
4628 if (I->mayWriteToMemory()) {
4629 if (!SoleWriteToDeadLocal(I, TLI))
4630 return false;
4631 }
4632
4633 // We can only sink load instructions if there is nothing between the load and
4634 // the end of block that could change the value.
4635 if (I->mayReadFromMemory()) {
4636 // We don't want to do any sophisticated alias analysis, so we only check
4637 // the instructions after I in I's parent block if we try to sink to its
4638 // successor block.
4639 if (DestBlock->getUniquePredecessor() != I->getParent())
4640 return false;
4641 for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
4642 E = I->getParent()->end();
4643 Scan != E; ++Scan)
4644 if (Scan->mayWriteToMemory())
4645 return false;
4646 }
4647
4648 I->dropDroppableUses(ShouldDrop: [&](const Use *U) {
4649 auto *I = dyn_cast<Instruction>(Val: U->getUser());
4650 if (I && I->getParent() != DestBlock) {
4651 Worklist.add(I);
4652 return true;
4653 }
4654 return false;
4655 });
4656 /// FIXME: We could remove droppable uses that are not dominated by
4657 /// the new position.
4658
4659 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4660 I->moveBefore(BB&: *DestBlock, I: InsertPos);
4661 ++NumSunkInst;
4662
4663 // Also sink all related debug uses from the source basic block. Otherwise we
4664 // get a debug use before the def. Attempt to salvage debug uses first, to
4665 // maximise the range over which variables have a location. If we cannot
4666 // salvage, then mark the location undef: we know it was supposed to receive a
4667 // new location here, but that computation has been sunk.
4668 SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4669 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
4670 findDbgUsers(DbgInsts&: DbgUsers, V: I, DbgVariableRecords: &DbgVariableRecords);
4671 if (!DbgUsers.empty())
4672 tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
4673 if (!DbgVariableRecords.empty())
4674 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
4675 DPUsers&: DbgVariableRecords);
4676
4677 // PS: there are numerous flaws with this behaviour, not least that right now
4678 // assignments can be re-ordered past other assignments to the same variable
4679 // if they use different Values. Creating more undef assignments can never be
4680 // undone. And salvaging all users outside of this block can unnecessarily
4681 // alter the lifetime of the live value that the variable refers to.
4682 // Some of these things could be resolved by tolerating debug use-before-defs
4683 // in LLVM-IR, but that depends on the instruction-referencing CodeGen backend
4684 // being used for more architectures.
4685
4686 return true;
4687}
4688
4689void InstCombinerImpl::tryToSinkInstructionDbgValues(
4690 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4691 BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) {
4692 // For all debug values in the destination block, the sunk instruction
4693 // will still be available, so they do not need to be dropped.
4694 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
4695 for (auto &DbgUser : DbgUsers)
4696 if (DbgUser->getParent() != DestBlock)
4697 DbgUsersToSalvage.push_back(Elt: DbgUser);
4698
4699 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
4700 // to clone the last appearing debug intrinsic for each given variable.
4701 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4702 for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
4703 if (DVI->getParent() == SrcBlock)
4704 DbgUsersToSink.push_back(Elt: DVI);
4705 llvm::sort(C&: DbgUsersToSink,
4706 Comp: [](auto *A, auto *B) { return B->comesBefore(A); });
4707
4708 SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
4709 SmallSet<DebugVariable, 4> SunkVariables;
4710 for (auto *User : DbgUsersToSink) {
4711 // A dbg.declare instruction should not be cloned, since there can only be
4712 // one per variable fragment. It should be left in the original place
4713 // because the sunk instruction is not an alloca (otherwise we could not be
4714 // here).
4715 if (isa<DbgDeclareInst>(Val: User))
4716 continue;
4717
4718 DebugVariable DbgUserVariable =
4719 DebugVariable(User->getVariable(), User->getExpression(),
4720 User->getDebugLoc()->getInlinedAt());
4721
4722 if (!SunkVariables.insert(V: DbgUserVariable).second)
4723 continue;
4724
4725 // Leave dbg.assign intrinsics in their original positions and there should
4726 // be no need to insert a clone.
4727 if (isa<DbgAssignIntrinsic>(Val: User))
4728 continue;
4729
4730 DIIClones.emplace_back(Args: cast<DbgVariableIntrinsic>(Val: User->clone()));
4731 if (isa<DbgDeclareInst>(Val: User) && isa<CastInst>(Val: I))
4732 DIIClones.back()->replaceVariableLocationOp(OldValue: I, NewValue: I->getOperand(i: 0));
4733 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4734 }
4735
4736 // Perform salvaging without the clones, then sink the clones.
4737 if (!DIIClones.empty()) {
4738 salvageDebugInfoForDbgValues(I&: *I, Insns: DbgUsersToSalvage, DPInsns: {});
4739 // The clones are in reverse order of original appearance, reverse again to
4740 // maintain the original order.
4741 for (auto &DIIClone : llvm::reverse(C&: DIIClones)) {
4742 DIIClone->insertBefore(InsertPos: &*InsertPos);
4743 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4744 }
4745 }
4746}
4747
4748void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
4749 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4750 BasicBlock *DestBlock,
4751 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
4752 // Implementation of tryToSinkInstructionDbgValues, but for the
4753 // DbgVariableRecord of variable assignments rather than dbg.values.
4754
4755 // Fetch all DbgVariableRecords not already in the destination.
4756 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
4757 for (auto &DVR : DbgVariableRecords)
4758 if (DVR->getParent() != DestBlock)
4759 DbgVariableRecordsToSalvage.push_back(Elt: DVR);
4760
4761 // Fetch a second collection, of DbgVariableRecords in the source block that
4762 // we're going to sink.
4763 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
4764 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
4765 if (DVR->getParent() == SrcBlock)
4766 DbgVariableRecordsToSink.push_back(Elt: DVR);
4767
4768 // Sort DbgVariableRecords according to their position in the block. This is a
4769 // partial order: DbgVariableRecords attached to different instructions will
4770 // be ordered by the instruction order, but DbgVariableRecords attached to the
4771 // same instruction won't have an order.
4772 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
4773 return B->getInstruction()->comesBefore(Other: A->getInstruction());
4774 };
4775 llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order);
4776
4777 // If there are two assignments to the same variable attached to the same
4778 // instruction, the ordering between the two assignments is important. Scan
4779 // for this (rare) case and establish which is the last assignment.
4780 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
4781 SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
4782 if (DbgVariableRecordsToSink.size() > 1) {
4783 SmallDenseMap<InstVarPair, unsigned> CountMap;
4784 // Count how many assignments to each variable there is per instruction.
4785 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4786 DebugVariable DbgUserVariable =
4787 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4788 DVR->getDebugLoc()->getInlinedAt());
4789 CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1;
4790 }
4791
4792 // If there are any instructions with two assignments, add them to the
4793 // FilterOutMap to record that they need extra filtering.
4794 SmallPtrSet<const Instruction *, 4> DupSet;
4795 for (auto It : CountMap) {
4796 if (It.second > 1) {
4797 FilterOutMap[It.first] = nullptr;
4798 DupSet.insert(Ptr: It.first.first);
4799 }
4800 }
4801
4802 // For all instruction/variable pairs needing extra filtering, find the
4803 // latest assignment.
4804 for (const Instruction *Inst : DupSet) {
4805 for (DbgVariableRecord &DVR :
4806 llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) {
4807 DebugVariable DbgUserVariable =
4808 DebugVariable(DVR.getVariable(), DVR.getExpression(),
4809 DVR.getDebugLoc()->getInlinedAt());
4810 auto FilterIt =
4811 FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable));
4812 if (FilterIt == FilterOutMap.end())
4813 continue;
4814 if (FilterIt->second != nullptr)
4815 continue;
4816 FilterIt->second = &DVR;
4817 }
4818 }
4819 }
4820
4821 // Perform cloning of the DbgVariableRecords that we plan on sinking, filtering
4822 // out any duplicate assignments identified above.
4823 SmallVector<DbgVariableRecord *, 2> DVRClones;
4824 SmallSet<DebugVariable, 4> SunkVariables;
4825 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4826 if (DVR->Type == DbgVariableRecord::LocationType::Declare)
4827 continue;
4828
4829 DebugVariable DbgUserVariable =
4830 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4831 DVR->getDebugLoc()->getInlinedAt());
4832
4833 // For any variable where there were multiple assignments in the same place,
4834 // ignore all but the last assignment.
4835 if (!FilterOutMap.empty()) {
4836 InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable);
4837 auto It = FilterOutMap.find(Val: IVP);
4838
4839 // Filter out.
4840 if (It != FilterOutMap.end() && It->second != DVR)
4841 continue;
4842 }
4843
4844 if (!SunkVariables.insert(V: DbgUserVariable).second)
4845 continue;
4846
4847 if (DVR->isDbgAssign())
4848 continue;
4849
4850 DVRClones.emplace_back(Args: DVR->clone());
4851 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
4852 }
4853
4854 // Perform salvaging without the clones, then sink the clones.
4855 if (DVRClones.empty())
4856 return;
4857
4858 salvageDebugInfoForDbgValues(I&: *I, Insns: {}, DPInsns: DbgVariableRecordsToSalvage);
4859
4860 // The clones are in reverse order of original appearance. Assert that the
4861 // head bit is set on the iterator as we _should_ have received it via
4862 // getFirstInsertionPt. Inserting like this will reverse the clone order as
4863 // we'll repeatedly insert at the head, such as:
4864 // DVR-3 (third insertion goes here)
4865 // DVR-2 (second insertion goes here)
4866 // DVR-1 (first insertion goes here)
4867 // Any-Prior-DVRs
4868 // InsertPtInst
4869 assert(InsertPos.getHeadBit());
4870 for (DbgVariableRecord *DVRClone : DVRClones) {
4871 InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos);
4872 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
4873 }
4874}
4875
4876bool InstCombinerImpl::run() {
4877 while (!Worklist.isEmpty()) {
4878 // Walk deferred instructions in reverse order, and push them to the
4879 // worklist, which means they'll end up popped from the worklist in-order.
4880 while (Instruction *I = Worklist.popDeferred()) {
4881 // Check to see if we can DCE the instruction. We do this already here to
4882 // reduce the number of uses and thus allow other folds to trigger.
4883 // Note that eraseInstFromFunction() may push additional instructions on
4884 // the deferred worklist, so this will DCE whole instruction chains.
4885 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
4886 eraseInstFromFunction(I&: *I);
4887 ++NumDeadInst;
4888 continue;
4889 }
4890
4891 Worklist.push(I);
4892 }
4893
4894 Instruction *I = Worklist.removeOne();
4895 if (I == nullptr) continue; // skip null values.
4896
4897 // Check to see if we can DCE the instruction.
4898 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
4899 eraseInstFromFunction(I&: *I);
4900 ++NumDeadInst;
4901 continue;
4902 }
4903
4904 if (!DebugCounter::shouldExecute(CounterName: VisitCounter))
4905 continue;
4906
4907 // See if we can trivially sink this instruction to its user if we can
4908 // prove that the successor is not executed more frequently than our block.
4909 // Return the UserBlock if successful.
4910 auto getOptionalSinkBlockForInst =
4911 [this](Instruction *I) -> std::optional<BasicBlock *> {
4912 if (!EnableCodeSinking)
4913 return std::nullopt;
4914
4915 BasicBlock *BB = I->getParent();
4916 BasicBlock *UserParent = nullptr;
4917 unsigned NumUsers = 0;
4918
4919 for (auto *U : I->users()) {
4920 if (U->isDroppable())
4921 continue;
4922 if (NumUsers > MaxSinkNumUsers)
4923 return std::nullopt;
4924
4925 Instruction *UserInst = cast<Instruction>(Val: U);
4926 // Special handling for Phi nodes - get the block the use occurs in.
4927 if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) {
4928 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4929 if (PN->getIncomingValue(i) == I) {
4930 // Bail out if we have uses in different blocks. We don't do any
4931 // sophisticated analysis (i.e. finding the NearestCommonDominator of
4932 // these use blocks).
4933 if (UserParent && UserParent != PN->getIncomingBlock(i))
4934 return std::nullopt;
4935 UserParent = PN->getIncomingBlock(i);
4936 }
4937 }
4938 assert(UserParent && "expected to find user block!");
4939 } else {
4940 if (UserParent && UserParent != UserInst->getParent())
4941 return std::nullopt;
4942 UserParent = UserInst->getParent();
4943 }
4944
4945 // Make sure these checks are done only once; naturally, we do them the
4946 // first time we find the user parent, which saves compile time.
4947 if (NumUsers == 0) {
4948 // Try sinking to another block. If that block is unreachable, then do
4949 // not bother. SimplifyCFG should handle it.
4950 if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent))
4951 return std::nullopt;
4952
4953 auto *Term = UserParent->getTerminator();
4954 // See if the user is one of our successors that has only one
4955 // predecessor, so that we don't have to split the critical edge.
4956 // Another option is to sink into a block that ends with a
4957 // terminator that does not pass control to any other block (such as
4958 // return, unreachable or resume). In this case:
4959 // - I dominates the User (by SSA form);
4960 // - the User will be executed at most once.
4961 // So sinking I down to User is always profitable or neutral.
4962 if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term))
4963 return std::nullopt;
4964
4965 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4966 }
4967
4968 NumUsers++;
4969 }
4970
4971 // No user or only has droppable users.
4972 if (!UserParent)
4973 return std::nullopt;
4974
4975 return UserParent;
4976 };
4977
4978 auto OptBB = getOptionalSinkBlockForInst(I);
4979 if (OptBB) {
4980 auto *UserParent = *OptBB;
4981 // Okay, the CFG is simple enough, try to sink this instruction.
4982 if (tryToSinkInstruction(I, DestBlock: UserParent)) {
4983 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4984 MadeIRChange = true;
4985 // We'll add uses of the sunk instruction below, but since
4986 // sinking can expose opportunities for its *operands*, add
4987 // them to the worklist.
4988 for (Use &U : I->operands())
4989 if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get()))
4990 Worklist.push(I: OpI);
4991 }
4992 }
4993
4994 // Now that we have an instruction, try combining it to simplify it.
4995 Builder.SetInsertPoint(I);
4996 Builder.CollectMetadataToCopy(
4997 Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4998
4999#ifndef NDEBUG
5000 std::string OrigI;
5001#endif
5002 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
5003 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5004
5005 if (Instruction *Result = visit(I&: *I)) {
5006 ++NumCombined;
5007 // Should we replace the old instruction with a new one?
5008 if (Result != I) {
5009 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5010 << " New = " << *Result << '\n');
5011
5012 Result->copyMetadata(SrcInst: *I,
5013 WL: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5014 // Everything uses the new instruction now.
5015 I->replaceAllUsesWith(V: Result);
5016
5017 // Move the name to the new instruction first.
5018 Result->takeName(V: I);
5019
5020 // Insert the new instruction into the basic block...
5021 BasicBlock *InstParent = I->getParent();
5022 BasicBlock::iterator InsertPos = I->getIterator();
5023
5024 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5025 if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) {
5026 // We need to fix up the insertion point.
5027 if (isa<PHINode>(Val: I)) // PHI -> Non-PHI
5028 InsertPos = InstParent->getFirstInsertionPt();
5029 else // Non-PHI -> PHI
5030 InsertPos = InstParent->getFirstNonPHIIt();
5031 }
5032
5033 Result->insertInto(ParentBB: InstParent, It: InsertPos);
5034
5035 // Push the new instruction and any users onto the worklist.
5036 Worklist.pushUsersToWorkList(I&: *Result);
5037 Worklist.push(I: Result);
5038
5039 eraseInstFromFunction(I&: *I);
5040 } else {
5041 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5042 << " New = " << *I << '\n');
5043
5044 // If the instruction was modified, it's possible that it is now dead.
5045 // If so, remove it.
5046 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5047 eraseInstFromFunction(I&: *I);
5048 } else {
5049 Worklist.pushUsersToWorkList(I&: *I);
5050 Worklist.push(I);
5051 }
5052 }
5053 MadeIRChange = true;
5054 }
5055 }
5056
5057 Worklist.zap();
5058 return MadeIRChange;
5059}
5060
5061// Track the scopes used by !alias.scope and !noalias. In a function, a
5062// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5063// by both sets. If not, the declaration of the scope can be safely omitted.
5064// The MDNode of the scope can be omitted as well for the instructions that are
5065// part of this function. We do not do that at this point, as this might become
5066// too time consuming to do.
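// Illustrative example (hypothetical metadata): if a function contains
//   call void @llvm.experimental.noalias.scope.decl(metadata !4)
// but the scope it declares only ever appears in !noalias lists and never in
// any !alias.scope list (or vice versa), the declaration cannot influence any
// alias query inside this function, and isNoAliasScopeDeclDead() reports it
// as removable.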
5067class AliasScopeTracker {
5068 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5069 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5070
5071public:
5072 void analyse(Instruction *I) {
5073 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5074 if (!I->hasMetadataOtherThanDebugLoc())
5075 return;
5076
5077 auto Track = [](Metadata *ScopeList, auto &Container) {
5078 const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList);
5079 if (!MDScopeList || !Container.insert(MDScopeList).second)
5080 return;
5081 for (const auto &MDOperand : MDScopeList->operands())
5082 if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand))
5083 Container.insert(MDScope);
5084 };
5085
5086 Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5087 Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5088 }
5089
5090 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5091 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst);
5092 if (!Decl)
5093 return false;
5094
5095 assert(Decl->use_empty() &&
5096 "llvm.experimental.noalias.scope.decl in use ?");
5097 const MDNode *MDSL = Decl->getScopeList();
5098 assert(MDSL->getNumOperands() == 1 &&
5099 "llvm.experimental.noalias.scope should refer to a single scope");
5100 auto &MDOperand = MDSL->getOperand(I: 0);
5101 if (auto *MD = dyn_cast<MDNode>(Val: MDOperand))
5102 return !UsedAliasScopesAndLists.contains(Ptr: MD) ||
5103 !UsedNoAliasScopesAndLists.contains(Ptr: MD);
5104
5105 // Not an MDNode? Throw it away.
5106 return true;
5107 }
5108};
5109
5110/// Populate the IC worklist from a function, by walking it in reverse
5111/// post-order and adding all reachable code to the worklist.
5112///
5113/// This has a couple of tricks to make the code faster and more powerful. In
5114/// particular, we constant fold and DCE instructions as we go, to avoid adding
5115/// them to the worklist (this significantly speeds up instcombine on code where
5116/// many instructions are dead or constant). Additionally, if we find a branch
5117/// whose condition is a known constant, we only visit the reachable successors.
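/// For instance (illustrative IR), when a block ends in
///   br i1 true, label %live, label %dead
/// only %live is treated as reachable: the edge to %dead is recorded as dead
/// and any phi operands flowing along it are replaced with poison, so the main
/// combiner never has to reason about that unreachable path.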
5118bool InstCombinerImpl::prepareWorklist(
5119 Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
5120 bool MadeIRChange = false;
5121 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
5122 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5123 DenseMap<Constant *, Constant *> FoldedConstants;
5124 AliasScopeTracker SeenAliasScopes;
5125
5126 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5127 for (BasicBlock *Succ : successors(BB))
5128 if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second)
5129 for (PHINode &PN : Succ->phis())
5130 for (Use &U : PN.incoming_values())
5131 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) {
5132 U.set(PoisonValue::get(T: PN.getType()));
5133 MadeIRChange = true;
5134 }
5135 };
5136
5137 for (BasicBlock *BB : RPOT) {
5138 if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
5139 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
5140 })) {
5141 HandleOnlyLiveSuccessor(BB, nullptr);
5142 continue;
5143 }
5144 LiveBlocks.insert(Ptr: BB);
5145
5146 for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) {
5147 // ConstantProp instruction if trivially constant.
5148 if (!Inst.use_empty() &&
5149 (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0))))
5150 if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) {
5151 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5152 << '\n');
5153 Inst.replaceAllUsesWith(V: C);
5154 ++NumConstProp;
5155 if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI))
5156 Inst.eraseFromParent();
5157 MadeIRChange = true;
5158 continue;
5159 }
5160
5161 // See if we can constant fold its operands.
5162 for (Use &U : Inst.operands()) {
5163 if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U))
5164 continue;
5165
5166 auto *C = cast<Constant>(Val&: U);
5167 Constant *&FoldRes = FoldedConstants[C];
5168 if (!FoldRes)
5169 FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI);
5170
5171 if (FoldRes != C) {
5172 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5173 << "\n Old = " << *C
5174 << "\n New = " << *FoldRes << '\n');
5175 U = FoldRes;
5176 MadeIRChange = true;
5177 }
5178 }
5179
5180 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5181 // these call instructions consumes a non-trivial amount of time and
5182 // provides no value for the optimization.
5183 if (!Inst.isDebugOrPseudoInst()) {
5184 InstrsForInstructionWorklist.push_back(Elt: &Inst);
5185 SeenAliasScopes.analyse(I: &Inst);
5186 }
5187 }
5188
5189 // If this is a branch or switch on a constant, mark only the single
5190 // live successor. Otherwise assume all successors are live.
5191 Instruction *TI = BB->getTerminator();
5192 if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI); BI && BI->isConditional()) {
5193 if (isa<UndefValue>(Val: BI->getCondition())) {
5194 // Branch on undef is UB.
5195 HandleOnlyLiveSuccessor(BB, nullptr);
5196 continue;
5197 }
5198 if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
5199 bool CondVal = Cond->getZExtValue();
5200 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal));
5201 continue;
5202 }
5203 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
5204 if (isa<UndefValue>(Val: SI->getCondition())) {
5205 // Switch on undef is UB.
5206 HandleOnlyLiveSuccessor(BB, nullptr);
5207 continue;
5208 }
5209 if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) {
5210 HandleOnlyLiveSuccessor(BB,
5211 SI->findCaseValue(C: Cond)->getCaseSuccessor());
5212 continue;
5213 }
5214 }
5215 }
5216
5217 // Remove instructions inside unreachable blocks. This prevents the
5218 // instcombine code from having to deal with some bad special cases, and
5219 // reduces use counts of instructions.
5220 for (BasicBlock &BB : F) {
5221 if (LiveBlocks.count(Ptr: &BB))
5222 continue;
5223
5224 unsigned NumDeadInstInBB;
5225 unsigned NumDeadDbgInstInBB;
5226 std::tie(args&: NumDeadInstInBB, args&: NumDeadDbgInstInBB) =
5227 removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
5228
5229 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
5230 NumDeadInst += NumDeadInstInBB;
5231 }
5232
5233 // Once we've found all of the instructions to add to instcombine's worklist,
5234 // add them in reverse order. This way instcombine will visit from the top
5235 // of the function down. This jives well with the way that it adds all uses
5236 // of instructions to the worklist after doing a transformation, thus avoiding
5237 // some N^2 behavior in pathological cases.
5238 Worklist.reserve(Size: InstrsForInstructionWorklist.size());
5239 for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) {
5240 // DCE instruction if trivially dead. As we iterate in reverse program
5241 // order here, we will clean up whole chains of dead instructions.
5242 if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) ||
5243 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5244 ++NumDeadInst;
5245 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5246 salvageDebugInfo(I&: *Inst);
5247 Inst->eraseFromParent();
5248 MadeIRChange = true;
5249 continue;
5250 }
5251
5252 Worklist.push(I: Inst);
5253 }
5254
5255 return MadeIRChange;
5256}
5257
5258static bool combineInstructionsOverFunction(
5259 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
5260 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
5261 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
5262 BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI,
5263 const InstCombineOptions &Opts) {
5264 auto &DL = F.getParent()->getDataLayout();
5265
5266 /// Builder - This is an IRBuilder that automatically inserts new
5267 /// instructions into the worklist when they are created.
5268 IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
5269 F.getContext(), TargetFolder(DL),
5270 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5271 Worklist.add(I);
5272 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
5273 AC.registerAssumption(CI: Assume);
5274 }));
5275
5276 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
5277
5278 // Lower dbg.declare intrinsics, otherwise their value may be clobbered
5279 // by the instcombiner.
5280 bool MadeIRChange = false;
5281 if (ShouldLowerDbgDeclare)
5282 MadeIRChange = LowerDbgDeclare(F);
5283
5284 // Iterate while there is work to do.
5285 unsigned Iteration = 0;
5286 while (true) {
5287 ++Iteration;
5288
5289 if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
5290 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5291 << " on " << F.getName()
5292 << " reached; stopping without verifying fixpoint\n");
5293 break;
5294 }
5295
5296 ++NumWorklistIterations;
5297 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5298 << F.getName() << "\n");
5299
5300 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
5301 ORE, BFI, BPI, PSI, DL, LI);
5302 IC.MaxArraySizeForCombine = MaxArraySize;
5303 bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
5304 MadeChangeInThisIteration |= IC.run();
5305 if (!MadeChangeInThisIteration)
5306 break;
5307
5308 MadeIRChange = true;
5309 if (Iteration > Opts.MaxIterations) {
5310 report_fatal_error(
5311 reason: "Instruction Combining did not reach a fixpoint after " +
5312 Twine(Opts.MaxIterations) + " iterations",
5313 /*GenCrashDiag=*/gen_crash_diag: false);
5314 }
5315 }
5316
5317 if (Iteration == 1)
5318 ++NumOneIteration;
5319 else if (Iteration == 2)
5320 ++NumTwoIterations;
5321 else if (Iteration == 3)
5322 ++NumThreeIterations;
5323 else
5324 ++NumFourOrMoreIterations;
5325
5326 return MadeIRChange;
5327}
5328
5329InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
5330
5331void InstCombinePass::printPipeline(
5332 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5333 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
5334 OS, MapClassName2PassName);
5335 OS << '<';
5336 OS << "max-iterations=" << Options.MaxIterations << ";";
5337 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
5338 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
5339 OS << '>';
5340}
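// With hypothetical option values (MaxIterations = 2, loop info disabled,
// fixpoint verification enabled), the textual pipeline element printed above
// would look like:
//   instcombine<max-iterations=2;no-use-loop-info;verify-fixpoint>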
5341
5342PreservedAnalyses InstCombinePass::run(Function &F,
5343 FunctionAnalysisManager &AM) {
5344 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
5345 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
5346 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
5347 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
5348 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
5349
5350 // TODO: Only use LoopInfo when the option is set. This requires that the
5351 // callers in the pass pipeline explicitly set the option.
5352 auto *LI = AM.getCachedResult<LoopAnalysis>(IR&: F);
5353 if (!LI && Options.UseLoopInfo)
5354 LI = &AM.getResult<LoopAnalysis>(IR&: F);
5355
5356 auto *AA = &AM.getResult<AAManager>(IR&: F);
5357 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
5358 ProfileSummaryInfo *PSI =
5359 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
5360 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
5361 &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
5362 auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F);
5363
5364 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5365 BFI, BPI, PSI, LI, Opts: Options))
5366 // No changes, all analyses are preserved.
5367 return PreservedAnalyses::all();
5368
5369 // Mark all the analyses that instcombine updates as preserved.
5370 PreservedAnalyses PA;
5371 PA.preserveSet<CFGAnalyses>();
5372 return PA;
5373}
5374
5375void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
5376 AU.setPreservesCFG();
5377 AU.addRequired<AAResultsWrapperPass>();
5378 AU.addRequired<AssumptionCacheTracker>();
5379 AU.addRequired<TargetLibraryInfoWrapperPass>();
5380 AU.addRequired<TargetTransformInfoWrapperPass>();
5381 AU.addRequired<DominatorTreeWrapperPass>();
5382 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5383 AU.addPreserved<DominatorTreeWrapperPass>();
5384 AU.addPreserved<AAResultsWrapperPass>();
5385 AU.addPreserved<BasicAAWrapperPass>();
5386 AU.addPreserved<GlobalsAAWrapperPass>();
5387 AU.addRequired<ProfileSummaryInfoWrapperPass>();
5388 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
5389}
5390
5391bool InstructionCombiningPass::runOnFunction(Function &F) {
5392 if (skipFunction(F))
5393 return false;
5394
5395 // Required analyses.
5396 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
5397 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5398 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
5399 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
5400 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5401 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5402
5403 // Optional analyses.
5404 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
5405 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
5406 ProfileSummaryInfo *PSI =
5407 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
5408 BlockFrequencyInfo *BFI =
5409 (PSI && PSI->hasProfileSummary()) ?
5410 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
5411 nullptr;
5412 BranchProbabilityInfo *BPI = nullptr;
5413 if (auto *WrapperPass =
5414 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
5415 BPI = &WrapperPass->getBPI();
5416
5417 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5418 BFI, BPI, PSI, LI,
5419 Opts: InstCombineOptions());
5420}
5421
5422char InstructionCombiningPass::ID = 0;
5423
5424InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
5425 initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
5426}
5427
5428INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
5429 "Combine redundant instructions", false, false)
5430INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
5431INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
5432INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
5433INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5434INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
5435INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
5436INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
5437INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
5438INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
5439INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
5440 "Combine redundant instructions", false, false)
5441
5442// Initialization Routines
5443void llvm::initializeInstCombine(PassRegistry &Registry) {
5444 initializeInstructionCombiningPassPass(Registry);
5445}
5446
5447FunctionPass *llvm::createInstructionCombiningPass() {
5448 return new InstructionCombiningPass();
5449}
5450
