InstCombineVectorOps.cpp source code [llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp]

1	//===- InstCombineVectorOps.cpp -------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements instcombine for ExtractElement, InsertElement and
10	// ShuffleVector.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "InstCombineInternal.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/ArrayRef.h"
17	#include "llvm/ADT/DenseMap.h"
18	#include "llvm/ADT/STLExtras.h"
19	#include "llvm/ADT/SmallBitVector.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/Analysis/InstructionSimplify.h"
23	#include "llvm/Analysis/VectorUtils.h"
24	#include "llvm/IR/BasicBlock.h"
25	#include "llvm/IR/Constant.h"
26	#include "llvm/IR/Constants.h"
27	#include "llvm/IR/DerivedTypes.h"
28	#include "llvm/IR/InstrTypes.h"
29	#include "llvm/IR/Instruction.h"
30	#include "llvm/IR/Instructions.h"
31	#include "llvm/IR/Operator.h"
32	#include "llvm/IR/PatternMatch.h"
33	#include "llvm/IR/Type.h"
34	#include "llvm/IR/User.h"
35	#include "llvm/IR/Value.h"
36	#include "llvm/Support/Casting.h"
37	#include "llvm/Support/ErrorHandling.h"
38	#include "llvm/Transforms/InstCombine/InstCombiner.h"
39	#include <cassert>
40	#include <cstdint>
41	#include <iterator>
42	#include <utility>
43
44	#define DEBUG_TYPE "instcombine"
45
46	using namespace llvm;
47	using namespace PatternMatch;
48
49	STATISTIC(NumAggregateReconstructionsSimplified,
50	"Number of aggregate reconstructions turned into reuse of the "
51	"original aggregate");
52
53	/// Return true if the value is cheaper to scalarize than it is to leave as a
54	/// vector operation. If the extract index \p EI is a constant integer then
55	/// some operations may be cheap to scalarize.
56	///
57	/// FIXME: It's possible to create more instructions than previously existed.
58	static bool cheapToScalarize(Value V, Value EI) {
59	ConstantInt *CEI = dyn_cast<ConstantInt>(Val: EI);
60
61	// If we can pick a scalar constant value out of a vector, that is free.
62	if (auto *C = dyn_cast<Constant>(Val: V))
63	return CEI \|\| C->getSplatValue();
64
65	if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
66	ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
67	// Index needs to be lower than the minimum size of the vector, because
68	// for scalable vector, the vector size is known at run time.
69	return CEI->getValue().ult(RHS: EC.getKnownMinValue());
70	}
71
72	// An insertelement to the same constant index as our extract will simplify
73	// to the scalar inserted element. An insertelement to a different constant
74	// index is irrelevant to our extract.
75	if (match(V, P: m_InsertElt(Val: m_Value(), Elt: m_Value(), Idx: m_ConstantInt())))
76	return CEI;
77
78	if (match(V, P: m_OneUse(SubPattern: m_Load(Op: m_Value()))))
79	return true;
80
81	if (match(V, P: m_OneUse(SubPattern: m_UnOp())))
82	return true;
83
84	Value V0, V1;
85	if (match(V, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: V0), R: m_Value(V&: V1)))))
86	if (cheapToScalarize(V: V0, EI) \|\| cheapToScalarize(V: V1, EI))
87	return true;
88
89	CmpInst::Predicate UnusedPred;
90	if (match(V, P: m_OneUse(SubPattern: m_Cmp(Pred&: UnusedPred, L: m_Value(V&: V0), R: m_Value(V&: V1)))))
91	if (cheapToScalarize(V: V0, EI) \|\| cheapToScalarize(V: V1, EI))
92	return true;
93
94	return false;
95	}
96
97	// If we have a PHI node with a vector type that is only used to feed
98	// itself and be an operand of extractelement at a constant location,
99	// try to replace the PHI of the vector type with a PHI of a scalar type.
100	Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101	PHINode *PN) {
102	SmallVector<Instruction *, `2`> Extracts;
103	// The users we want the PHI to have are:
104	// 1) The EI ExtractElement (we already know this)
105	// 2) Possibly more ExtractElements with the same index.
106	// 3) Another operand, which will feed back into the PHI.
107	Instruction PHIUser = nullptr*;
108	for (auto *U : PN->users()) {
109	if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(Val: U)) {
110	if (EI.getIndexOperand() == EU->getIndexOperand())
111	Extracts.push_back(Elt: EU);
112	else
113	return nullptr;
114	} else if (!PHIUser) {
115	PHIUser = cast<Instruction>(Val: U);
116	} else {
117	return nullptr;
118	}
119	}
120
121	if (!PHIUser)
122	return nullptr;
123
124	// Verify that this PHI user has one use, which is the PHI itself,
125	// and that it is a binary operation which is cheap to scalarize.
126	// otherwise return nullptr.
127	if (!PHIUser->hasOneUse() \|\| !(PHIUser->user_back() == PN) \|\|
128	!(isa<BinaryOperator>(Val: PHIUser)) \|\|
129	!cheapToScalarize(V: PHIUser, EI: EI.getIndexOperand()))
130	return nullptr;
131
132	// Create a scalar PHI node that will replace the vector PHI node
133	// just before the current PHI node.
134	PHINode *scalarPHI = cast<PHINode>(Val: InsertNewInstWith(
135	New: PHINode::Create(Ty: EI.getType(), NumReservedValues: PN->getNumIncomingValues(), NameStr: ""), Old: PN->getIterator()));
136	// Scalarize each PHI operand.
137	for (unsigned i = `0`; i < PN->getNumIncomingValues(); i++) {
138	Value *PHIInVal = PN->getIncomingValue(i);
139	BasicBlock *inBB = PN->getIncomingBlock(i);
140	Value *Elt = EI.getIndexOperand();
141	// If the operand is the PHI induction variable:
142	if (PHIInVal == PHIUser) {
143	// Scalarize the binary operation. Its first operand is the
144	// scalar PHI, and the second operand is extracted from the other
145	// vector operand.
146	BinaryOperator *B0 = cast<BinaryOperator>(Val: PHIUser);
147	unsigned opId = (B0->getOperand(i_nocapture: `0`) == PN) ? `1` : `0`;
148	Value *Op = InsertNewInstWith(
149	New: ExtractElementInst::Create(Vec: B0->getOperand(i_nocapture: opId), Idx: Elt,
150	NameStr: B0->getOperand(i_nocapture: opId)->getName() + ".Elt"),
151	Old: B0->getIterator());
152	Value *newPHIUser = InsertNewInstWith(
153	New: BinaryOperator::CreateWithCopiedFlags(Opc: B0->getOpcode(),
154	V1: scalarPHI, V2: Op, CopyO: B0), Old: B0->getIterator());
155	scalarPHI->addIncoming(V: newPHIUser, BB: inBB);
156	} else {
157	// Scalarize PHI input:
158	Instruction *newEI = ExtractElementInst::Create(Vec: PHIInVal, Idx: Elt, NameStr: "");
159	// Insert the new instruction into the predecessor basic block.
160	Instruction *pos = dyn_cast<Instruction>(Val: PHIInVal);
161	BasicBlock::iterator InsertPos;
162	if (pos && !isa<PHINode>(Val: pos)) {
163	InsertPos = ++pos->getIterator();
164	} else {
165	InsertPos = inBB->getFirstInsertionPt();
166	}
167
168	InsertNewInstWith(New: newEI, Old: InsertPos);
169
170	scalarPHI->addIncoming(V: newEI, BB: inBB);
171	}
172	}
173
174	for (auto *E : Extracts) {
175	replaceInstUsesWith(I&: *E, V: scalarPHI);
176	// Add old extract to worklist for DCE.
177	addToWorklist(I: E);
178	}
179
180	return &EI;
181	}
182
183	Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
184	Value *X;
185	uint64_t ExtIndexC;
186	if (!match(V: Ext.getVectorOperand(), P: m_BitCast(Op: m_Value(V&: X))) \|\|
187	!match(V: Ext.getIndexOperand(), P: m_ConstantInt(V&: ExtIndexC)))
188	return nullptr;
189
190	ElementCount NumElts =
191	cast<VectorType>(Val: Ext.getVectorOperandType())->getElementCount();
192	Type *DestTy = Ext.getType();
193	unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
194	bool IsBigEndian = DL.isBigEndian();
195
196	// If we are casting an integer to vector and extracting a portion, that is
197	// a shift-right and truncate.
198	if (X->getType()->isIntegerTy()) {
199	assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
200	"Expected fixed vector type for bitcast from scalar integer");
201
202	// Big endian requires adjusting the extract index since MSB is at index 0.
203	// LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
204	// BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
205	if (IsBigEndian)
206	ExtIndexC = NumElts.getKnownMinValue() - `1` - ExtIndexC;
207	unsigned ShiftAmountC = ExtIndexC * DestWidth;
208	if (!ShiftAmountC \|\|
209	(isDesirableIntType(BitWidth: X->getType()->getPrimitiveSizeInBits()) &&
210	Ext.getVectorOperand()->hasOneUse())) {
211	if (ShiftAmountC)
212	X = Builder.CreateLShr(LHS: X, RHS: ShiftAmountC, Name: "extelt.offset");
213	if (DestTy->isFloatingPointTy()) {
214	Type *DstIntTy = IntegerType::getIntNTy(C&: X->getContext(), N: DestWidth);
215	Value *Trunc = Builder.CreateTrunc(V: X, DestTy: DstIntTy);
216	return new BitCastInst (Trunc, DestTy);
217	}
218	return new TruncInst (X, DestTy);
219	}
220	}
221
222	if (!X->getType()->isVectorTy())
223	return nullptr;
224
225	// If this extractelement is using a bitcast from a vector of the same number
226	// of elements, see if we can find the source element from the source vector:
227	// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
228	auto *SrcTy = cast<VectorType>(Val: X->getType());
229	ElementCount NumSrcElts = SrcTy->getElementCount();
230	if (NumSrcElts == NumElts)
231	if (Value *Elt = findScalarElement(V: X, EltNo: ExtIndexC))
232	return new BitCastInst (Elt, DestTy);
233
234	assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
235	"Src and Dst must be the same sort of vector type");
236
237	// If the source elements are wider than the destination, try to shift and
238	// truncate a subset of scalar bits of an insert op.
239	if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
240	Value *Scalar;
241	Value *Vec;
242	uint64_t InsIndexC;
243	if (!match(V: X, P: m_InsertElt(Val: m_Value(V&: Vec), Elt: m_Value(V&: Scalar),
244	Idx: m_ConstantInt(V&: InsIndexC))))
245	return nullptr;
246
247	// The extract must be from the subset of vector elements that we inserted
248	// into. Example: if we inserted element 1 of a <2 x i64> and we are
249	// extracting an i16 (narrowing ratio = 4), then this extract must be from 1
250	// of elements 4-7 of the bitcasted vector.
251	unsigned NarrowingRatio =
252	NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
253
254	if (ExtIndexC / NarrowingRatio != InsIndexC) {
255	// Remove insertelement, if we don't use the inserted element.
256	// extractelement (bitcast (insertelement (Vec, b)), a) ->
257	// extractelement (bitcast (Vec), a)
258	// FIXME: this should be removed to SimplifyDemandedVectorElts,
259	// once scale vectors are supported.
260	if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
261	Value *NewBC = Builder.CreateBitCast(V: Vec, DestTy: Ext.getVectorOperandType());
262	return ExtractElementInst::Create(Vec: NewBC, Idx: Ext.getIndexOperand());
263	}
264	return nullptr;
265	}
266
267	// We are extracting part of the original scalar. How that scalar is
268	// inserted into the vector depends on the endian-ness. Example:
269	// Vector Byte Elt Index: 0 1 2 3 4 5 6 7
270	// +--+--+--+--+--+--+--+--+
271	// inselt <2 x i32> V, <i32> S, 1: \|V0\|V1\|V2\|V3\|S0\|S1\|S2\|S3\|
272	// extelt <4 x i16> V', 3: \| \|S2\|S3\|
273	// +--+--+--+--+--+--+--+--+
274	// If this is little-endian, S2\|S3 are the MSB of the 32-bit 'S' value.
275	// If this is big-endian, S2\|S3 are the LSB of the 32-bit 'S' value.
276	// In this example, we must right-shift little-endian. Big-endian is just a
277	// truncate.
278	unsigned Chunk = ExtIndexC % NarrowingRatio;
279	if (IsBigEndian)
280	Chunk = NarrowingRatio - `1` - Chunk;
281
282	// Bail out if this is an FP vector to FP vector sequence. That would take
283	// more instructions than we started with unless there is no shift, and it
284	// may not be handled as well in the backend.
285	bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
286	bool NeedDestBitcast = DestTy->isFloatingPointTy();
287	if (NeedSrcBitcast && NeedDestBitcast)
288	return nullptr;
289
290	unsigned SrcWidth = SrcTy->getScalarSizeInBits();
291	unsigned ShAmt = Chunk * DestWidth;
292
293	// TODO: This limitation is more strict than necessary. We could sum the
294	// number of new instructions and subtract the number eliminated to know if
295	// we can proceed.
296	if (!X->hasOneUse() \|\| !Ext.getVectorOperand()->hasOneUse())
297	if (NeedSrcBitcast \|\| NeedDestBitcast)
298	return nullptr;
299
300	if (NeedSrcBitcast) {
301	Type *SrcIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: SrcWidth);
302	Scalar = Builder.CreateBitCast(V: Scalar, DestTy: SrcIntTy);
303	}
304
305	if (ShAmt) {
306	// Bail out if we could end with more instructions than we started with.
307	if (!Ext.getVectorOperand()->hasOneUse())
308	return nullptr;
309	Scalar = Builder.CreateLShr(LHS: Scalar, RHS: ShAmt);
310	}
311
312	if (NeedDestBitcast) {
313	Type *DestIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: DestWidth);
314	return new BitCastInst (Builder.CreateTrunc(V: Scalar, DestTy: DestIntTy), DestTy);
315	}
316	return new TruncInst (Scalar, DestTy);
317	}
318
319	return nullptr;
320	}
321
322	/// Find elements of V demanded by UserInstr.
323	static APInt findDemandedEltsBySingleUser(Value V, Instruction UserInstr) {
324	unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
325
326	// Conservatively assume that all elements are needed.
327	APInt UsedElts(APInt::getAllOnes(numBits: VWidth));
328
329	switch (UserInstr->getOpcode()) {
330	case Instruction::ExtractElement: {
331	ExtractElementInst *EEI = cast<ExtractElementInst>(Val: UserInstr);
332	assert(EEI->getVectorOperand() == V);
333	ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(Val: EEI->getIndexOperand());
334	if (EEIIndexC && EEIIndexC->getValue().ult(RHS: VWidth)) {
335	UsedElts = APInt::getOneBitSet(numBits: VWidth, BitNo: EEIIndexC->getZExtValue());
336	}
337	break;
338	}
339	case Instruction::ShuffleVector: {
340	ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(Val: UserInstr);
341	unsigned MaskNumElts =
342	cast<FixedVectorType>(Val: UserInstr->getType())->getNumElements();
343
344	UsedElts = APInt (VWidth, `0`);
345	for (unsigned i = `0`; i < MaskNumElts; i++) {
346	unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
347	if (MaskVal == -`1u` \|\| MaskVal >= `2` * VWidth)
348	continue;
349	if (Shuffle->getOperand(i_nocapture: `0`) == V && (MaskVal < VWidth))
350	UsedElts.setBit(MaskVal);
351	if (Shuffle->getOperand(i_nocapture: `1`) == V &&
352	((MaskVal >= VWidth) && (MaskVal < `2` * VWidth)))
353	UsedElts.setBit(MaskVal - VWidth);
354	}
355	break;
356	}
357	default:
358	break;
359	}
360	return UsedElts;
361	}
362
363	/// Find union of elements of V demanded by all its users.
364	/// If it is known by querying findDemandedEltsBySingleUser that
365	/// no user demands an element of V, then the corresponding bit
366	/// remains unset in the returned value.
367	static APInt findDemandedEltsByAllUsers(Value *V) {
368	unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
369
370	APInt UnionUsedElts(VWidth, `0`);
371	for (const Use &U : V->uses()) {
372	if (Instruction *I = dyn_cast<Instruction>(Val: U.getUser())) {
373	UnionUsedElts \|= findDemandedEltsBySingleUser(V, UserInstr: I);
374	} else {
375	UnionUsedElts = APInt::getAllOnes(numBits: VWidth);
376	break;
377	}
378
379	if (UnionUsedElts.isAllOnes())
380	break;
381	}
382
383	return UnionUsedElts;
384	}
385
386	/// Given a constant index for a extractelement or insertelement instruction,
387	/// return it with the canonical type if it isn't already canonical. We
388	/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
389	/// matter, we just want a consistent type to simplify CSE.
390	static ConstantInt getPreferredVectorIndex(ConstantInt IndexC) {
391	const unsigned IndexBW = IndexC->getBitWidth();
392	if (IndexBW == `64` \|\| IndexC->getValue().getActiveBits() > `64`)
393	return nullptr;
394	return ConstantInt::get(Context&: IndexC->getContext(),
395	V: IndexC->getValue().zextOrTrunc(width: `64`));
396	}
397
398	Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
399	Value *SrcVec = EI.getVectorOperand();
400	Value *Index = EI.getIndexOperand();
401	if (Value *V = simplifyExtractElementInst(Vec: SrcVec, Idx: Index,
402	Q: SQ.getWithInstruction(I: &EI)))
403	return replaceInstUsesWith(I&: EI, V);
404
405	// extractelt (select %x, %vec1, %vec2), %const ->
406	// select %x, %vec1[%const], %vec2[%const]
407	// TODO: Support constant folding of multiple select operands:
408	// extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
409	// If the extractelement will for instance try to do out of bounds accesses
410	// because of the values of %c1 and/or %c2, the sequence could be optimized
411	// early. This is currently not possible because constant folding will reach
412	// an unreachable assertion if it doesn't find a constant operand.
413	if (SelectInst *SI = dyn_cast<SelectInst>(Val: EI.getVectorOperand()))
414	if (SI->getCondition()->getType()->isIntegerTy() &&
415	isa<Constant>(Val: EI.getIndexOperand()))
416	if (Instruction *R = FoldOpIntoSelect(Op&: EI, SI))
417	return R;
418
419	// If extracting a specified index from the vector, see if we can recursively
420	// find a previously computed scalar that was inserted into the vector.
421	auto *IndexC = dyn_cast<ConstantInt>(Val: Index);
422	if (IndexC) {
423	// Canonicalize type of constant indices to i64 to simplify CSE
424	if (auto *NewIdx = getPreferredVectorIndex(IndexC))
425	return replaceOperand(I&: EI, OpNum: `1`, V: NewIdx);
426
427	ElementCount EC = EI.getVectorOperandType()->getElementCount();
428	unsigned NumElts = EC.getKnownMinValue();
429
430	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: SrcVec)) {
431	Intrinsic::ID IID = II->getIntrinsicID();
432	// Index needs to be lower than the minimum size of the vector, because
433	// for scalable vector, the vector size is known at run time.
434	if (IID == Intrinsic::experimental_stepvector &&
435	IndexC->getValue().ult(RHS: NumElts)) {
436	Type *Ty = EI.getType();
437	unsigned BitWidth = Ty->getIntegerBitWidth();
438	Value *Idx;
439	// Return index when its value does not exceed the allowed limit
440	// for the element type of the vector, otherwise return undefined.
441	if (IndexC->getValue().getActiveBits() <= BitWidth)
442	Idx = ConstantInt::get(Ty, V: IndexC->getValue().zextOrTrunc(width: BitWidth));
443	else
444	Idx = PoisonValue::get(T: Ty);
445	return replaceInstUsesWith(I&: EI, V: Idx);
446	}
447	}
448
449	// InstSimplify should handle cases where the index is invalid.
450	// For fixed-length vector, it's invalid to extract out-of-range element.
451	if (!EC.isScalable() && IndexC->getValue().uge(RHS: NumElts))
452	return nullptr;
453
454	if (Instruction *I = foldBitcastExtElt(Ext&: EI))
455	return I;
456
457	// If there's a vector PHI feeding a scalar use through this extractelement
458	// instruction, try to scalarize the PHI.
459	if (auto *Phi = dyn_cast<PHINode>(Val: SrcVec))
460	if (Instruction *ScalarPHI = scalarizePHI(EI, PN: Phi))
461	return ScalarPHI;
462	}
463
464	// TODO come up with a n-ary matcher that subsumes both unary and
465	// binary matchers.
466	UnaryOperator *UO;
467	if (match(V: SrcVec, P: m_UnOp(I&: UO)) && cheapToScalarize(V: SrcVec, EI: Index)) {
468	// extelt (unop X), Index --> unop (extelt X, Index)
469	Value *X = UO->getOperand(i_nocapture: `0`);
470	Value *E = Builder.CreateExtractElement(Vec: X, Idx: Index);
471	return UnaryOperator::CreateWithCopiedFlags(Opc: UO->getOpcode(), V: E, CopyO: UO);
472	}
473
474	BinaryOperator *BO;
475	if (match(V: SrcVec, P: m_BinOp(I&: BO)) && cheapToScalarize(V: SrcVec, EI: Index)) {
476	// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
477	Value X = BO->getOperand(i_nocapture: `0`), Y = BO->getOperand(i_nocapture: `1`);
478	Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
479	Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
480	return BinaryOperator::CreateWithCopiedFlags(Opc: BO->getOpcode(), V1: E0, V2: E1, CopyO: BO);
481	}
482
483	Value X, Y;
484	CmpInst::Predicate Pred;
485	if (match(V: SrcVec, P: m_Cmp(Pred, L: m_Value(V&: X), R: m_Value(V&: Y))) &&
486	cheapToScalarize(V: SrcVec, EI: Index)) {
487	// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
488	Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
489	Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
490	CmpInst *SrcCmpInst = cast<CmpInst>(Val: SrcVec);
491	return CmpInst::CreateWithCopiedFlags(Op: SrcCmpInst->getOpcode(), Pred, S1: E0, S2: E1,
492	FlagsSource: SrcCmpInst);
493	}
494
495	if (auto *I = dyn_cast<Instruction>(Val: SrcVec)) {
496	if (auto *IE = dyn_cast<InsertElementInst>(Val: I)) {
497	// instsimplify already handled the case where the indices are constants
498	// and equal by value, if both are constants, they must not be the same
499	// value, extract from the pre-inserted value instead.
500	if (isa<Constant>(Val: IE->getOperand(i_nocapture: `2`)) && IndexC)
501	return replaceOperand(I&: EI, OpNum: `0`, V: IE->getOperand(i_nocapture: `0`));
502	} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) {
503	auto *VecType = cast<VectorType>(Val: GEP->getType());
504	ElementCount EC = VecType->getElementCount();
505	uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : `0`;
506	if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
507	// Find out why we have a vector result - these are a few examples:
508	// 1. We have a scalar pointer and a vector of indices, or
509	// 2. We have a vector of pointers and a scalar index, or
510	// 3. We have a vector of pointers and a vector of indices, etc.
511	// Here we only consider combining when there is exactly one vector
512	// operand, since the optimization is less obviously a win due to
513	// needing more than one extractelements.
514
515	unsigned VectorOps =
516	llvm::count_if(Range: GEP->operands(), P: [](const Value *V) {
517	return isa<VectorType>(Val: V->getType());
518	});
519	if (VectorOps == `1`) {
520	Value *NewPtr = GEP->getPointerOperand();
521	if (isa<VectorType>(Val: NewPtr->getType()))
522	NewPtr = Builder.CreateExtractElement(Vec: NewPtr, Idx: IndexC);
523
524	SmallVector<Value *> NewOps;
525	for (unsigned I = `1`; I != GEP->getNumOperands(); ++I) {
526	Value *Op = GEP->getOperand(i_nocapture: I);
527	if (isa<VectorType>(Val: Op->getType()))
528	NewOps.push_back(Elt: Builder.CreateExtractElement(Vec: Op, Idx: IndexC));
529	else
530	NewOps.push_back(Elt: Op);
531	}
532
533	GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
534	PointeeType: GEP->getSourceElementType(), Ptr: NewPtr, IdxList: NewOps);
535	NewGEP->setIsInBounds(GEP->isInBounds());
536	return NewGEP;
537	}
538	}
539	} else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
540	// If this is extracting an element from a shufflevector, figure out where
541	// it came from and extract from the appropriate input element instead.
542	// Restrict the following transformation to fixed-length vector.
543	if (isa<FixedVectorType>(Val: SVI->getType()) && isa<ConstantInt>(Val: Index)) {
544	int SrcIdx =
545	SVI->getMaskValue(Elt: cast<ConstantInt>(Val: Index)->getZExtValue());
546	Value *Src;
547	unsigned LHSWidth = cast<FixedVectorType>(Val: SVI->getOperand(i_nocapture: `0`)->getType())
548	->getNumElements();
549
550	if (SrcIdx < `0`)
551	return replaceInstUsesWith(I&: EI, V: PoisonValue::get(T: EI.getType()));
552	if (SrcIdx < (int)LHSWidth)
553	Src = SVI->getOperand(i_nocapture: `0`);
554	else {
555	SrcIdx -= LHSWidth;
556	Src = SVI->getOperand(i_nocapture: `1`);
557	}
558	Type *Int64Ty = Type::getInt64Ty(C&: EI.getContext());
559	return ExtractElementInst::Create(
560	Vec: Src, Idx: ConstantInt::get(Ty: Int64Ty, V: SrcIdx, IsSigned: false));
561	}
562	} else if (auto *CI = dyn_cast<CastInst>(Val: I)) {
563	// Canonicalize extractelement(cast) -> cast(extractelement).
564	// Bitcasts can change the number of vector elements, and they cost
565	// nothing.
566	if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
567	Value *EE = Builder.CreateExtractElement(Vec: CI->getOperand(i_nocapture: `0`), Idx: Index);
568	return CastInst::Create(CI->getOpcode(), S: EE, Ty: EI.getType());
569	}
570	}
571	}
572
573	// Run demanded elements after other transforms as this can drop flags on
574	// binops. If there's two paths to the same final result, we prefer the
575	// one which doesn't force us to drop flags.
576	if (IndexC) {
577	ElementCount EC = EI.getVectorOperandType()->getElementCount();
578	unsigned NumElts = EC.getKnownMinValue();
579	// This instruction only demands the single element from the input vector.
580	// Skip for scalable type, the number of elements is unknown at
581	// compile-time.
582	if (!EC.isScalable() && NumElts != `1`) {
583	// If the input vector has a single use, simplify it based on this use
584	// property.
585	if (SrcVec->hasOneUse()) {
586	APInt PoisonElts(NumElts, `0`);
587	APInt DemandedElts(NumElts, `0`);
588	DemandedElts.setBit(IndexC->getZExtValue());
589	if (Value *V =
590	SimplifyDemandedVectorElts(V: SrcVec, DemandedElts, PoisonElts))
591	return replaceOperand(I&: EI, OpNum: `0`, V);
592	} else {
593	// If the input vector has multiple uses, simplify it based on a union
594	// of all elements used.
595	APInt DemandedElts = findDemandedEltsByAllUsers(V: SrcVec);
596	if (!DemandedElts.isAllOnes()) {
597	APInt PoisonElts(NumElts, `0`);
598	if (Value *V = SimplifyDemandedVectorElts(
599	V: SrcVec, DemandedElts, PoisonElts, Depth: `0` / Depth /,
600	AllowMultipleUsers: true / AllowMultipleUsers /)) {
601	if (V != SrcVec) {
602	Worklist.addValue(V: SrcVec);
603	SrcVec->replaceAllUsesWith(V);
604	return &EI;
605	}
606	}
607	}
608	}
609	}
610	}
611	return nullptr;
612	}
613
614	/// If V is a shuffle of values that ONLY returns elements from either LHS or
615	/// RHS, return the shuffle mask and true. Otherwise, return false.
616	static bool collectSingleShuffleElements(Value V, Value LHS, Value *RHS,
617	SmallVectorImpl<int> &Mask) {
618	assert(LHS->getType() == RHS->getType() &&
619	"Invalid CollectSingleShuffleElements");
620	unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
621
622	if (match(V, P: m_Undef())) {
623	Mask.assign(NumElts, Elt: -`1`);
624	return true;
625	}
626
627	if (V == LHS) {
628	for (unsigned i = `0`; i != NumElts; ++i)
629	Mask.push_back(Elt: i);
630	return true;
631	}
632
633	if (V == RHS) {
634	for (unsigned i = `0`; i != NumElts; ++i)
635	Mask.push_back(Elt: i + NumElts);
636	return true;
637	}
638
639	if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
640	// If this is an insert of an extract from some other vector, include it.
641	Value *VecOp = IEI->getOperand(i_nocapture: `0`);
642	Value *ScalarOp = IEI->getOperand(i_nocapture: `1`);
643	Value *IdxOp = IEI->getOperand(i_nocapture: `2`);
644
645	if (!isa<ConstantInt>(Val: IdxOp))
646	return false;
647	unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
648
649	if (isa<PoisonValue>(Val: ScalarOp)) { // inserting poison into vector.
650	// We can handle this if the vector we are inserting into is
651	// transitively ok.
652	if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
653	// If so, update the mask to reflect the inserted poison.
654	Mask [InsertedIdx] = -`1`;
655	return true;
656	}
657	} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)){
658	if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))) {
659	unsigned ExtractedIdx =
660	cast<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))->getZExtValue();
661	unsigned NumLHSElts =
662	cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
663
664	// This must be extracting from either LHS or RHS.
665	if (EI->getOperand(i_nocapture: `0`) == LHS \|\| EI->getOperand(i_nocapture: `0`) == RHS) {
666	// We can handle this if the vector we are inserting into is
667	// transitively ok.
668	if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
669	// If so, update the mask to reflect the inserted value.
670	if (EI->getOperand(i_nocapture: `0`) == LHS) {
671	Mask [InsertedIdx % NumElts] = ExtractedIdx;
672	} else {
673	assert(EI->getOperand(`0`) == RHS);
674	Mask [InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
675	}
676	return true;
677	}
678	}
679	}
680	}
681	}
682
683	return false;
684	}
685
686	/// If we have insertion into a vector that is wider than the vector that we
687	/// are extracting from, try to widen the source vector to allow a single
688	/// shufflevector to replace one or more insert/extract pairs.
689	static bool replaceExtractElements(InsertElementInst *InsElt,
690	ExtractElementInst *ExtElt,
691	InstCombinerImpl &IC) {
692	auto *InsVecType = cast<FixedVectorType>(Val: InsElt->getType());
693	auto *ExtVecType = cast<FixedVectorType>(Val: ExtElt->getVectorOperandType());
694	unsigned NumInsElts = InsVecType->getNumElements();
695	unsigned NumExtElts = ExtVecType->getNumElements();
696
697	// The inserted-to vector must be wider than the extracted-from vector.
698	if (InsVecType->getElementType() != ExtVecType->getElementType() \|\|
699	NumExtElts >= NumInsElts)
700	return false;
701
702	// Create a shuffle mask to widen the extended-from vector using poison
703	// values. The mask selects all of the values of the original vector followed
704	// by as many poison values as needed to create a vector of the same length
705	// as the inserted-to vector.
706	SmallVector<int, `16`> ExtendMask;
707	for (unsigned i = `0`; i < NumExtElts; ++i)
708	ExtendMask.push_back(Elt: i);
709	for (unsigned i = NumExtElts; i < NumInsElts; ++i)
710	ExtendMask.push_back(Elt: -`1`);
711
712	Value *ExtVecOp = ExtElt->getVectorOperand();
713	auto *ExtVecOpInst = dyn_cast<Instruction>(Val: ExtVecOp);
714	BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
715	? ExtVecOpInst->getParent()
716	: ExtElt->getParent();
717
718	// TODO: This restriction matches the basic block check below when creating
719	// new extractelement instructions. If that limitation is removed, this one
720	// could also be removed. But for now, we just bail out to ensure that we
721	// will replace the extractelement instruction that is feeding our
722	// insertelement instruction. This allows the insertelement to then be
723	// replaced by a shufflevector. If the insertelement is not replaced, we can
724	// induce infinite looping because there's an optimization for extractelement
725	// that will delete our widening shuffle. This would trigger another attempt
726	// here to create that shuffle, and we spin forever.
727	if (InsertionBlock != InsElt->getParent())
728	return false;
729
730	// TODO: This restriction matches the check in visitInsertElementInst() and
731	// prevents an infinite loop caused by not turning the extract/insert pair
732	// into a shuffle. We really should not need either check, but we're lacking
733	// folds for shufflevectors because we're afraid to generate shuffle masks
734	// that the backend can't handle.
735	if (InsElt->hasOneUse() && isa<InsertElementInst>(Val: InsElt->user_back()))
736	return false;
737
738	auto WideVec = new* ShuffleVectorInst (ExtVecOp, ExtendMask);
739
740	// Insert the new shuffle after the vector operand of the extract is defined
741	// (as long as it's not a PHI) or at the start of the basic block of the
742	// extract, so any subsequent extracts in the same basic block can use it.
743	// TODO: Insert before the earliest ExtractElementInst that is replaced.
744	if (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
745	WideVec->insertAfter(InsertPos: ExtVecOpInst);
746	else
747	IC.InsertNewInstWith(New: WideVec, Old: ExtElt->getParent()->getFirstInsertionPt());
748
749	// Replace extracts from the original narrow vector with extracts from the new
750	// wide vector.
751	for (User *U : ExtVecOp->users()) {
752	ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(Val: U);
753	if (!OldExt \|\| OldExt->getParent() != WideVec->getParent())
754	continue;
755	auto *NewExt = ExtractElementInst::Create(Vec: WideVec, Idx: OldExt->getOperand(i_nocapture: `1`));
756	IC.InsertNewInstWith(New: NewExt, Old: OldExt->getIterator());
757	IC.replaceInstUsesWith(I&: *OldExt, V: NewExt);
758	// Add the old extracts to the worklist for DCE. We can't remove the
759	// extracts directly, because they may still be used by the calling code.
760	IC.addToWorklist(I: OldExt);
761	}
762
763	return true;
764	}
765
766	/// We are building a shuffle to create V, which is a sequence of insertelement,
767	/// extractelement pairs. If PermittedRHS is set, then we must either use it or
768	/// not rely on the second vector source. Return a std::pair containing the
769	/// left and right vectors of the proposed shuffle (or 0), and set the Mask
770	/// parameter as required.
771	///
772	/// Note: we intentionally don't try to fold earlier shuffles since they have
773	/// often been chosen carefully to be efficiently implementable on the target.
774	using ShuffleOps = std::pair<Value , Value >;
775
776	static ShuffleOps collectShuffleElements(Value V, SmallVectorImpl<int*> &Mask,
777	Value *PermittedRHS,
778	InstCombinerImpl &IC, bool &Rerun) {
779	assert(V->getType()->isVectorTy() && "Invalid shuffle!");
780	unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
781
782	if (match(V, P: m_Poison())) {
783	Mask.assign(NumElts, Elt: -`1`);
784	return std::make_pair(
785	x: PermittedRHS ? PoisonValue::get(T: PermittedRHS->getType()) : V, y: nullptr);
786	}
787
788	if (isa<ConstantAggregateZero>(Val: V)) {
789	Mask.assign(NumElts, Elt: `0`);
790	return std::make_pair(x&: V, y: nullptr);
791	}
792
793	if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
794	// If this is an insert of an extract from some other vector, include it.
795	Value *VecOp = IEI->getOperand(i_nocapture: `0`);
796	Value *ScalarOp = IEI->getOperand(i_nocapture: `1`);
797	Value *IdxOp = IEI->getOperand(i_nocapture: `2`);
798
799	if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)) {
800	if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`)) && isa<ConstantInt>(Val: IdxOp)) {
801	unsigned ExtractedIdx =
802	cast<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))->getZExtValue();
803	unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
804
805	// Either the extracted from or inserted into vector must be RHSVec,
806	// otherwise we'd end up with a shuffle of three inputs.
807	if (EI->getOperand(i_nocapture: `0`) == PermittedRHS \|\| PermittedRHS == nullptr) {
808	Value *RHS = EI->getOperand(i_nocapture: `0`);
809	ShuffleOps LR = collectShuffleElements(V: VecOp, Mask, PermittedRHS: RHS, IC, Rerun);
810	assert(LR.second == nullptr \|\| LR.second == RHS);
811
812	if (LR.first->getType() != RHS->getType()) {
813	// Although we are giving up for now, see if we can create extracts
814	// that match the inserts for another round of combining.
815	if (replaceExtractElements(InsElt: IEI, ExtElt: EI, IC))
816	Rerun = true;
817
818	// We tried our best, but we can't find anything compatible with RHS
819	// further up the chain. Return a trivial shuffle.
820	for (unsigned i = `0`; i < NumElts; ++i)
821	Mask [i] = i;
822	return std::make_pair(x&: V, y: nullptr);
823	}
824
825	unsigned NumLHSElts =
826	cast<FixedVectorType>(Val: RHS->getType())->getNumElements();
827	Mask [InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
828	return std::make_pair(x&: LR.first, y&: RHS);
829	}
830
831	if (VecOp == PermittedRHS) {
832	// We've gone as far as we can: anything on the other side of the
833	// extractelement will already have been converted into a shuffle.
834	unsigned NumLHSElts =
835	cast<FixedVectorType>(Val: EI->getOperand(i_nocapture: `0`)->getType())
836	->getNumElements();
837	for (unsigned i = `0`; i != NumElts; ++i)
838	Mask.push_back(Elt: i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
839	return std::make_pair(x: EI->getOperand(i_nocapture: `0`), y&: PermittedRHS);
840	}
841
842	// If this insertelement is a chain that comes from exactly these two
843	// vectors, return the vector and the effective shuffle.
844	if (EI->getOperand(i_nocapture: `0`)->getType() == PermittedRHS->getType() &&
845	collectSingleShuffleElements(V: IEI, LHS: EI->getOperand(i_nocapture: `0`), RHS: PermittedRHS,
846	Mask))
847	return std::make_pair(x: EI->getOperand(i_nocapture: `0`), y&: PermittedRHS);
848	}
849	}
850	}
851
852	// Otherwise, we can't do anything fancy. Return an identity vector.
853	for (unsigned i = `0`; i != NumElts; ++i)
854	Mask.push_back(Elt: i);
855	return std::make_pair(x&: V, y: nullptr);
856	}
857
858	/// Look for chain of insertvalue's that fully define an aggregate, and trace
859	/// back the values inserted, see if they are all were extractvalue'd from
860	/// the same source aggregate from the exact same element indexes.
861	/// If they were, just reuse the source aggregate.
862	/// This potentially deals with PHI indirections.
863	Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
864	InsertValueInst &OrigIVI) {
865	Type *AggTy = OrigIVI.getType();
866	unsigned NumAggElts;
867	switch (AggTy->getTypeID()) {
868	case Type::StructTyID:
869	NumAggElts = AggTy->getStructNumElements();
870	break;
871	case Type::ArrayTyID:
872	NumAggElts = AggTy->getArrayNumElements();
873	break;
874	default:
875	llvm_unreachable("Unhandled aggregate type?");
876	}
877
878	// Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
879	// to handle clang C++ exception struct (which is hardcoded as {i8, i32}),*
880	// FIXME: any interesting patterns to be caught with larger limit?
881	assert(NumAggElts > `0` && "Aggregate should have elements.");
882	if (NumAggElts > `2`)
883	return nullptr;
884
885	static constexpr auto NotFound = std::nullopt;
886	static constexpr auto FoundMismatch = nullptr;
887
888	// Try to find a value of each element of an aggregate.
889	// FIXME: deal with more complex, not one-dimensional, aggregate types
890	SmallVector<std::optional<Instruction *>, `2`> AggElts(NumAggElts, NotFound);
891
892	// Do we know values for each element of the aggregate?
893	auto KnowAllElts = [&AggElts]() {
894	return !llvm::is_contained(Range&: AggElts, Element: NotFound);
895	};
896
897	int Depth = `0`;
898
899	// Arbitrary `insertvalue` visitation depth limit. Let's be okay with
900	// every element being overwritten twice, which should never happen.
901	static const int DepthLimit = `2` * NumAggElts;
902
903	// Recurse up the chain of `insertvalue` aggregate operands until either we've
904	// reconstructed full initializer or can't visit any more `insertvalue`'s.
905	for (InsertValueInst *CurrIVI = &OrigIVI;
906	Depth < DepthLimit && CurrIVI && !KnowAllElts ();
907	CurrIVI = dyn_cast<InsertValueInst>(Val: CurrIVI->getAggregateOperand()),
908	++Depth) {
909	auto *InsertedValue =
910	dyn_cast<Instruction>(Val: CurrIVI->getInsertedValueOperand());
911	if (!InsertedValue)
912	return nullptr; // Inserted value must be produced by an instruction.
913
914	ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
915
916	// Don't bother with more than single-level aggregates.
917	if (Indices.size() != `1`)
918	return nullptr; // FIXME: deal with more complex aggregates?
919
920	// Now, we may have already previously recorded the value for this element
921	// of an aggregate. If we did, that means the CurrIVI will later be
922	// overwritten with the already-recorded value. But if not, let's record it!
923	std::optional<Instruction *> &Elt = AggElts [Indices.front()];
924	Elt = Elt.value_or(u&: InsertedValue);
925
926	// FIXME: should we handle chain-terminating undef base operand?
927	}
928
929	// Was that sufficient to deduce the full initializer for the aggregate?
930	if (!KnowAllElts ())
931	return nullptr; // Give up then.
932
933	// We now want to find the source[s] of the aggregate elements we've found.
934	// And with "source" we mean the original aggregate[s] from which
935	// the inserted elements were extracted. This may require PHI translation.
936
937	enum class AggregateDescription {
938	/// When analyzing the value that was inserted into an aggregate, we did
939	/// not manage to find defining `extractvalue` instruction to analyze.
940	NotFound,
941	/// When analyzing the value that was inserted into an aggregate, we did
942	/// manage to find defining `extractvalue` instruction[s], and everything
943	/// matched perfectly - aggregate type, element insertion/extraction index.
944	Found,
945	/// When analyzing the value that was inserted into an aggregate, we did
946	/// manage to find defining `extractvalue` instruction, but there was
947	/// a mismatch: either the source type from which the extraction was didn't
948	/// match the aggregate type into which the insertion was,
949	/// or the extraction/insertion channels mismatched,
950	/// or different elements had different source aggregates.
951	FoundMismatch
952	};
953	auto Describe = [](std::optional<Value *> SourceAggregate) {
954	if (SourceAggregate == NotFound)
955	return AggregateDescription::NotFound;
956	if (*SourceAggregate == FoundMismatch)
957	return AggregateDescription::FoundMismatch;
958	return AggregateDescription::Found;
959	};
960
961	// Given the value \p Elt that was being inserted into element \p EltIdx of an
962	// aggregate AggTy, see if \p Elt was originally defined by an
963	// appropriate extractvalue (same element index, same aggregate type).
964	// If found, return the source aggregate from which the extraction was.
965	// If \p PredBB is provided, does PHI translation of an \p Elt first.
966	auto FindSourceAggregate =
967	[&](Instruction Elt, unsigned* EltIdx, std::optional<BasicBlock *> UseBB,
968	std::optional<BasicBlock > PredBB) -> std::optional<Value > {
969	// For now(?), only deal with, at most, a single level of PHI indirection.
970	if (UseBB && PredBB)
971	Elt = dyn_cast<Instruction>(Val: Elt->DoPHITranslation(CurBB: UseBB, PredBB: PredBB));
972	// FIXME: deal with multiple levels of PHI indirection?
973
974	// Did we find an extraction?
975	auto *EVI = dyn_cast_or_null<ExtractValueInst>(Val: Elt);
976	if (!EVI)
977	return NotFound;
978
979	Value *SourceAggregate = EVI->getAggregateOperand();
980
981	// Is the extraction from the same type into which the insertion was?
982	if (SourceAggregate->getType() != AggTy)
983	return FoundMismatch;
984	// And the element index doesn't change between extraction and insertion?
985	if (EVI->getNumIndices() != `1` \|\| EltIdx != EVI->getIndices().front())
986	return FoundMismatch;
987
988	return SourceAggregate; // AggregateDescription::Found
989	};
990
991	// Given elements AggElts that were constructing an aggregate OrigIVI,
992	// see if we can find appropriate source aggregate for each of the elements,
993	// and see it's the same aggregate for each element. If so, return it.
994	auto FindCommonSourceAggregate =
995	[&](std::optional<BasicBlock *> UseBB,
996	std::optional<BasicBlock > PredBB) -> std::optional<Value > {
997	std::optional<Value *> SourceAggregate;
998
999	for (auto I : enumerate(First&: AggElts)) {
1000	assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1001	"We don't store nullptr in SourceAggregate!");
1002	assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1003	(I.index() != `0`) &&
1004	"SourceAggregate should be valid after the first element,");
1005
1006	// For this element, is there a plausible source aggregate?
1007	// FIXME: we could special-case undef element, IFF we know that in the
1008	// source aggregate said element isn't poison.
1009	std::optional<Value *> SourceAggregateForElement =
1010	FindSourceAggregate (*I.value(), I.index(), UseBB, PredBB);
1011
1012	// Okay, what have we found? Does that correlate with previous findings?
1013
1014	// Regardless of whether or not we have previously found source
1015	// aggregate for previous elements (if any), if we didn't find one for
1016	// this element, passthrough whatever we have just found.
1017	if (Describe (SourceAggregateForElement) != AggregateDescription::Found)
1018	return SourceAggregateForElement;
1019
1020	// Okay, we have found source aggregate for this element.
1021	// Let's see what we already know from previous elements, if any.
1022	switch (Describe (SourceAggregate)) {
1023	case AggregateDescription::NotFound:
1024	// This is apparently the first element that we have examined.
1025	SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1026	continue; // Great, now look at next element.
1027	case AggregateDescription::Found:
1028	// We have previously already successfully examined other elements.
1029	// Is this the same source aggregate we've found for other elements?
1030	if (SourceAggregateForElement != SourceAggregate)
1031	return FoundMismatch;
1032	continue; // Still the same aggregate, look at next element.
1033	case AggregateDescription::FoundMismatch:
1034	llvm_unreachable("Can't happen. We would have early-exited then.");
1035	};
1036	}
1037
1038	assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1039	"Must be a valid Value");
1040	return *SourceAggregate;
1041	};
1042
1043	std::optional<Value *> SourceAggregate;
1044
1045	// Can we find the source aggregate without looking at predecessors?
1046	SourceAggregate = FindCommonSourceAggregate (/UseBB=/std::nullopt,
1047	/PredBB=/std::nullopt);
1048	if (Describe (SourceAggregate) != AggregateDescription::NotFound) {
1049	if (Describe (SourceAggregate) == AggregateDescription::FoundMismatch)
1050	return nullptr; // Conflicting source aggregates!
1051	++NumAggregateReconstructionsSimplified;
1052	return replaceInstUsesWith(I&: OrigIVI, V: *SourceAggregate);
1053	}
1054
1055	// Okay, apparently we need to look at predecessors.
1056
1057	// We should be smart about picking the "use" basic block, which will be the
1058	// merge point for aggregate, where we'll insert the final PHI that will be
1059	// used instead of OrigIVI. Basic block of OrigIVI is not* the right choice.*
1060	// We should look in which blocks each of the AggElts is being defined,
1061	// they all should be defined in the same basic block.
1062	BasicBlock UseBB = nullptr*;
1063
1064	for (const std::optional<Instruction *> &I : AggElts) {
1065	BasicBlock BB = (I)->getParent();
1066	// If it's the first instruction we've encountered, record the basic block.
1067	if (!UseBB) {
1068	UseBB = BB;
1069	continue;
1070	}
1071	// Otherwise, this must be the same basic block we've seen previously.
1072	if (UseBB != BB)
1073	return nullptr;
1074	}
1075
1076	// If all* of the elements are basic-block-independent, meaning they are*
1077	// either function arguments, or constant expressions, then if we didn't
1078	// handle them without predecessor-aware handling, we won't handle them now.
1079	if (!UseBB)
1080	return nullptr;
1081
1082	// If we didn't manage to find source aggregate without looking at
1083	// predecessors, and there are no predecessors to look at, then we're done.
1084	if (pred_empty(BB: UseBB))
1085	return nullptr;
1086
1087	// Arbitrary predecessor count limit.
1088	static const int PredCountLimit = `64`;
1089
1090	// Cache the (non-uniqified!) list of predecessors in a vector,
1091	// checking the limit at the same time for efficiency.
1092	SmallVector<BasicBlock , `4`> Preds; // May have duplicates!*
1093	for (BasicBlock *Pred : predecessors(BB: UseBB)) {
1094	// Don't bother if there are too many predecessors.
1095	if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1096	return nullptr;
1097	Preds.emplace_back(Args&: Pred);
1098	}
1099
1100	// For each predecessor, what is the source aggregate,
1101	// from which all the elements were originally extracted from?
1102	// Note that we want for the map to have stable iteration order!
1103	SmallDenseMap<BasicBlock , Value , `4`> SourceAggregates;
1104	for (BasicBlock *Pred : Preds) {
1105	std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1106	SourceAggregates.insert(KV: {Pred, nullptr});
1107	// Did we already evaluate this predecessor?
1108	if (!IV.second)
1109	continue;
1110
1111	// Let's hope that when coming from predecessor Pred, all elements of the
1112	// aggregate produced by OrigIVI must have been originally extracted from
1113	// the same aggregate. Is that so? Can we find said original aggregate?
1114	SourceAggregate = FindCommonSourceAggregate (UseBB, Pred);
1115	if (Describe (SourceAggregate) != AggregateDescription::Found)
1116	return nullptr; // Give up.
1117	IV.first ->second = *SourceAggregate;
1118	}
1119
1120	// All good! Now we just need to thread the source aggregates here.
1121	// Note that we have to insert the new PHI here, ourselves, because we can't
1122	// rely on InstCombinerImpl::run() inserting it into the right basic block.
1123	// Note that the same block can be a predecessor more than once,
1124	// and we need to preserve that invariant for the PHI node.
1125	BuilderTy::InsertPointGuard Guard(Builder);
1126	Builder.SetInsertPoint(TheBB: UseBB, IP: UseBB->getFirstNonPHIIt());
1127	auto *PHI =
1128	Builder.CreatePHI(Ty: AggTy, NumReservedValues: Preds.size(), Name: OrigIVI.getName() + ".merged");
1129	for (BasicBlock *Pred : Preds)
1130	PHI->addIncoming(V: SourceAggregates [Pred], BB: Pred);
1131
1132	++NumAggregateReconstructionsSimplified;
1133	return replaceInstUsesWith(I&: OrigIVI, V: PHI);
1134	}
1135
1136	/// Try to find redundant insertvalue instructions, like the following ones:
1137	/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1138	/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1139	/// Here the second instruction inserts values at the same indices, as the
1140	/// first one, making the first one redundant.
1141	/// It should be transformed to:
1142	/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1143	Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1144	if (Value *V = simplifyInsertValueInst(
1145	Agg: I.getAggregateOperand(), Val: I.getInsertedValueOperand(), Idxs: I.getIndices(),
1146	Q: SQ.getWithInstruction(I: &I)))
1147	return replaceInstUsesWith(I, V);
1148
1149	bool IsRedundant = false;
1150	ArrayRef<unsigned int> FirstIndices = I.getIndices();
1151
1152	// If there is a chain of insertvalue instructions (each of them except the
1153	// last one has only one use and it's another insertvalue insn from this
1154	// chain), check if any of the 'children' uses the same indices as the first
1155	// instruction. In this case, the first one is redundant.
1156	Value *V = &I;
1157	unsigned Depth = `0`;
1158	while (V->hasOneUse() && Depth < `10`) {
1159	User *U = V->user_back();
1160	auto UserInsInst = dyn_cast<InsertValueInst>(Val: U);
1161	if (!UserInsInst \|\| U->getOperand(i: `0`) != V)
1162	break;
1163	if (UserInsInst->getIndices() == FirstIndices) {
1164	IsRedundant = true;
1165	break;
1166	}
1167	V = UserInsInst;
1168	Depth++;
1169	}
1170
1171	if (IsRedundant)
1172	return replaceInstUsesWith(I, V: I.getOperand(i_nocapture: `0`));
1173
1174	if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(OrigIVI&: I))
1175	return NewI;
1176
1177	return nullptr;
1178	}
1179
1180	static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1181	// Can not analyze scalable type, the number of elements is not a compile-time
1182	// constant.
1183	if (isa<ScalableVectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType()))
1184	return false;
1185
1186	int MaskSize = Shuf.getShuffleMask().size();
1187	int VecSize =
1188	cast<FixedVectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType())->getNumElements();
1189
1190	// A vector select does not change the size of the operands.
1191	if (MaskSize != VecSize)
1192	return false;
1193
1194	// Each mask element must be undefined or choose a vector element from one of
1195	// the source operands without crossing vector lanes.
1196	for (int i = `0`; i != MaskSize; ++i) {
1197	int Elt = Shuf.getMaskValue(Elt: i);
1198	if (Elt != -`1` && Elt != i && Elt != i + VecSize)
1199	return false;
1200	}
1201
1202	return true;
1203	}
1204
1205	/// Turn a chain of inserts that splats a value into an insert + shuffle:
1206	/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1207	/// shufflevector(insertelt(X, %k, 0), poison, zero)
1208	static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1209	// We are interested in the last insert in a chain. So if this insert has a
1210	// single user and that user is an insert, bail.
1211	if (InsElt.hasOneUse() && isa<InsertElementInst>(Val: InsElt.user_back()))
1212	return nullptr;
1213
1214	VectorType *VecTy = InsElt.getType();
1215	// Can not handle scalable type, the number of elements is not a compile-time
1216	// constant.
1217	if (isa<ScalableVectorType>(Val: VecTy))
1218	return nullptr;
1219	unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements();
1220
1221	// Do not try to do this for a one-element vector, since that's a nop,
1222	// and will cause an inf-loop.
1223	if (NumElements == `1`)
1224	return nullptr;
1225
1226	Value *SplatVal = InsElt.getOperand(i_nocapture: `1`);
1227	InsertElementInst *CurrIE = &InsElt;
1228	SmallBitVector ElementPresent(NumElements, false);
1229	InsertElementInst FirstIE = nullptr*;
1230
1231	// Walk the chain backwards, keeping track of which indices we inserted into,
1232	// until we hit something that isn't an insert of the splatted value.
1233	while (CurrIE) {
1234	auto *Idx = dyn_cast<ConstantInt>(Val: CurrIE->getOperand(i_nocapture: `2`));
1235	if (!Idx \|\| CurrIE->getOperand(i_nocapture: `1`) != SplatVal)
1236	return nullptr;
1237
1238	auto *NextIE = dyn_cast<InsertElementInst>(Val: CurrIE->getOperand(i_nocapture: `0`));
1239	// Check none of the intermediate steps have any additional uses, except
1240	// for the root insertelement instruction, which can be re-used, if it
1241	// inserts at position 0.
1242	if (CurrIE != &InsElt &&
1243	(!CurrIE->hasOneUse() && (NextIE != nullptr \|\| !Idx->isZero())))
1244	return nullptr;
1245
1246	ElementPresent [Idx->getZExtValue()] = true;
1247	FirstIE = CurrIE;
1248	CurrIE = NextIE;
1249	}
1250
1251	// If this is just a single insertelement (not a sequence), we are done.
1252	if (FirstIE == &InsElt)
1253	return nullptr;
1254
1255	// If we are not inserting into a poison vector, make sure we've seen an
1256	// insert into every element.
1257	// TODO: If the base vector is not undef, it might be better to create a splat
1258	// and then a select-shuffle (blend) with the base vector.
1259	if (!match(V: FirstIE->getOperand(i_nocapture: `0`), P: m_Poison()))
1260	if (!ElementPresent.all())
1261	return nullptr;
1262
1263	// Create the insert + shuffle.
1264	Type *Int64Ty = Type::getInt64Ty(C&: InsElt.getContext());
1265	PoisonValue *PoisonVec = PoisonValue::get(T: VecTy);
1266	Constant *Zero = ConstantInt::get(Ty: Int64Ty, V: `0`);
1267	if (!cast<ConstantInt>(Val: FirstIE->getOperand(i_nocapture: `2`))->isZero())
1268	FirstIE = InsertElementInst::Create(Vec: PoisonVec, NewElt: SplatVal, Idx: Zero, NameStr: "",
1269	InsertBefore: InsElt.getIterator());
1270
1271	// Splat from element 0, but replace absent elements with poison in the mask.
1272	SmallVector<int, `16`> Mask(NumElements, `0`);
1273	for (unsigned i = `0`; i != NumElements; ++i)
1274	if (!ElementPresent [i])
1275	Mask [i] = -`1`;
1276
1277	return new ShuffleVectorInst (FirstIE, Mask);
1278	}
1279
1280	/// Try to fold an insert element into an existing splat shuffle by changing
1281	/// the shuffle's mask to include the index of this insert element.
1282	static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1283	// Check if the vector operand of this insert is a canonical splat shuffle.
1284	auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`));
1285	if (!Shuf \|\| !Shuf->isZeroEltSplat())
1286	return nullptr;
1287
1288	// Bail out early if shuffle is scalable type. The number of elements in
1289	// shuffle mask is unknown at compile-time.
1290	if (isa<ScalableVectorType>(Val: Shuf->getType()))
1291	return nullptr;
1292
1293	// Check for a constant insertion index.
1294	uint64_t IdxC;
1295	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: IdxC)))
1296	return nullptr;
1297
1298	// Check if the splat shuffle's input is the same as this insert's scalar op.
1299	Value *X = InsElt.getOperand(i_nocapture: `1`);
1300	Value *Op0 = Shuf->getOperand(i_nocapture: `0`);
1301	if (!match(V: Op0, P: m_InsertElt(Val: m_Undef(), Elt: m_Specific(V: X), Idx: m_ZeroInt())))
1302	return nullptr;
1303
1304	// Replace the shuffle mask element at the index of this insert with a zero.
1305	// For example:
1306	// inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1307	// --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1308	unsigned NumMaskElts =
1309	cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1310	SmallVector<int, `16`> NewMask(NumMaskElts);
1311	for (unsigned i = `0`; i != NumMaskElts; ++i)
1312	NewMask [i] = i == IdxC ? `0` : Shuf->getMaskValue(Elt: i);
1313
1314	return new ShuffleVectorInst (Op0, NewMask);
1315	}
1316
1317	/// Try to fold an extract+insert element into an existing identity shuffle by
1318	/// changing the shuffle's mask to include the index of this insert element.
1319	static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1320	// Check if the vector operand of this insert is an identity shuffle.
1321	auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`));
1322	if (!Shuf \|\| !match(V: Shuf->getOperand(i_nocapture: `1`), P: m_Undef()) \|\|
1323	!(Shuf->isIdentityWithExtract() \|\| Shuf->isIdentityWithPadding()))
1324	return nullptr;
1325
1326	// Bail out early if shuffle is scalable type. The number of elements in
1327	// shuffle mask is unknown at compile-time.
1328	if (isa<ScalableVectorType>(Val: Shuf->getType()))
1329	return nullptr;
1330
1331	// Check for a constant insertion index.
1332	uint64_t IdxC;
1333	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: IdxC)))
1334	return nullptr;
1335
1336	// Check if this insert's scalar op is extracted from the identity shuffle's
1337	// input vector.
1338	Value *Scalar = InsElt.getOperand(i_nocapture: `1`);
1339	Value *X = Shuf->getOperand(i_nocapture: `0`);
1340	if (!match(V: Scalar, P: m_ExtractElt(Val: m_Specific(V: X), Idx: m_SpecificInt(V: IdxC))))
1341	return nullptr;
1342
1343	// Replace the shuffle mask element at the index of this extract+insert with
1344	// that same index value.
1345	// For example:
1346	// inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1347	unsigned NumMaskElts =
1348	cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1349	SmallVector<int, `16`> NewMask(NumMaskElts);
1350	ArrayRef<int> OldMask = Shuf->getShuffleMask();
1351	for (unsigned i = `0`; i != NumMaskElts; ++i) {
1352	if (i != IdxC) {
1353	// All mask elements besides the inserted element remain the same.
1354	NewMask [i] = OldMask [i];
1355	} else if (OldMask [i] == (int)IdxC) {
1356	// If the mask element was already set, there's nothing to do
1357	// (demanded elements analysis may unset it later).
1358	return nullptr;
1359	} else {
1360	assert(OldMask[i] == PoisonMaskElem &&
1361	"Unexpected shuffle mask element for identity shuffle");
1362	NewMask [i] = IdxC;
1363	}
1364	}
1365
1366	return new ShuffleVectorInst (X, Shuf->getOperand(i_nocapture: `1`), NewMask);
1367	}
1368
1369	/// If we have an insertelement instruction feeding into another insertelement
1370	/// and the 2nd is inserting a constant into the vector, canonicalize that
1371	/// constant insertion before the insertion of a variable:
1372	///
1373	/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1374	/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1375	///
1376	/// This has the potential of eliminating the 2nd insertelement instruction
1377	/// via constant folding of the scalar constant into a vector constant.
1378	static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1379	InstCombiner::BuilderTy &Builder) {
1380	auto *InsElt1 = dyn_cast<InsertElementInst>(Val: InsElt2.getOperand(i_nocapture: `0`));
1381	if (!InsElt1 \|\| !InsElt1->hasOneUse())
1382	return nullptr;
1383
1384	Value X, Y;
1385	Constant *ScalarC;
1386	ConstantInt IdxC1, IdxC2;
1387	if (match(V: InsElt1->getOperand(i_nocapture: `0`), P: m_Value(V&: X)) &&
1388	match(V: InsElt1->getOperand(i_nocapture: `1`), P: m_Value(V&: Y)) && !isa<Constant>(Val: Y) &&
1389	match(V: InsElt1->getOperand(i_nocapture: `2`), P: m_ConstantInt(CI&: IdxC1)) &&
1390	match(V: InsElt2.getOperand(i_nocapture: `1`), P: m_Constant(C&: ScalarC)) &&
1391	match(V: InsElt2.getOperand(i_nocapture: `2`), P: m_ConstantInt(CI&: IdxC2)) && IdxC1 != IdxC2) {
1392	Value *NewInsElt1 = Builder.CreateInsertElement(Vec: X, NewElt: ScalarC, Idx: IdxC2);
1393	return InsertElementInst::Create(Vec: NewInsElt1, NewElt: Y, Idx: IdxC1);
1394	}
1395
1396	return nullptr;
1397	}
1398
1399	/// insertelt (shufflevector X, CVec, Mask\|insertelt X, C1, CIndex1), C, CIndex
1400	/// --> shufflevector X, CVec', Mask'
1401	static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1402	auto *Inst = dyn_cast<Instruction>(Val: InsElt.getOperand(i_nocapture: `0`));
1403	// Bail out if the parent has more than one use. In that case, we'd be
1404	// replacing the insertelt with a shuffle, and that's not a clear win.
1405	if (!Inst \|\| !Inst->hasOneUse())
1406	return nullptr;
1407	if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`))) {
1408	// The shuffle must have a constant vector operand. The insertelt must have
1409	// a constant scalar being inserted at a constant position in the vector.
1410	Constant ShufConstVec, InsEltScalar;
1411	uint64_t InsEltIndex;
1412	if (!match(V: Shuf->getOperand(i_nocapture: `1`), P: m_Constant(C&: ShufConstVec)) \|\|
1413	!match(V: InsElt.getOperand(i_nocapture: `1`), P: m_Constant(C&: InsEltScalar)) \|\|
1414	!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsEltIndex)))
1415	return nullptr;
1416
1417	// Adding an element to an arbitrary shuffle could be expensive, but a
1418	// shuffle that selects elements from vectors without crossing lanes is
1419	// assumed cheap.
1420	// If we're just adding a constant into that shuffle, it will still be
1421	// cheap.
1422	if (!isShuffleEquivalentToSelect(Shuf&: *Shuf))
1423	return nullptr;
1424
1425	// From the above 'select' check, we know that the mask has the same number
1426	// of elements as the vector input operands. We also know that each constant
1427	// input element is used in its lane and can not be used more than once by
1428	// the shuffle. Therefore, replace the constant in the shuffle's constant
1429	// vector with the insertelt constant. Replace the constant in the shuffle's
1430	// mask vector with the insertelt index plus the length of the vector
1431	// (because the constant vector operand of a shuffle is always the 2nd
1432	// operand).
1433	ArrayRef<int> Mask = Shuf->getShuffleMask();
1434	unsigned NumElts = Mask.size();
1435	SmallVector<Constant *, `16`> NewShufElts(NumElts);
1436	SmallVector<int, `16`> NewMaskElts(NumElts);
1437	for (unsigned I = `0`; I != NumElts; ++I) {
1438	if (I == InsEltIndex) {
1439	NewShufElts [I] = InsEltScalar;
1440	NewMaskElts [I] = InsEltIndex + NumElts;
1441	} else {
1442	// Copy over the existing values.
1443	NewShufElts [I] = ShufConstVec->getAggregateElement(Elt: I);
1444	NewMaskElts [I] = Mask [I];
1445	}
1446
1447	// Bail if we failed to find an element.
1448	if (!NewShufElts [I])
1449	return nullptr;
1450	}
1451
1452	// Create new operands for a shuffle that includes the constant of the
1453	// original insertelt. The old shuffle will be dead now.
1454	return new ShuffleVectorInst (Shuf->getOperand(i_nocapture: `0`),
1455	ConstantVector::get(V: NewShufElts), NewMaskElts);
1456	} else if (auto *IEI = dyn_cast<InsertElementInst>(Val: Inst)) {
1457	// Transform sequences of insertelements ops with constant data/indexes into
1458	// a single shuffle op.
1459	// Can not handle scalable type, the number of elements needed to create
1460	// shuffle mask is not a compile-time constant.
1461	if (isa<ScalableVectorType>(Val: InsElt.getType()))
1462	return nullptr;
1463	unsigned NumElts =
1464	cast<FixedVectorType>(Val: InsElt.getType())->getNumElements();
1465
1466	uint64_t InsertIdx[`2`];
1467	Constant *Val[`2`];
1468	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsertIdx[`0`])) \|\|
1469	!match(V: InsElt.getOperand(i_nocapture: `1`), P: m_Constant(C&: Val[`0`])) \|\|
1470	!match(V: IEI->getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsertIdx[`1`])) \|\|
1471	!match(V: IEI->getOperand(i_nocapture: `1`), P: m_Constant(C&: Val[`1`])))
1472	return nullptr;
1473	SmallVector<Constant *, `16`> Values(NumElts);
1474	SmallVector<int, `16`> Mask(NumElts);
1475	auto ValI = std::begin(arr&: Val);
1476	// Generate new constant vector and mask.
1477	// We have 2 values/masks from the insertelements instructions. Insert them
1478	// into new value/mask vectors.
1479	for (uint64_t I : InsertIdx) {
1480	if (!Values [I]) {
1481	Values [I] = *ValI;
1482	Mask [I] = NumElts + I;
1483	}
1484	++ValI;
1485	}
1486	// Remaining values are filled with 'poison' values.
1487	for (unsigned I = `0`; I < NumElts; ++I) {
1488	if (!Values [I]) {
1489	Values [I] = PoisonValue::get(T: InsElt.getType()->getElementType());
1490	Mask [I] = I;
1491	}
1492	}
1493	// Create new operands for a shuffle that includes the constant of the
1494	// original insertelt.
1495	return new ShuffleVectorInst (IEI->getOperand(i_nocapture: `0`),
1496	ConstantVector::get(V: Values), Mask);
1497	}
1498	return nullptr;
1499	}
1500
1501	/// If both the base vector and the inserted element are extended from the same
1502	/// type, do the insert element in the narrow source type followed by extend.
1503	/// TODO: This can be extended to include other cast opcodes, but particularly
1504	/// if we create a wider insertelement, make sure codegen is not harmed.
1505	static Instruction *narrowInsElt(InsertElementInst &InsElt,
1506	InstCombiner::BuilderTy &Builder) {
1507	// We are creating a vector extend. If the original vector extend has another
1508	// use, that would mean we end up with 2 vector extends, so avoid that.
1509	// TODO: We could ease the use-clause to "if at least one op has one use"
1510	// (assuming that the source types match - see next TODO comment).
1511	Value *Vec = InsElt.getOperand(i_nocapture: `0`);
1512	if (!Vec->hasOneUse())
1513	return nullptr;
1514
1515	Value *Scalar = InsElt.getOperand(i_nocapture: `1`);
1516	Value X, Y;
1517	CastInst::CastOps CastOpcode;
1518	if (match(V: Vec, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_FPExt(Op: m_Value(V&: Y))))
1519	CastOpcode = Instruction::FPExt;
1520	else if (match(V: Vec, P: m_SExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_SExt(Op: m_Value(V&: Y))))
1521	CastOpcode = Instruction::SExt;
1522	else if (match(V: Vec, P: m_ZExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_ZExt(Op: m_Value(V&: Y))))
1523	CastOpcode = Instruction::ZExt;
1524	else
1525	return nullptr;
1526
1527	// TODO: We can allow mismatched types by creating an intermediate cast.
1528	if (X->getType()->getScalarType() != Y->getType())
1529	return nullptr;
1530
1531	// inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1532	Value *NewInsElt = Builder.CreateInsertElement(Vec: X, NewElt: Y, Idx: InsElt.getOperand(i_nocapture: `2`));
1533	return CastInst::Create(CastOpcode, S: NewInsElt, Ty: InsElt.getType());
1534	}
1535
1536	/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1537	/// try to convert to a single insert with appropriate bitcasts.
1538	static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1539	bool IsBigEndian,
1540	InstCombiner::BuilderTy &Builder) {
1541	Value *VecOp = InsElt.getOperand(i_nocapture: `0`);
1542	Value *ScalarOp = InsElt.getOperand(i_nocapture: `1`);
1543	Value *IndexOp = InsElt.getOperand(i_nocapture: `2`);
1544
1545	// Pattern depends on endian because we expect lower index is inserted first.
1546	// Big endian:
1547	// inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1548	// Little endian:
1549	// inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1550	// Note: It is not safe to do this transform with an arbitrary base vector
1551	// because the bitcast of that vector to fewer/larger elements could
1552	// allow poison to spill into an element that was not poison before.
1553	// TODO: Detect smaller fractions of the scalar.
1554	// TODO: One-use checks are conservative.
1555	auto *VTy = dyn_cast<FixedVectorType>(Val: InsElt.getType());
1556	Value Scalar0, BaseVec;
1557	uint64_t Index0, Index1;
1558	if (!VTy \|\| (VTy->getNumElements() & `1`) \|\|
1559	!match(V: IndexOp, P: m_ConstantInt(V&: Index1)) \|\|
1560	!match(V: VecOp, P: m_InsertElt(Val: m_Value(V&: BaseVec), Elt: m_Value(V&: Scalar0),
1561	Idx: m_ConstantInt(V&: Index0))) \|\|
1562	!match(V: BaseVec, P: m_Undef()))
1563	return nullptr;
1564
1565	// The first insert must be to the index one less than this one, and
1566	// the first insert must be to an even index.
1567	if (Index0 + `1` != Index1 \|\| Index0 & `1`)
1568	return nullptr;
1569
1570	// For big endian, the high half of the value should be inserted first.
1571	// For little endian, the low half of the value should be inserted first.
1572	Value *X;
1573	uint64_t ShAmt;
1574	if (IsBigEndian) {
1575	if (!match(V: ScalarOp, P: m_Trunc(Op: m_Value(V&: X))) \|\|
1576	!match(V: Scalar0, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1577	return nullptr;
1578	} else {
1579	if (!match(V: Scalar0, P: m_Trunc(Op: m_Value(V&: X))) \|\|
1580	!match(V: ScalarOp, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1581	return nullptr;
1582	}
1583
1584	Type *SrcTy = X->getType();
1585	unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1586	unsigned VecEltWidth = VTy->getScalarSizeInBits();
1587	if (ScalarWidth != VecEltWidth * `2` \|\| ShAmt != VecEltWidth)
1588	return nullptr;
1589
1590	// Bitcast the base vector to a vector type with the source element type.
1591	Type *CastTy = FixedVectorType::get(ElementType: SrcTy, NumElts: VTy->getNumElements() / `2`);
1592	Value *CastBaseVec = Builder.CreateBitCast(V: BaseVec, DestTy: CastTy);
1593
1594	// Scale the insert index for a vector with half as many elements.
1595	// bitcast (inselt (bitcast BaseVec), X, NewIndex)
1596	uint64_t NewIndex = IsBigEndian ? Index1 / `2` : Index0 / `2`;
1597	Value *NewInsert = Builder.CreateInsertElement(Vec: CastBaseVec, NewElt: X, Idx: NewIndex);
1598	return new BitCastInst (NewInsert, VTy);
1599	}
1600
1601	Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1602	Value *VecOp = IE.getOperand(i_nocapture: `0`);
1603	Value *ScalarOp = IE.getOperand(i_nocapture: `1`);
1604	Value *IdxOp = IE.getOperand(i_nocapture: `2`);
1605
1606	if (auto *V = simplifyInsertElementInst(
1607	Vec: VecOp, Elt: ScalarOp, Idx: IdxOp, Q: SQ.getWithInstruction(I: &IE)))
1608	return replaceInstUsesWith(I&: IE, V);
1609
1610	// Canonicalize type of constant indices to i64 to simplify CSE
1611	if (auto *IndexC = dyn_cast<ConstantInt>(Val: IdxOp)) {
1612	if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1613	return replaceOperand(I&: IE, OpNum: `2`, V: NewIdx);
1614
1615	Value BaseVec, OtherScalar;
1616	uint64_t OtherIndexVal;
1617	if (match(V: VecOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Value(V&: BaseVec),
1618	Elt: m_Value(V&: OtherScalar),
1619	Idx: m_ConstantInt(V&: OtherIndexVal)))) &&
1620	!isa<Constant>(Val: OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1621	Value *NewIns = Builder.CreateInsertElement(Vec: BaseVec, NewElt: ScalarOp, Idx: IdxOp);
1622	return InsertElementInst::Create(Vec: NewIns, NewElt: OtherScalar,
1623	Idx: Builder.getInt64(C: OtherIndexVal));
1624	}
1625	}
1626
1627	// If the scalar is bitcast and inserted into undef, do the insert in the
1628	// source type followed by bitcast.
1629	// TODO: Generalize for insert into any constant, not just undef?
1630	Value *ScalarSrc;
1631	if (match(V: VecOp, P: m_Undef()) &&
1632	match(V: ScalarOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: ScalarSrc)))) &&
1633	(ScalarSrc->getType()->isIntegerTy() \|\|
1634	ScalarSrc->getType()->isFloatingPointTy())) {
1635	// inselt undef, (bitcast ScalarSrc), IdxOp -->
1636	// bitcast (inselt undef, ScalarSrc, IdxOp)
1637	Type *ScalarTy = ScalarSrc->getType();
1638	Type *VecTy = VectorType::get(ElementType: ScalarTy, EC: IE.getType()->getElementCount());
1639	Constant *NewUndef = isa<PoisonValue>(Val: VecOp) ? PoisonValue::get(T: VecTy)
1640	: UndefValue::get(T: VecTy);
1641	Value *NewInsElt = Builder.CreateInsertElement(Vec: NewUndef, NewElt: ScalarSrc, Idx: IdxOp);
1642	return new BitCastInst (NewInsElt, IE.getType());
1643	}
1644
1645	// If the vector and scalar are both bitcast from the same element type, do
1646	// the insert in that source type followed by bitcast.
1647	Value *VecSrc;
1648	if (match(V: VecOp, P: m_BitCast(Op: m_Value(V&: VecSrc))) &&
1649	match(V: ScalarOp, P: m_BitCast(Op: m_Value(V&: ScalarSrc))) &&
1650	(VecOp->hasOneUse() \|\| ScalarOp->hasOneUse()) &&
1651	VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1652	cast<VectorType>(Val: VecSrc->getType())->getElementType() ==
1653	ScalarSrc->getType()) {
1654	// inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1655	// bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1656	Value *NewInsElt = Builder.CreateInsertElement(Vec: VecSrc, NewElt: ScalarSrc, Idx: IdxOp);
1657	return new BitCastInst (NewInsElt, IE.getType());
1658	}
1659
1660	// If the inserted element was extracted from some other fixed-length vector
1661	// and both indexes are valid constants, try to turn this into a shuffle.
1662	// Can not handle scalable vector type, the number of elements needed to
1663	// create shuffle mask is not a compile-time constant.
1664	uint64_t InsertedIdx, ExtractedIdx;
1665	Value *ExtVecOp;
1666	if (isa<FixedVectorType>(Val: IE.getType()) &&
1667	match(V: IdxOp, P: m_ConstantInt(V&: InsertedIdx)) &&
1668	match(V: ScalarOp,
1669	P: m_ExtractElt(Val: m_Value(V&: ExtVecOp), Idx: m_ConstantInt(V&: ExtractedIdx))) &&
1670	isa<FixedVectorType>(Val: ExtVecOp->getType()) &&
1671	ExtractedIdx <
1672	cast<FixedVectorType>(Val: ExtVecOp->getType())->getNumElements()) {
1673	// TODO: Looking at the user(s) to determine if this insert is a
1674	// fold-to-shuffle opportunity does not match the usual instcombine
1675	// constraints. We should decide if the transform is worthy based only
1676	// on this instruction and its operands, but that may not work currently.
1677	//
1678	// Here, we are trying to avoid creating shuffles before reaching
1679	// the end of a chain of extract-insert pairs. This is complicated because
1680	// we do not generally form arbitrary shuffle masks in instcombine
1681	// (because those may codegen poorly), but collectShuffleElements() does
1682	// exactly that.
1683	//
1684	// The rules for determining what is an acceptable target-independent
1685	// shuffle mask are fuzzy because they evolve based on the backend's
1686	// capabilities and real-world impact.
1687	auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1688	if (!Insert.hasOneUse())
1689	return true;
1690	auto *InsertUser = dyn_cast<InsertElementInst>(Val: Insert.user_back());
1691	if (!InsertUser)
1692	return true;
1693	return false;
1694	};
1695
1696	// Try to form a shuffle from a chain of extract-insert ops.
1697	if (isShuffleRootCandidate (IE)) {
1698	bool Rerun = true;
1699	while (Rerun) {
1700	Rerun = false;
1701
1702	SmallVector<int, `16`> Mask;
1703	ShuffleOps LR =
1704	collectShuffleElements(V: &IE, Mask, PermittedRHS: nullptr, IC&: *this, Rerun);
1705
1706	// The proposed shuffle may be trivial, in which case we shouldn't
1707	// perform the combine.
1708	if (LR.first != &IE && LR.second != &IE) {
1709	// We now have a shuffle of LHS, RHS, Mask.
1710	if (LR.second == nullptr)
1711	LR.second = PoisonValue::get(T: LR.first->getType());
1712	return new ShuffleVectorInst (LR.first, LR.second, Mask);
1713	}
1714	}
1715	}
1716	}
1717
1718	if (auto VecTy = dyn_cast<FixedVectorType>(Val: VecOp->getType())) {
1719	unsigned VWidth = VecTy->getNumElements();
1720	APInt PoisonElts(VWidth, `0`);
1721	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1722	if (Value *V = SimplifyDemandedVectorElts(V: &IE, DemandedElts: AllOnesEltMask,
1723	PoisonElts)) {
1724	if (V != &IE)
1725	return replaceInstUsesWith(I&: IE, V);
1726	return &IE;
1727	}
1728	}
1729
1730	if (Instruction *Shuf = foldConstantInsEltIntoShuffle(InsElt&: IE))
1731	return Shuf;
1732
1733	if (Instruction *NewInsElt = hoistInsEltConst(InsElt2&: IE, Builder))
1734	return NewInsElt;
1735
1736	if (Instruction *Broadcast = foldInsSequenceIntoSplat(InsElt&: IE))
1737	return Broadcast;
1738
1739	if (Instruction *Splat = foldInsEltIntoSplat(InsElt&: IE))
1740	return Splat;
1741
1742	if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(InsElt&: IE))
1743	return IdentityShuf;
1744
1745	if (Instruction *Ext = narrowInsElt(InsElt&: IE, Builder))
1746	return Ext;
1747
1748	if (Instruction *Ext = foldTruncInsEltPair(InsElt&: IE, IsBigEndian: DL.isBigEndian(), Builder))
1749	return Ext;
1750
1751	return nullptr;
1752	}
1753
1754	/// Return true if we can evaluate the specified expression tree if the vector
1755	/// elements were shuffled in a different order.
1756	static bool canEvaluateShuffled(Value V, ArrayRef<int*> Mask,
1757	unsigned Depth = `5`) {
1758	// We can always reorder the elements of a constant.
1759	if (isa<Constant>(Val: V))
1760	return true;
1761
1762	// We won't reorder vector arguments. No IPO here.
1763	Instruction *I = dyn_cast<Instruction>(Val: V);
1764	if (!I) return false;
1765
1766	// Two users may expect different orders of the elements. Don't try it.
1767	if (!I->hasOneUse())
1768	return false;
1769
1770	if (Depth == `0`) return false;
1771
1772	switch (I->getOpcode()) {
1773	case Instruction::UDiv:
1774	case Instruction::SDiv:
1775	case Instruction::URem:
1776	case Instruction::SRem:
1777	// Propagating an undefined shuffle mask element to integer div/rem is not
1778	// allowed because those opcodes can create immediate undefined behavior
1779	// from an undefined element in an operand.
1780	if (llvm::is_contained(Range&: Mask, Element: -`1`))
1781	return false;
1782	[[fallthrough]];
1783	case Instruction::Add:
1784	case Instruction::FAdd:
1785	case Instruction::Sub:
1786	case Instruction::FSub:
1787	case Instruction::Mul:
1788	case Instruction::FMul:
1789	case Instruction::FDiv:
1790	case Instruction::FRem:
1791	case Instruction::Shl:
1792	case Instruction::LShr:
1793	case Instruction::AShr:
1794	case Instruction::And:
1795	case Instruction::Or:
1796	case Instruction::Xor:
1797	case Instruction::ICmp:
1798	case Instruction::FCmp:
1799	case Instruction::Trunc:
1800	case Instruction::ZExt:
1801	case Instruction::SExt:
1802	case Instruction::FPToUI:
1803	case Instruction::FPToSI:
1804	case Instruction::UIToFP:
1805	case Instruction::SIToFP:
1806	case Instruction::FPTrunc:
1807	case Instruction::FPExt:
1808	case Instruction::GetElementPtr: {
1809	// Bail out if we would create longer vector ops. We could allow creating
1810	// longer vector ops, but that may result in more expensive codegen.
1811	Type *ITy = I->getType();
1812	if (ITy->isVectorTy() &&
1813	Mask.size() > cast<FixedVectorType>(Val: ITy)->getNumElements())
1814	return false;
1815	for (Value *Operand : I->operands()) {
1816	if (!canEvaluateShuffled(V: Operand, Mask, Depth: Depth - `1`))
1817	return false;
1818	}
1819	return true;
1820	}
1821	case Instruction::InsertElement: {
1822	ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: `2`));
1823	if (!CI) return false;
1824	int ElementNumber = CI->getLimitedValue();
1825
1826	// Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1827	// can't put an element into multiple indices.
1828	bool SeenOnce = false;
1829	for (int I : Mask) {
1830	if (I == ElementNumber) {
1831	if (SeenOnce)
1832	return false;
1833	SeenOnce = true;
1834	}
1835	}
1836	return canEvaluateShuffled(V: I->getOperand(i: `0`), Mask, Depth: Depth - `1`);
1837	}
1838	}
1839	return false;
1840	}
1841
1842	/// Rebuild a new instruction just like 'I' but with the new operands given.
1843	/// In the event of type mismatch, the type of the operands is correct.
1844	static Value buildNew(Instruction I, ArrayRef<Value*> NewOps,
1845	IRBuilderBase &Builder) {
1846	Builder.SetInsertPoint(I);
1847	switch (I->getOpcode()) {
1848	case Instruction::Add:
1849	case Instruction::FAdd:
1850	case Instruction::Sub:
1851	case Instruction::FSub:
1852	case Instruction::Mul:
1853	case Instruction::FMul:
1854	case Instruction::UDiv:
1855	case Instruction::SDiv:
1856	case Instruction::FDiv:
1857	case Instruction::URem:
1858	case Instruction::SRem:
1859	case Instruction::FRem:
1860	case Instruction::Shl:
1861	case Instruction::LShr:
1862	case Instruction::AShr:
1863	case Instruction::And:
1864	case Instruction::Or:
1865	case Instruction::Xor: {
1866	BinaryOperator *BO = cast<BinaryOperator>(Val: I);
1867	assert(NewOps.size() == `2` && "binary operator with #ops != 2");
1868	Value *New = Builder.CreateBinOp(Opc: cast<BinaryOperator>(Val: I)->getOpcode(),
1869	LHS: NewOps [`0`], RHS: NewOps [`1`]);
1870	if (auto *NewI = dyn_cast<Instruction>(Val: New)) {
1871	if (isa<OverflowingBinaryOperator>(Val: BO)) {
1872	NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1873	NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1874	}
1875	if (isa<PossiblyExactOperator>(Val: BO)) {
1876	NewI->setIsExact(BO->isExact());
1877	}
1878	if (isa<FPMathOperator>(Val: BO))
1879	NewI->copyFastMathFlags(I);
1880	}
1881	return New;
1882	}
1883	case Instruction::ICmp:
1884	assert(NewOps.size() == `2` && "icmp with #ops != 2");
1885	return Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getPredicate(), LHS: NewOps [`0`],
1886	RHS: NewOps [`1`]);
1887	case Instruction::FCmp:
1888	assert(NewOps.size() == `2` && "fcmp with #ops != 2");
1889	return Builder.CreateFCmp(P: cast<FCmpInst>(Val: I)->getPredicate(), LHS: NewOps [`0`],
1890	RHS: NewOps [`1`]);
1891	case Instruction::Trunc:
1892	case Instruction::ZExt:
1893	case Instruction::SExt:
1894	case Instruction::FPToUI:
1895	case Instruction::FPToSI:
1896	case Instruction::UIToFP:
1897	case Instruction::SIToFP:
1898	case Instruction::FPTrunc:
1899	case Instruction::FPExt: {
1900	// It's possible that the mask has a different number of elements from
1901	// the original cast. We recompute the destination type to match the mask.
1902	Type *DestTy = VectorType::get(
1903	ElementType: I->getType()->getScalarType(),
1904	EC: cast<VectorType>(Val: NewOps [`0`]->getType())->getElementCount());
1905	assert(NewOps.size() == `1` && "cast with #ops != 1");
1906	return Builder.CreateCast(Op: cast<CastInst>(Val: I)->getOpcode(), V: NewOps [`0`],
1907	DestTy);
1908	}
1909	case Instruction::GetElementPtr: {
1910	Value *Ptr = NewOps [`0`];
1911	ArrayRef<Value*> Idx = NewOps.slice(N: `1`);
1912	return Builder.CreateGEP(Ty: cast<GEPOperator>(Val: I)->getSourceElementType(),
1913	Ptr, IdxList: Idx, Name: "",
1914	IsInBounds: cast<GEPOperator>(Val: I)->isInBounds());
1915	}
1916	}
1917	llvm_unreachable("failed to rebuild vector instructions");
1918	}
1919
1920	static Value evaluateInDifferentElementOrder(Value V, ArrayRef<int> Mask,
1921	IRBuilderBase &Builder) {
1922	// Mask.size() does not need to be equal to the number of vector elements.
1923
1924	assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1925	Type *EltTy = V->getType()->getScalarType();
1926
1927	if (isa<PoisonValue>(Val: V))
1928	return PoisonValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
1929
1930	if (match(V, P: m_Undef()))
1931	return UndefValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
1932
1933	if (isa<ConstantAggregateZero>(Val: V))
1934	return ConstantAggregateZero::get(Ty: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
1935
1936	if (Constant *C = dyn_cast<Constant>(Val: V))
1937	return ConstantExpr::getShuffleVector(V1: C, V2: PoisonValue::get(T: C->getType()),
1938	Mask);
1939
1940	Instruction *I = cast<Instruction>(Val: V);
1941	switch (I->getOpcode()) {
1942	case Instruction::Add:
1943	case Instruction::FAdd:
1944	case Instruction::Sub:
1945	case Instruction::FSub:
1946	case Instruction::Mul:
1947	case Instruction::FMul:
1948	case Instruction::UDiv:
1949	case Instruction::SDiv:
1950	case Instruction::FDiv:
1951	case Instruction::URem:
1952	case Instruction::SRem:
1953	case Instruction::FRem:
1954	case Instruction::Shl:
1955	case Instruction::LShr:
1956	case Instruction::AShr:
1957	case Instruction::And:
1958	case Instruction::Or:
1959	case Instruction::Xor:
1960	case Instruction::ICmp:
1961	case Instruction::FCmp:
1962	case Instruction::Trunc:
1963	case Instruction::ZExt:
1964	case Instruction::SExt:
1965	case Instruction::FPToUI:
1966	case Instruction::FPToSI:
1967	case Instruction::UIToFP:
1968	case Instruction::SIToFP:
1969	case Instruction::FPTrunc:
1970	case Instruction::FPExt:
1971	case Instruction::Select:
1972	case Instruction::GetElementPtr: {
1973	SmallVector<Value*, `8`> NewOps;
1974	bool NeedsRebuild =
1975	(Mask.size() !=
1976	cast<FixedVectorType>(Val: I->getType())->getNumElements());
1977	for (int i = `0`, e = I->getNumOperands(); i != e; ++i) {
1978	Value *V;
1979	// Recursively call evaluateInDifferentElementOrder on vector arguments
1980	// as well. E.g. GetElementPtr may have scalar operands even if the
1981	// return value is a vector, so we need to examine the operand type.
1982	if (I->getOperand(i)->getType()->isVectorTy())
1983	V = evaluateInDifferentElementOrder(V: I->getOperand(i), Mask, Builder);
1984	else
1985	V = I->getOperand(i);
1986	NewOps.push_back(Elt: V);
1987	NeedsRebuild \|= (V != I->getOperand(i));
1988	}
1989	if (NeedsRebuild)
1990	return buildNew(I, NewOps, Builder);
1991	return I;
1992	}
1993	case Instruction::InsertElement: {
1994	int Element = cast<ConstantInt>(Val: I->getOperand(i: `2`))->getLimitedValue();
1995
1996	// The insertelement was inserting at Element. Figure out which element
1997	// that becomes after shuffling. The answer is guaranteed to be unique
1998	// by CanEvaluateShuffled.
1999	bool Found = false;
2000	int Index = `0`;
2001	for (int e = Mask.size(); Index != e; ++Index) {
2002	if (Mask [Index] == Element) {
2003	Found = true;
2004	break;
2005	}
2006	}
2007
2008	// If element is not in Mask, no need to handle the operand 1 (element to
2009	// be inserted). Just evaluate values in operand 0 according to Mask.
2010	if (!Found)
2011	return evaluateInDifferentElementOrder(V: I->getOperand(i: `0`), Mask, Builder);
2012
2013	Value *V = evaluateInDifferentElementOrder(V: I->getOperand(i: `0`), Mask,
2014	Builder);
2015	Builder.SetInsertPoint(I);
2016	return Builder.CreateInsertElement(Vec: V, NewElt: I->getOperand(i: `1`), Idx: Index);
2017	}
2018	}
2019	llvm_unreachable("failed to reorder elements of vector instruction!");
2020	}
2021
2022	// Returns true if the shuffle is extracting a contiguous range of values from
2023	// LHS, for example:
2024	// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2025	// Input: \|AA\|BB\|CC\|DD\|EE\|FF\|GG\|HH\|II\|JJ\|KK\|LL\|MM\|NN\|OO\|PP\|
2026	// Shuffles to: \|EE\|FF\|GG\|HH\|
2027	// +--+--+--+--+
2028	static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2029	ArrayRef<int> Mask) {
2030	unsigned LHSElems =
2031	cast<FixedVectorType>(Val: SVI.getOperand(i_nocapture: `0`)->getType())->getNumElements();
2032	unsigned MaskElems = Mask.size();
2033	unsigned BegIdx = Mask.front();
2034	unsigned EndIdx = Mask.back();
2035	if (BegIdx > EndIdx \|\| EndIdx >= LHSElems \|\| EndIdx - BegIdx != MaskElems - `1`)
2036	return false;
2037	for (unsigned I = `0`; I != MaskElems; ++I)
2038	if (static_cast<unsigned>(Mask [I]) != BegIdx + I)
2039	return false;
2040	return true;
2041	}
2042
2043	/// These are the ingredients in an alternate form binary operator as described
2044	/// below.
2045	struct BinopElts {
2046	BinaryOperator::BinaryOps Opcode;
2047	Value *Op0;
2048	Value *Op1;
2049	BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)`0`,
2050	Value V0 = nullptr, Value V1 = nullptr) :
2051	Opcode(Opc), Op0(V0), Op1(V1) {}
2052	operator bool() const { return Opcode != `0`; }
2053	};
2054
2055	/// Binops may be transformed into binops with different opcodes and operands.
2056	/// Reverse the usual canonicalization to enable folds with the non-canonical
2057	/// form of the binop. If a transform is possible, return the elements of the
2058	/// new binop. If not, return invalid elements.
2059	static BinopElts getAlternateBinop(BinaryOperator BO, const* DataLayout &DL) {
2060	Value BO0 = BO->getOperand(i_nocapture: `0`), BO1 = BO->getOperand(i_nocapture: `1`);
2061	Type *Ty = BO->getType();
2062	switch (BO->getOpcode()) {
2063	case Instruction::Shl: {
2064	// shl X, C --> mul X, (1 << C)
2065	Constant *C;
2066	if (match(V: BO1, P: m_Constant(C))) {
2067	Constant *ShlOne = ConstantExpr::getShl(C1: ConstantInt::get(Ty, V: `1`), C2: C);
2068	return {Instruction::Mul, BO0, ShlOne};
2069	}
2070	break;
2071	}
2072	case Instruction::Or: {
2073	// or X, C --> add X, C (when X and C have no common bits set)
2074	const APInt *C;
2075	if (match(V: BO1, P: m_APInt(Res&: C)) && MaskedValueIsZero(V: BO0, Mask: *C, DL))
2076	return {Instruction::Add, BO0, BO1};
2077	break;
2078	}
2079	case Instruction::Sub:
2080	// sub 0, X --> mul X, -1
2081	if (match(V: BO0, P: m_ZeroInt()))
2082	return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2083	break;
2084	default:
2085	break;
2086	}
2087	return {};
2088	}
2089
2090	/// A select shuffle of a select shuffle with a shared operand can be reduced
2091	/// to a single select shuffle. This is an obvious improvement in IR, and the
2092	/// backend is expected to lower select shuffles efficiently.
2093	static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2094	assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2095
2096	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2097	SmallVector<int, `16`> Mask;
2098	Shuf.getShuffleMask(Result&: Mask);
2099	unsigned NumElts = Mask.size();
2100
2101	// Canonicalize a select shuffle with common operand as Op1.
2102	auto *ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op0);
2103	if (ShufOp && ShufOp->isSelect() &&
2104	(ShufOp->getOperand(i_nocapture: `0`) == Op1 \|\| ShufOp->getOperand(i_nocapture: `1`) == Op1)) {
2105	std::swap(a&: Op0, b&: Op1);
2106	ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2107	}
2108
2109	ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op1);
2110	if (!ShufOp \|\| !ShufOp->isSelect() \|\|
2111	(ShufOp->getOperand(i_nocapture: `0`) != Op0 && ShufOp->getOperand(i_nocapture: `1`) != Op0))
2112	return nullptr;
2113
2114	Value X = ShufOp->getOperand(i_nocapture: `0`), Y = ShufOp->getOperand(i_nocapture: `1`);
2115	SmallVector<int, `16`> Mask1;
2116	ShufOp->getShuffleMask(Result&: Mask1);
2117	assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2118
2119	// Canonicalize common operand (Op0) as X (first operand of first shuffle).
2120	if (Y == Op0) {
2121	std::swap(a&: X, b&: Y);
2122	ShuffleVectorInst::commuteShuffleMask(Mask: Mask1, InVecNumElts: NumElts);
2123	}
2124
2125	// If the mask chooses from X (operand 0), it stays the same.
2126	// If the mask chooses from the earlier shuffle, the other mask value is
2127	// transferred to the combined select shuffle:
2128	// shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2129	SmallVector<int, `16`> NewMask(NumElts);
2130	for (unsigned i = `0`; i != NumElts; ++i)
2131	NewMask [i] = Mask [i] < (signed)NumElts ? Mask [i] : Mask1 [i];
2132
2133	// A select mask with undef elements might look like an identity mask.
2134	assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) \|\|
2135	ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2136	"Unexpected shuffle mask");
2137	return new ShuffleVectorInst (X, Y, NewMask);
2138	}
2139
2140	static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2141	const SimplifyQuery &SQ) {
2142	assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2143
2144	// Are we shuffling together some value and that same value after it has been
2145	// modified by a binop with a constant?
2146	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2147	Constant *C;
2148	bool Op0IsBinop;
2149	if (match(V: Op0, P: m_BinOp(L: m_Specific(V: Op1), R: m_Constant(C))))
2150	Op0IsBinop = true;
2151	else if (match(V: Op1, P: m_BinOp(L: m_Specific(V: Op0), R: m_Constant(C))))
2152	Op0IsBinop = false;
2153	else
2154	return nullptr;
2155
2156	// The identity constant for a binop leaves a variable operand unchanged. For
2157	// a vector, this is a splat of something like 0, -1, or 1.
2158	// If there's no identity constant for this binop, we're done.
2159	auto *BO = cast<BinaryOperator>(Val: Op0IsBinop ? Op0 : Op1);
2160	BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2161	Constant IdC = ConstantExpr::getBinOpIdentity(Opcode: BOpcode, Ty: Shuf.getType(), AllowRHSConstant: true*);
2162	if (!IdC)
2163	return nullptr;
2164
2165	Value *X = Op0IsBinop ? Op1 : Op0;
2166
2167	// Prevent folding in the case the non-binop operand might have NaN values.
2168	// If X can have NaN elements then we have that the floating point math
2169	// operation in the transformed code may not preserve the exact NaN
2170	// bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2171	// This makes the transformation incorrect since the original program would
2172	// have preserved the exact NaN bit-pattern.
2173	// Avoid the folding if X can have NaN elements.
2174	if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2175	!isKnownNeverNaN(V: X, Depth: `0`, SQ))
2176	return nullptr;
2177
2178	// Shuffle identity constants into the lanes that return the original value.
2179	// Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2180	// Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2181	// The existing binop constant vector remains in the same operand position.
2182	ArrayRef<int> Mask = Shuf.getShuffleMask();
2183	Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(V1: C, V2: IdC, Mask) :
2184	ConstantExpr::getShuffleVector(V1: IdC, V2: C, Mask);
2185
2186	bool MightCreatePoisonOrUB =
2187	is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2188	(Instruction::isIntDivRem(Opcode: BOpcode) \|\| Instruction::isShift(Opcode: BOpcode));
2189	if (MightCreatePoisonOrUB)
2190	NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpcode, In: NewC, IsRHSConstant: true);
2191
2192	// shuf (bop X, C), X, M --> bop X, C'
2193	// shuf X, (bop X, C), M --> bop X, C'
2194	Instruction *NewBO = BinaryOperator::Create(Op: BOpcode, S1: X, S2: NewC);
2195	NewBO->copyIRFlags(V: BO);
2196
2197	// An undef shuffle mask element may propagate as an undef constant element in
2198	// the new binop. That would produce poison where the original code might not.
2199	// If we already made a safe constant, then there's no danger.
2200	if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2201	NewBO->dropPoisonGeneratingFlags();
2202	return NewBO;
2203	}
2204
2205	/// If we have an insert of a scalar to a non-zero element of an undefined
2206	/// vector and then shuffle that value, that's the same as inserting to the zero
2207	/// element and shuffling. Splatting from the zero element is recognized as the
2208	/// canonical form of splat.
2209	static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2210	InstCombiner::BuilderTy &Builder) {
2211	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2212	ArrayRef<int> Mask = Shuf.getShuffleMask();
2213	Value *X;
2214	uint64_t IndexC;
2215
2216	// Match a shuffle that is a splat to a non-zero element.
2217	if (!match(V: Op0, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Undef(), Elt: m_Value(V&: X),
2218	Idx: m_ConstantInt(V&: IndexC)))) \|\|
2219	!match(V: Op1, P: m_Undef()) \|\| match(Mask, P: m_ZeroMask ()) \|\| IndexC == `0`)
2220	return nullptr;
2221
2222	// Insert into element 0 of a poison vector.
2223	PoisonValue *PoisonVec = PoisonValue::get(T: Shuf.getType());
2224	Value *NewIns = Builder.CreateInsertElement(Vec: PoisonVec, NewElt: X, Idx: (uint64_t)`0`);
2225
2226	// Splat from element 0. Any mask element that is undefined remains undefined.
2227	// For example:
2228	// shuf (inselt undef, X, 2), _, <2,2,undef>
2229	// --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2230	unsigned NumMaskElts =
2231	cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2232	SmallVector<int, `16`> NewMask(NumMaskElts, `0`);
2233	for (unsigned i = `0`; i != NumMaskElts; ++i)
2234	if (Mask [i] == PoisonMaskElem)
2235	NewMask [i] = Mask [i];
2236
2237	return new ShuffleVectorInst (NewIns, NewMask);
2238	}
2239
2240	/// Try to fold shuffles that are the equivalent of a vector select.
2241	Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2242	if (!Shuf.isSelect())
2243	return nullptr;
2244
2245	// Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2246	// Commuting undef to operand 0 conflicts with another canonicalization.
2247	unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2248	if (!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Undef()) &&
2249	Shuf.getMaskValue(Elt: `0`) >= (int)NumElts) {
2250	// TODO: Can we assert that both operands of a shuffle-select are not undef
2251	// (otherwise, it would have been folded by instsimplify?
2252	Shuf.commute();
2253	return &Shuf;
2254	}
2255
2256	if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2257	return I;
2258
2259	if (Instruction *I = foldSelectShuffleWith1Binop(
2260	Shuf, SQ: getSimplifyQuery().getWithInstruction(I: &Shuf)))
2261	return I;
2262
2263	BinaryOperator B0, B1;
2264	if (!match(V: Shuf.getOperand(i_nocapture: `0`), P: m_BinOp(I&: B0)) \|\|
2265	!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_BinOp(I&: B1)))
2266	return nullptr;
2267
2268	// If one operand is "0 - X", allow that to be viewed as "X -1"*
2269	// (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2270	// with a multiply, we will exit because C0/C1 will not be set.
2271	Value X, Y;
2272	Constant C0 = nullptr, C1 = nullptr;
2273	bool ConstantsAreOp1;
2274	if (match(V: B0, P: m_BinOp(L: m_Constant(C&: C0), R: m_Value(V&: X))) &&
2275	match(V: B1, P: m_BinOp(L: m_Constant(C&: C1), R: m_Value(V&: Y))))
2276	ConstantsAreOp1 = false;
2277	else if (match(V: B0, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: X), R: m_Constant(C&: C0)),
2278	R: m_Neg(V: m_Value(V&: X)))) &&
2279	match(V: B1, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: Y), R: m_Constant(C&: C1)),
2280	R: m_Neg(V: m_Value(V&: Y)))))
2281	ConstantsAreOp1 = true;
2282	else
2283	return nullptr;
2284
2285	// We need matching binops to fold the lanes together.
2286	BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2287	BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2288	bool DropNSW = false;
2289	if (ConstantsAreOp1 && Opc0 != Opc1) {
2290	// TODO: We drop "nsw" if shift is converted into multiply because it may
2291	// not be correct when the shift amount is BitWidth - 1. We could examine
2292	// each vector element to determine if it is safe to keep that flag.
2293	if (Opc0 == Instruction::Shl \|\| Opc1 == Instruction::Shl)
2294	DropNSW = true;
2295	if (BinopElts AltB0 = getAlternateBinop(BO: B0, DL)) {
2296	assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2297	Opc0 = AltB0.Opcode;
2298	C0 = cast<Constant>(Val: AltB0.Op1);
2299	} else if (BinopElts AltB1 = getAlternateBinop(BO: B1, DL)) {
2300	assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2301	Opc1 = AltB1.Opcode;
2302	C1 = cast<Constant>(Val: AltB1.Op1);
2303	}
2304	}
2305
2306	if (Opc0 != Opc1 \|\| !C0 \|\| !C1)
2307	return nullptr;
2308
2309	// The opcodes must be the same. Use a new name to make that clear.
2310	BinaryOperator::BinaryOps BOpc = Opc0;
2311
2312	// Select the constant elements needed for the single binop.
2313	ArrayRef<int> Mask = Shuf.getShuffleMask();
2314	Constant *NewC = ConstantExpr::getShuffleVector(V1: C0, V2: C1, Mask);
2315
2316	// We are moving a binop after a shuffle. When a shuffle has an undefined
2317	// mask element, the result is undefined, but it is not poison or undefined
2318	// behavior. That is not necessarily true for div/rem/shift.
2319	bool MightCreatePoisonOrUB =
2320	is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2321	(Instruction::isIntDivRem(Opcode: BOpc) \|\| Instruction::isShift(Opcode: BOpc));
2322	if (MightCreatePoisonOrUB)
2323	NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpc, In: NewC,
2324	IsRHSConstant: ConstantsAreOp1);
2325
2326	Value *V;
2327	if (X == Y) {
2328	// Remove a binop and the shuffle by rearranging the constant:
2329	// shuffle (op V, C0), (op V, C1), M --> op V, C'
2330	// shuffle (op C0, V), (op C1, V), M --> op C', V
2331	V = X;
2332	} else {
2333	// If there are 2 different variable operands, we must create a new shuffle
2334	// (select) first, so check uses to ensure that we don't end up with more
2335	// instructions than we started with.
2336	if (!B0->hasOneUse() && !B1->hasOneUse())
2337	return nullptr;
2338
2339	// If we use the original shuffle mask and op1 is variable, we would be
2340	// putting an undef into operand 1 of div/rem/shift. This is either UB or
2341	// poison. We do not have to guard against UB when constants* are op1*
2342	// because safe constants guarantee that we do not overflow sdiv/srem (and
2343	// there's no danger for other opcodes).
2344	// TODO: To allow this case, create a new shuffle mask with no undefs.
2345	if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2346	return nullptr;
2347
2348	// Note: In general, we do not create new shuffles in InstCombine because we
2349	// do not know if a target can lower an arbitrary shuffle optimally. In this
2350	// case, the shuffle uses the existing mask, so there is no additional risk.
2351
2352	// Select the variable vectors first, then perform the binop:
2353	// shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2354	// shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2355	V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask);
2356	}
2357
2358	Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(Opc: BOpc, LHS: V, RHS: NewC) :
2359	Builder.CreateBinOp(Opc: BOpc, LHS: NewC, RHS: V);
2360
2361	// Flags are intersected from the 2 source binops. But there are 2 exceptions:
2362	// 1. If we changed an opcode, poison conditions might have changed.
2363	// 2. If the shuffle had undef mask elements, the new binop might have undefs
2364	// where the original code did not. But if we already made a safe constant,
2365	// then there's no danger.
2366	if (auto *NewI = dyn_cast<Instruction>(Val: NewBO)) {
2367	NewI->copyIRFlags(V: B0);
2368	NewI->andIRFlags(V: B1);
2369	if (DropNSW)
2370	NewI->setHasNoSignedWrap(false);
2371	if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2372	NewI->dropPoisonGeneratingFlags();
2373	}
2374	return replaceInstUsesWith(I&: Shuf, V: NewBO);
2375	}
2376
2377	/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2378	/// Example (little endian):
2379	/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2380	static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2381	bool IsBigEndian) {
2382	// This must be a bitcasted shuffle of 1 vector integer operand.
2383	Type *DestType = Shuf.getType();
2384	Value *X;
2385	if (!match(V: Shuf.getOperand(i_nocapture: `0`), P: m_BitCast(Op: m_Value(V&: X))) \|\|
2386	!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Undef()) \|\| !DestType->isIntOrIntVectorTy())
2387	return nullptr;
2388
2389	// The source type must have the same number of elements as the shuffle,
2390	// and the source element type must be larger than the shuffle element type.
2391	Type *SrcType = X->getType();
2392	if (!SrcType->isVectorTy() \|\| !SrcType->isIntOrIntVectorTy() \|\|
2393	cast<FixedVectorType>(Val: SrcType)->getNumElements() !=
2394	cast<FixedVectorType>(Val: DestType)->getNumElements() \|\|
2395	SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != `0`)
2396	return nullptr;
2397
2398	assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2399	"Expected a shuffle that decreases length");
2400
2401	// Last, check that the mask chooses the correct low bits for each narrow
2402	// element in the result.
2403	uint64_t TruncRatio =
2404	SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2405	ArrayRef<int> Mask = Shuf.getShuffleMask();
2406	for (unsigned i = `0`, e = Mask.size(); i != e; ++i) {
2407	if (Mask [i] == PoisonMaskElem)
2408	continue;
2409	uint64_t LSBIndex = IsBigEndian ? (i + `1`) * TruncRatio - `1` : i * TruncRatio;
2410	assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2411	if (Mask [i] != (int)LSBIndex)
2412	return nullptr;
2413	}
2414
2415	return new TruncInst (X, DestType);
2416	}
2417
2418	/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2419	/// narrowing (concatenating with undef and extracting back to the original
2420	/// length). This allows replacing the wide select with a narrow select.
2421	static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2422	InstCombiner::BuilderTy &Builder) {
2423	// This must be a narrowing identity shuffle. It extracts the 1st N elements
2424	// of the 1st vector operand of a shuffle.
2425	if (!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Undef()) \|\| !Shuf.isIdentityWithExtract())
2426	return nullptr;
2427
2428	// The vector being shuffled must be a vector select that we can eliminate.
2429	// TODO: The one-use requirement could be eased if X and/or Y are constants.
2430	Value Cond, X, *Y;
2431	if (!match(V: Shuf.getOperand(i_nocapture: `0`),
2432	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: X), R: m_Value(V&: Y)))))
2433	return nullptr;
2434
2435	// We need a narrow condition value. It must be extended with undef elements
2436	// and have the same number of elements as this shuffle.
2437	unsigned NarrowNumElts =
2438	cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2439	Value *NarrowCond;
2440	if (!match(V: Cond, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: NarrowCond), v2: m_Undef()))) \|\|
2441	cast<FixedVectorType>(Val: NarrowCond->getType())->getNumElements() !=
2442	NarrowNumElts \|\|
2443	!cast<ShuffleVectorInst>(Val: Cond)->isIdentityWithPadding())
2444	return nullptr;
2445
2446	// shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2447	// sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2448	Value *NarrowX = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask());
2449	Value *NarrowY = Builder.CreateShuffleVector(V: Y, Mask: Shuf.getShuffleMask());
2450	return SelectInst::Create(C: NarrowCond, S1: NarrowX, S2: NarrowY);
2451	}
2452
2453	/// Canonicalize FP negate/abs after shuffle.
2454	static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2455	InstCombiner::BuilderTy &Builder) {
2456	auto *S0 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: `0`));
2457	Value *X;
2458	if (!S0 \|\| !match(V: S0, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: X)), R: m_FAbs(Op0: m_Value(V&: X)))))
2459	return nullptr;
2460
2461	bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2462
2463	// Match 1-input (unary) shuffle.
2464	// shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2465	if (S0->hasOneUse() && match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Undef())) {
2466	Value *NewShuf = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask());
2467	if (IsFNeg)
2468	return UnaryOperator::CreateFNegFMF(Op: NewShuf, FMFSource: S0);
2469
2470	Function *FAbs = Intrinsic::getDeclaration(M: Shuf.getModule(),
2471	Intrinsic::id: fabs, Tys: Shuf.getType());
2472	CallInst *NewF = CallInst::Create(Func: FAbs, Args: {NewShuf});
2473	NewF->setFastMathFlags(S0->getFastMathFlags());
2474	return NewF;
2475	}
2476
2477	// Match 2-input (binary) shuffle.
2478	auto *S1 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: `1`));
2479	Value *Y;
2480	if (!S1 \|\| !match(V: S1, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: Y)), R: m_FAbs(Op0: m_Value(V&: Y)))) \|\|
2481	S0->getOpcode() != S1->getOpcode() \|\|
2482	(!S0->hasOneUse() && !S1->hasOneUse()))
2483	return nullptr;
2484
2485	// shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2486	Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2487	Instruction *NewF;
2488	if (IsFNeg) {
2489	NewF = UnaryOperator::CreateFNeg(V: NewShuf);
2490	} else {
2491	Function *FAbs = Intrinsic::getDeclaration(M: Shuf.getModule(),
2492	Intrinsic::id: fabs, Tys: Shuf.getType());
2493	NewF = CallInst::Create(Func: FAbs, Args: {NewShuf});
2494	}
2495	NewF->copyIRFlags(V: S0);
2496	NewF->andIRFlags(V: S1);
2497	return NewF;
2498	}
2499
2500	/// Canonicalize casts after shuffle.
2501	static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2502	InstCombiner::BuilderTy &Builder) {
2503	// Do we have 2 matching cast operands?
2504	auto *Cast0 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: `0`));
2505	auto *Cast1 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: `1`));
2506	if (!Cast0 \|\| !Cast1 \|\| Cast0->getOpcode() != Cast1->getOpcode() \|\|
2507	Cast0->getSrcTy() != Cast1->getSrcTy())
2508	return nullptr;
2509
2510	// TODO: Allow other opcodes? That would require easing the type restrictions
2511	// below here.
2512	CastInst::CastOps CastOpcode = Cast0->getOpcode();
2513	switch (CastOpcode) {
2514	case Instruction::FPToSI:
2515	case Instruction::FPToUI:
2516	case Instruction::SIToFP:
2517	case Instruction::UIToFP:
2518	break;
2519	default:
2520	return nullptr;
2521	}
2522
2523	VectorType *ShufTy = Shuf.getType();
2524	VectorType *ShufOpTy = cast<VectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType());
2525	VectorType *CastSrcTy = cast<VectorType>(Val: Cast0->getSrcTy());
2526
2527	// TODO: Allow length-increasing shuffles?
2528	if (ShufTy->getElementCount().getKnownMinValue() >
2529	ShufOpTy->getElementCount().getKnownMinValue())
2530	return nullptr;
2531
2532	// TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2533	assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2534	"Expected fixed vector operands for casts and binary shuffle");
2535	if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2536	return nullptr;
2537
2538	// At least one of the operands must have only one use (the shuffle).
2539	if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2540	return nullptr;
2541
2542	// shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2543	Value *X = Cast0->getOperand(i_nocapture: `0`);
2544	Value *Y = Cast1->getOperand(i_nocapture: `0`);
2545	Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2546	return CastInst::Create(CastOpcode, S: NewShuf, Ty: ShufTy);
2547	}
2548
2549	/// Try to fold an extract subvector operation.
2550	static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2551	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2552	if (!Shuf.isIdentityWithExtract() \|\| !match(V: Op1, P: m_Undef()))
2553	return nullptr;
2554
2555	// Check if we are extracting all bits of an inserted scalar:
2556	// extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2557	Value *X;
2558	if (match(V: Op0, P: m_BitCast(Op: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: X), Idx: m_Zero()))) &&
2559	X->getType()->getPrimitiveSizeInBits() ==
2560	Shuf.getType()->getPrimitiveSizeInBits())
2561	return new BitCastInst (X, Shuf.getType());
2562
2563	// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2564	Value *Y;
2565	ArrayRef<int> Mask;
2566	if (!match(V: Op0, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Value(V&: Y), mask: m_Mask (Mask))))
2567	return nullptr;
2568
2569	// Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2570	// then combining may result in worse codegen.
2571	if (!Op0->hasOneUse())
2572	return nullptr;
2573
2574	// We are extracting a subvector from a shuffle. Remove excess elements from
2575	// the 1st shuffle mask to eliminate the extract.
2576	//
2577	// This transform is conservatively limited to identity extracts because we do
2578	// not allow arbitrary shuffle mask creation as a target-independent transform
2579	// (because we can't guarantee that will lower efficiently).
2580	//
2581	// If the extracting shuffle has an undef mask element, it transfers to the
2582	// new shuffle mask. Otherwise, copy the original mask element. Example:
2583	// shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2584	// shuf X, Y, <C0, undef, C2, undef>
2585	unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2586	SmallVector<int, `16`> NewMask(NumElts);
2587	assert(NumElts < Mask.size() &&
2588	"Identity with extract must have less elements than its inputs");
2589
2590	for (unsigned i = `0`; i != NumElts; ++i) {
2591	int ExtractMaskElt = Shuf.getMaskValue(Elt: i);
2592	int MaskElt = Mask [i];
2593	NewMask [i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2594	}
2595	return new ShuffleVectorInst (X, Y, NewMask);
2596	}
2597
2598	/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2599	/// operand with the operand of an insertelement.
2600	static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2601	InstCombinerImpl &IC) {
2602	Value V0 = Shuf.getOperand(i_nocapture: `0`), V1 = Shuf.getOperand(i_nocapture: `1`);
2603	SmallVector<int, `16`> Mask;
2604	Shuf.getShuffleMask(Result&: Mask);
2605
2606	int NumElts = Mask.size();
2607	int InpNumElts = cast<FixedVectorType>(Val: V0->getType())->getNumElements();
2608
2609	// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2610	// not be able to handle it there if the insertelement has >1 use.
2611	// If the shuffle has an insertelement operand but does not choose the
2612	// inserted scalar element from that value, then we can replace that shuffle
2613	// operand with the source vector of the insertelement.
2614	Value *X;
2615	uint64_t IdxC;
2616	if (match(V: V0, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2617	// shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2618	if (!is_contained(Range&: Mask, Element: (int)IdxC))
2619	return IC.replaceOperand(I&: Shuf, OpNum: `0`, V: X);
2620	}
2621	if (match(V: V1, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2622	// Offset the index constant by the vector width because we are checking for
2623	// accesses to the 2nd vector input of the shuffle.
2624	IdxC += InpNumElts;
2625	// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2626	if (!is_contained(Range&: Mask, Element: (int)IdxC))
2627	return IC.replaceOperand(I&: Shuf, OpNum: `1`, V: X);
2628	}
2629	// For the rest of the transform, the shuffle must not change vector sizes.
2630	// TODO: This restriction could be removed if the insert has only one use
2631	// (because the transform would require a new length-changing shuffle).
2632	if (NumElts != InpNumElts)
2633	return nullptr;
2634
2635	// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2636	auto isShufflingScalarIntoOp1 = [&](Value &Scalar, ConstantInt &IndexC) {
2637	// We need an insertelement with a constant index.
2638	if (!match(V: V0, P: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: Scalar),
2639	Idx: m_ConstantInt(CI&: IndexC))))
2640	return false;
2641
2642	// Test the shuffle mask to see if it splices the inserted scalar into the
2643	// operand 1 vector of the shuffle.
2644	int NewInsIndex = -`1`;
2645	for (int i = `0`; i != NumElts; ++i) {
2646	// Ignore undef mask elements.
2647	if (Mask [i] == -`1`)
2648	continue;
2649
2650	// The shuffle takes elements of operand 1 without lane changes.
2651	if (Mask [i] == NumElts + i)
2652	continue;
2653
2654	// The shuffle must choose the inserted scalar exactly once.
2655	if (NewInsIndex != -`1` \|\| Mask [i] != IndexC->getSExtValue())
2656	return false;
2657
2658	// The shuffle is placing the inserted scalar into element i.
2659	NewInsIndex = i;
2660	}
2661
2662	assert(NewInsIndex != -`1` && "Did not fold shuffle with unused operand?");
2663
2664	// Index is updated to the potentially translated insertion lane.
2665	IndexC = ConstantInt::get(Ty: IndexC->getIntegerType(), V: NewInsIndex);
2666	return true;
2667	};
2668
2669	// If the shuffle is unnecessary, insert the scalar operand directly into
2670	// operand 1 of the shuffle. Example:
2671	// shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2672	Value *Scalar;
2673	ConstantInt *IndexC;
2674	if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2675	return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2676
2677	// Try again after commuting shuffle. Example:
2678	// shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2679	// shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2680	std::swap(a&: V0, b&: V1);
2681	ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2682	if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2683	return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2684
2685	return nullptr;
2686	}
2687
2688	static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2689	// Match the operands as identity with padding (also known as concatenation
2690	// with undef) shuffles of the same source type. The backend is expected to
2691	// recreate these concatenations from a shuffle of narrow operands.
2692	auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: `0`));
2693	auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: `1`));
2694	if (!Shuffle0 \|\| !Shuffle0->isIdentityWithPadding() \|\|
2695	!Shuffle1 \|\| !Shuffle1->isIdentityWithPadding())
2696	return nullptr;
2697
2698	// We limit this transform to power-of-2 types because we expect that the
2699	// backend can convert the simplified IR patterns to identical nodes as the
2700	// original IR.
2701	// TODO: If we can verify the same behavior for arbitrary types, the
2702	// power-of-2 checks can be removed.
2703	Value *X = Shuffle0->getOperand(i_nocapture: `0`);
2704	Value *Y = Shuffle1->getOperand(i_nocapture: `0`);
2705	if (X->getType() != Y->getType() \|\|
2706	!isPowerOf2_32(Value: cast<FixedVectorType>(Val: Shuf.getType())->getNumElements()) \|\|
2707	!isPowerOf2_32(
2708	Value: cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements()) \|\|
2709	!isPowerOf2_32(Value: cast<FixedVectorType>(Val: X->getType())->getNumElements()) \|\|
2710	match(V: X, P: m_Undef()) \|\| match(V: Y, P: m_Undef()))
2711	return nullptr;
2712	assert(match(Shuffle0->getOperand(`1`), m_Undef()) &&
2713	match(Shuffle1->getOperand(`1`), m_Undef()) &&
2714	"Unexpected operand for identity shuffle");
2715
2716	// This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2717	// operands directly by adjusting the shuffle mask to account for the narrower
2718	// types:
2719	// shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2720	int NarrowElts = cast<FixedVectorType>(Val: X->getType())->getNumElements();
2721	int WideElts = cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements();
2722	assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2723
2724	ArrayRef<int> Mask = Shuf.getShuffleMask();
2725	SmallVector<int, `16`> NewMask(Mask.size(), -`1`);
2726	for (int i = `0`, e = Mask.size(); i != e; ++i) {
2727	if (Mask [i] == -`1`)
2728	continue;
2729
2730	// If this shuffle is choosing an undef element from 1 of the sources, that
2731	// element is undef.
2732	if (Mask [i] < WideElts) {
2733	if (Shuffle0->getMaskValue(Elt: Mask [i]) == -`1`)
2734	continue;
2735	} else {
2736	if (Shuffle1->getMaskValue(Elt: Mask [i] - WideElts) == -`1`)
2737	continue;
2738	}
2739
2740	// If this shuffle is choosing from the 1st narrow op, the mask element is
2741	// the same. If this shuffle is choosing from the 2nd narrow op, the mask
2742	// element is offset down to adjust for the narrow vector widths.
2743	if (Mask [i] < WideElts) {
2744	assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2745	NewMask [i] = Mask [i];
2746	} else {
2747	assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2748	NewMask [i] = Mask [i] - (WideElts - NarrowElts);
2749	}
2750	}
2751	return new ShuffleVectorInst (X, Y, NewMask);
2752	}
2753
2754	// Splatting the first element of the result of a BinOp, where any of the
2755	// BinOp's operands are the result of a first element splat can be simplified to
2756	// splatting the first element of the result of the BinOp
2757	Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2758	if (!match(V: SVI.getOperand(i_nocapture: `1`), P: m_Undef()) \|\|
2759	!match(Mask: SVI.getShuffleMask(), P: m_ZeroMask ()) \|\|
2760	!SVI.getOperand(i_nocapture: `0`)->hasOneUse())
2761	return nullptr;
2762
2763	Value *Op0 = SVI.getOperand(i_nocapture: `0`);
2764	Value X, Y;
2765	if (!match(V: Op0, P: m_BinOp(L: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_ZeroMask ()),
2766	R: m_Value(V&: Y))) &&
2767	!match(V: Op0, P: m_BinOp(L: m_Value(V&: X),
2768	R: m_Shuffle(v1: m_Value(V&: Y), v2: m_Undef(), mask: m_ZeroMask ()))))
2769	return nullptr;
2770	if (X->getType() != Y->getType())
2771	return nullptr;
2772
2773	auto *BinOp = cast<BinaryOperator>(Val: Op0);
2774	if (!isSafeToSpeculativelyExecute(I: BinOp))
2775	return nullptr;
2776
2777	Value *NewBO = Builder.CreateBinOp(Opc: BinOp->getOpcode(), LHS: X, RHS: Y);
2778	if (auto NewBOI = dyn_cast<Instruction>(Val: NewBO))
2779	NewBOI->copyIRFlags(V: BinOp);
2780
2781	return new ShuffleVectorInst (NewBO, SVI.getShuffleMask());
2782	}
2783
2784	Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2785	Value *LHS = SVI.getOperand(i_nocapture: `0`);
2786	Value *RHS = SVI.getOperand(i_nocapture: `1`);
2787	SimplifyQuery ShufQuery = SQ.getWithInstruction(I: &SVI);
2788	if (auto *V = simplifyShuffleVectorInst(Op0: LHS, Op1: RHS, Mask: SVI.getShuffleMask(),
2789	RetTy: SVI.getType(), Q: ShufQuery))
2790	return replaceInstUsesWith(I&: SVI, V);
2791
2792	if (Instruction *I = simplifyBinOpSplats(SVI))
2793	return I;
2794
2795	// Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2796	// order to support scalable vectors.
2797	if (match(Mask: SVI.getShuffleMask(), P: m_ZeroMask ()) && !isa<PoisonValue>(Val: RHS))
2798	return replaceOperand(I&: SVI, OpNum: `1`, V: PoisonValue::get(T: RHS->getType()));
2799
2800	if (isa<ScalableVectorType>(Val: LHS->getType()))
2801	return nullptr;
2802
2803	unsigned VWidth = cast<FixedVectorType>(Val: SVI.getType())->getNumElements();
2804	unsigned LHSWidth = cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
2805
2806	// shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2807	//
2808	// if X and Y are of the same (vector) type, and the element size is not
2809	// changed by the bitcasts, we can distribute the bitcasts through the
2810	// shuffle, hopefully reducing the number of instructions. We make sure that
2811	// at least one bitcast only has one use, so we don't *increase* the number of
2812	// instructions here.
2813	Value X, Y;
2814	if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_BitCast(Op: m_Value(V&: Y))) &&
2815	X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2816	X->getType()->getScalarSizeInBits() ==
2817	SVI.getType()->getScalarSizeInBits() &&
2818	(LHS->hasOneUse() \|\| RHS->hasOneUse())) {
2819	Value *V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: SVI.getShuffleMask(),
2820	Name: SVI.getName() + ".uncasted");
2821	return new BitCastInst (V, SVI.getType());
2822	}
2823
2824	ArrayRef<int> Mask = SVI.getShuffleMask();
2825
2826	// Peek through a bitcasted shuffle operand by scaling the mask. If the
2827	// simulated shuffle can simplify, then this shuffle is unnecessary:
2828	// shuf (bitcast X), undef, Mask --> bitcast X'
2829	// TODO: This could be extended to allow length-changing shuffles.
2830	// The transform might also be obsoleted if we allowed canonicalization
2831	// of bitcasted shuffles.
2832	if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_Undef()) &&
2833	X->getType()->isVectorTy() && VWidth == LHSWidth) {
2834	// Try to create a scaled mask constant.
2835	auto *XType = cast<FixedVectorType>(Val: X->getType());
2836	unsigned XNumElts = XType->getNumElements();
2837	SmallVector<int, `16`> ScaledMask;
2838	if (XNumElts >= VWidth) {
2839	assert(XNumElts % VWidth == `0` && "Unexpected vector bitcast");
2840	narrowShuffleMaskElts(Scale: XNumElts / VWidth, Mask, ScaledMask);
2841	} else {
2842	assert(VWidth % XNumElts == `0` && "Unexpected vector bitcast");
2843	if (!widenShuffleMaskElts(Scale: VWidth / XNumElts, Mask, ScaledMask))
2844	ScaledMask.clear();
2845	}
2846	if (!ScaledMask.empty()) {
2847	// If the shuffled source vector simplifies, cast that value to this
2848	// shuffle's type.
2849	if (auto *V = simplifyShuffleVectorInst(Op0: X, Op1: UndefValue::get(T: XType),
2850	Mask: ScaledMask, RetTy: XType, Q: ShufQuery))
2851	return BitCastInst::Create(Instruction::BitCast, S: V, Ty: SVI.getType());
2852	}
2853	}
2854
2855	// shuffle x, x, mask --> shuffle x, undef, mask'
2856	if (LHS == RHS) {
2857	assert(!match(RHS, m_Undef()) &&
2858	"Shuffle with 2 undef ops not simplified?");
2859	return new ShuffleVectorInst (LHS, createUnaryMask(Mask, NumElts: LHSWidth));
2860	}
2861
2862	// shuffle undef, x, mask --> shuffle x, undef, mask'
2863	if (match(V: LHS, P: m_Undef())) {
2864	SVI.commute();
2865	return &SVI;
2866	}
2867
2868	if (Instruction *I = canonicalizeInsertSplat(Shuf&: SVI, Builder))
2869	return I;
2870
2871	if (Instruction *I = foldSelectShuffle(Shuf&: SVI))
2872	return I;
2873
2874	if (Instruction *I = foldTruncShuffle(Shuf&: SVI, IsBigEndian: DL.isBigEndian()))
2875	return I;
2876
2877	if (Instruction *I = narrowVectorSelect(Shuf&: SVI, Builder))
2878	return I;
2879
2880	if (Instruction *I = foldShuffleOfUnaryOps(Shuf&: SVI, Builder))
2881	return I;
2882
2883	if (Instruction *I = foldCastShuffle(Shuf&: SVI, Builder))
2884	return I;
2885
2886	APInt PoisonElts(VWidth, `0`);
2887	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
2888	if (Value *V = SimplifyDemandedVectorElts(V: &SVI, DemandedElts: AllOnesEltMask, PoisonElts)) {
2889	if (V != &SVI)
2890	return replaceInstUsesWith(I&: SVI, V);
2891	return &SVI;
2892	}
2893
2894	if (Instruction *I = foldIdentityExtractShuffle(Shuf&: SVI))
2895	return I;
2896
2897	// These transforms have the potential to lose undef knowledge, so they are
2898	// intentionally placed after SimplifyDemandedVectorElts().
2899	if (Instruction I = foldShuffleWithInsert(Shuf&: SVI, IC&: this))
2900	return I;
2901	if (Instruction *I = foldIdentityPaddedShuffles(Shuf&: SVI))
2902	return I;
2903
2904	if (match(V: RHS, P: m_Undef()) && canEvaluateShuffled(V: LHS, Mask)) {
2905	Value *V = evaluateInDifferentElementOrder(V: LHS, Mask, Builder);
2906	return replaceInstUsesWith(I&: SVI, V);
2907	}
2908
2909	// SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2910	// a non-vector type. We can instead bitcast the original vector followed by
2911	// an extract of the desired element:
2912	//
2913	// %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2914	// <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2915	// %1 = bitcast <4 x i8> %sroa to i32
2916	// Becomes:
2917	// %bc = bitcast <16 x i8> %in to <4 x i32>
2918	// %ext = extractelement <4 x i32> %bc, i32 0
2919	//
2920	// If the shuffle is extracting a contiguous range of values from the input
2921	// vector then each use which is a bitcast of the extracted size can be
2922	// replaced. This will work if the vector types are compatible, and the begin
2923	// index is aligned to a value in the casted vector type. If the begin index
2924	// isn't aligned then we can shuffle the original vector (keeping the same
2925	// vector type) before extracting.
2926	//
2927	// This code will bail out if the target type is fundamentally incompatible
2928	// with vectors of the source type.
2929	//
2930	// Example of <16 x i8>, target type i32:
2931	// Index range [4,8):         v-----------v Will work.
2932	//                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2933	//     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
2934	//     <4 x i32>: |           |           |           |           |
2935	//                +-----------+-----------+-----------+-----------+
2936	// Index range [6,10):              ^-----------^ Needs an extra shuffle.
2937	// Target type i40:           ^--------------^ Won't work, bail.
2938	bool MadeChange = false;
2939	if (isShuffleExtractingFromLHS(SVI, Mask)) {
2940	Value *V = LHS;
2941	unsigned MaskElems = Mask.size();
2942	auto *SrcTy = cast<FixedVectorType>(Val: V->getType());
2943	unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
2944	unsigned SrcElemBitWidth = DL.getTypeSizeInBits(Ty: SrcTy->getElementType());
2945	assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2946	unsigned SrcNumElems = SrcTy->getNumElements();
2947	SmallVector<BitCastInst *, `8`> BCs;
2948	DenseMap<Type , Value > NewBCs;
2949	for (User *U : SVI.users())
2950	if (BitCastInst *BC = dyn_cast<BitCastInst>(Val: U))
2951	if (!BC->use_empty())
2952	// Only visit bitcasts that weren't previously handled.
2953	BCs.push_back(Elt: BC);
2954	for (BitCastInst *BC : BCs) {
2955	unsigned BegIdx = Mask.front();
2956	Type *TgtTy = BC->getDestTy();
2957	unsigned TgtElemBitWidth = DL.getTypeSizeInBits(Ty: TgtTy);
2958	if (!TgtElemBitWidth)
2959	continue;
2960	unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2961	bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2962	bool BegIsAligned = `0` == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2963	if (!VecBitWidthsEqual)
2964	continue;
2965	if (!VectorType::isValidElementType(ElemTy: TgtTy))
2966	continue;
2967	auto *CastSrcTy = FixedVectorType::get(ElementType: TgtTy, NumElts: TgtNumElems);
2968	if (!BegIsAligned) {
2969	// Shuffle the input so [0,NumElements) contains the output, and
2970	// [NumElems,SrcNumElems) is undef.
2971	SmallVector<int, `16`> ShuffleMask(SrcNumElems, -`1`);
2972	for (unsigned I = `0`, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2973	ShuffleMask [I] = Idx;
2974	V = Builder.CreateShuffleVector(V, Mask: ShuffleMask,
2975	Name: SVI.getName() + ".extract");
2976	BegIdx = `0`;
2977	}
2978	unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2979	assert(SrcElemsPerTgtElem);
2980	BegIdx /= SrcElemsPerTgtElem;
2981	bool BCAlreadyExists = NewBCs.contains(Val: CastSrcTy);
2982	auto *NewBC =
2983	BCAlreadyExists
2984	? NewBCs [CastSrcTy]
2985	: Builder.CreateBitCast(V, DestTy: CastSrcTy, Name: SVI.getName() + ".bc");
2986	if (!BCAlreadyExists)
2987	NewBCs [CastSrcTy] = NewBC;
2988	auto *Ext = Builder.CreateExtractElement(Vec: NewBC, Idx: BegIdx,
2989	Name: SVI.getName() + ".extract");
2990	// The shufflevector isn't being replaced: the bitcast that used it
2991	// is. InstCombine will visit the newly-created instructions.
2992	replaceInstUsesWith(I&: *BC, V: Ext);
2993	MadeChange = true;
2994	}
2995	}
2996
2997	// If the LHS is a shufflevector itself, see if we can combine it with this
2998	// one without producing an unusual shuffle.
2999	// Cases that might be simplified:
3000	// 1.
3001	// x1=shuffle(v1,v2,mask1)
3002	// x=shuffle(x1,undef,mask)
3003	// ==>
3004	// x=shuffle(v1,undef,newMask)
3005	// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3006	// 2.
3007	// x1=shuffle(v1,undef,mask1)
3008	// x=shuffle(x1,x2,mask)
3009	// where v1.size() == mask1.size()
3010	// ==>
3011	// x=shuffle(v1,x2,newMask)
3012	// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3013	// 3.
3014	// x2=shuffle(v2,undef,mask2)
3015	// x=shuffle(x1,x2,mask)
3016	// where v2.size() == mask2.size()
3017	// ==>
3018	// x=shuffle(x1,v2,newMask)
3019	// newMask[i] = (mask[i] < x1.size())
3020	// ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3021	// 4.
3022	// x1=shuffle(v1,undef,mask1)
3023	// x2=shuffle(v2,undef,mask2)
3024	// x=shuffle(x1,x2,mask)
3025	// where v1.size() == v2.size()
3026	// ==>
3027	// x=shuffle(v1,v2,newMask)
3028	// newMask[i] = (mask[i] < x1.size())
3029	// ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3030	//
3031	// Here we are really conservative:
3032	// we are absolutely afraid of producing a shuffle mask not in the input
3033	// program, because the code gen may not be smart enough to turn a merged
3034	// shuffle into two specific shuffles: it may produce worse code. As such,
3035	// we only merge two shuffles if the result is either a splat or one of the
3036	// input shuffle masks. In this case, merging the shuffles just removes
3037	// one instruction, which we know is safe. This is good for things like
3038	// turning: (splat(splat)) -> splat, or
3039	// merge(V[0..n], V[n+1..2n]) -> V[0..2n]
3040	ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(Val: LHS);
3041	ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(Val: RHS);
3042	if (LHSShuffle)
3043	if (!match(V: LHSShuffle->getOperand(i_nocapture: `1`), P: m_Poison()) &&
3044	!match(V: RHS, P: m_Poison()))
3045	LHSShuffle = nullptr;
3046	if (RHSShuffle)
3047	if (!match(V: RHSShuffle->getOperand(i_nocapture: `1`), P: m_Poison()))
3048	RHSShuffle = nullptr;
3049	if (!LHSShuffle && !RHSShuffle)
3050	return MadeChange ? &SVI : nullptr;
3051
3052	Value* LHSOp0 = nullptr;
3053	Value* LHSOp1 = nullptr;
3054	Value* RHSOp0 = nullptr;
3055	unsigned LHSOp0Width = `0`;
3056	unsigned RHSOp0Width = `0`;
3057	if (LHSShuffle) {
3058	LHSOp0 = LHSShuffle->getOperand(i_nocapture: `0`);
3059	LHSOp1 = LHSShuffle->getOperand(i_nocapture: `1`);
3060	LHSOp0Width = cast<FixedVectorType>(Val: LHSOp0->getType())->getNumElements();
3061	}
3062	if (RHSShuffle) {
3063	RHSOp0 = RHSShuffle->getOperand(i_nocapture: `0`);
3064	RHSOp0Width = cast<FixedVectorType>(Val: RHSOp0->getType())->getNumElements();
3065	}
3066	Value* newLHS = LHS;
3067	Value* newRHS = RHS;
3068	if (LHSShuffle) {
3069	// case 1
3070	if (match(V: RHS, P: m_Poison())) {
3071	newLHS = LHSOp0;
3072	newRHS = LHSOp1;
3073	}
3074	// case 2 or 4
3075	else if (LHSOp0Width == LHSWidth) {
3076	newLHS = LHSOp0;
3077	}
3078	}
3079	// case 3 or 4
3080	if (RHSShuffle && RHSOp0Width == LHSWidth) {
3081	newRHS = RHSOp0;
3082	}
3083	// case 4
3084	if (LHSOp0 == RHSOp0) {
3085	newLHS = LHSOp0;
3086	newRHS = nullptr;
3087	}
3088
3089	if (newLHS == LHS && newRHS == RHS)
3090	return MadeChange ? &SVI : nullptr;
3091
3092	ArrayRef<int> LHSMask;
3093	ArrayRef<int> RHSMask;
3094	if (newLHS != LHS)
3095	LHSMask = LHSShuffle->getShuffleMask();
3096	if (RHSShuffle && newRHS != RHS)
3097	RHSMask = RHSShuffle->getShuffleMask();
3098
3099	unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3100	SmallVector<int, `16`> newMask;
3101	bool isSplat = true;
3102	int SplatElt = -`1`;
3103	// Create a new mask for the new ShuffleVectorInst so that the new
3104	// ShuffleVectorInst is equivalent to the original one.
3105	for (unsigned i = `0`; i < VWidth; ++i) {
3106	int eltMask;
3107	if (Mask [i] < `0`) {
3108	// This element is a poison value.
3109	eltMask = -`1`;
3110	} else if (Mask [i] < (int)LHSWidth) {
3111	// This element is from left hand side vector operand.
3112	//
3113	// If LHS is going to be replaced (case 1, 2, or 4), calculate the
3114	// new mask value for the element.
3115	if (newLHS != LHS) {
3116	eltMask = LHSMask [Mask [i]];
3117	// If the value selected is a poison value, explicitly specify it
3118	// with a -1 mask value.
3119	if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(Val: LHSOp1))
3120	eltMask = -`1`;
3121	} else
3122	eltMask = Mask [i];
3123	} else {
3124	// This element is from right hand side vector operand
3125	//
3126	// If the value selected is a poison value, explicitly specify it
3127	// with a -1 mask value. (case 1)
3128	if (match(V: RHS, P: m_Poison()))
3129	eltMask = -`1`;
3130	// If RHS is going to be replaced (case 3 or 4), calculate the
3131	// new mask value for the element.
3132	else if (newRHS != RHS) {
3133	eltMask = RHSMask [Mask [i]-LHSWidth];
3134	// If the value selected is a poison value, explicitly specify it
3135	// with a -1 mask value.
3136	if (eltMask >= (int)RHSOp0Width) {
3137	assert(match(RHSShuffle->getOperand(`1`), m_Poison()) &&
3138	"should have been check above");
3139	eltMask = -`1`;
3140	}
3141	} else
3142	eltMask = Mask [i]-LHSWidth;
3143
3144	// If LHS's width is changed, shift the mask value accordingly.
3145	// If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3146	// references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3147	// If newRHS == newLHS, we want to remap any references from newRHS to
3148	// newLHS so that we can properly identify splats that may occur due to
3149	// obfuscation across the two vectors.
3150	if (eltMask >= `0` && newRHS != nullptr && newLHS != newRHS)
3151	eltMask += newLHSWidth;
3152	}
3153
3154	// Check if this could still be a splat.
3155	if (eltMask >= `0`) {
3156	if (SplatElt >= `0` && SplatElt != eltMask)
3157	isSplat = false;
3158	SplatElt = eltMask;
3159	}
3160
3161	newMask.push_back(Elt: eltMask);
3162	}
3163
3164	// If the result mask is equal to one of the original shuffle masks,
3165	// or is a splat, do the replacement.
3166	if (isSplat \|\| newMask == LHSMask \|\| newMask == RHSMask \|\| newMask == Mask) {
3167	if (!newRHS)
3168	newRHS = PoisonValue::get(T: newLHS->getType());
3169	return new ShuffleVectorInst (newLHS, newRHS, newMask);
3170	}
3171
3172	return MadeChange ? &SVI : nullptr;
3173	}
3174

source code of llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp