InstCombineCalls.cpp source code [llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp]

1	//===- InstCombineCalls.cpp -----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "InstCombineInternal.h"
14	#include "llvm/ADT/APFloat.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/APSInt.h"
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/STLFunctionalExtras.h"
19	#include "llvm/ADT/SmallBitVector.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/Analysis/AliasAnalysis.h"
23	#include "llvm/Analysis/AssumeBundleQueries.h"
24	#include "llvm/Analysis/AssumptionCache.h"
25	#include "llvm/Analysis/InstructionSimplify.h"
26	#include "llvm/Analysis/Loads.h"
27	#include "llvm/Analysis/MemoryBuiltins.h"
28	#include "llvm/Analysis/ValueTracking.h"
29	#include "llvm/Analysis/VectorUtils.h"
30	#include "llvm/IR/AttributeMask.h"
31	#include "llvm/IR/Attributes.h"
32	#include "llvm/IR/BasicBlock.h"
33	#include "llvm/IR/Constant.h"
34	#include "llvm/IR/Constants.h"
35	#include "llvm/IR/DataLayout.h"
36	#include "llvm/IR/DebugInfo.h"
37	#include "llvm/IR/DerivedTypes.h"
38	#include "llvm/IR/Function.h"
39	#include "llvm/IR/GlobalVariable.h"
40	#include "llvm/IR/InlineAsm.h"
41	#include "llvm/IR/InstrTypes.h"
42	#include "llvm/IR/Instruction.h"
43	#include "llvm/IR/Instructions.h"
44	#include "llvm/IR/IntrinsicInst.h"
45	#include "llvm/IR/Intrinsics.h"
46	#include "llvm/IR/IntrinsicsAArch64.h"
47	#include "llvm/IR/IntrinsicsAMDGPU.h"
48	#include "llvm/IR/IntrinsicsARM.h"
49	#include "llvm/IR/IntrinsicsHexagon.h"
50	#include "llvm/IR/LLVMContext.h"
51	#include "llvm/IR/Metadata.h"
52	#include "llvm/IR/PatternMatch.h"
53	#include "llvm/IR/Statepoint.h"
54	#include "llvm/IR/Type.h"
55	#include "llvm/IR/User.h"
56	#include "llvm/IR/Value.h"
57	#include "llvm/IR/ValueHandle.h"
58	#include "llvm/Support/AtomicOrdering.h"
59	#include "llvm/Support/Casting.h"
60	#include "llvm/Support/CommandLine.h"
61	#include "llvm/Support/Compiler.h"
62	#include "llvm/Support/Debug.h"
63	#include "llvm/Support/ErrorHandling.h"
64	#include "llvm/Support/KnownBits.h"
65	#include "llvm/Support/MathExtras.h"
66	#include "llvm/Support/raw_ostream.h"
67	#include "llvm/Transforms/InstCombine/InstCombiner.h"
68	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
69	#include "llvm/Transforms/Utils/Local.h"
70	#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
71	#include <algorithm>
72	#include <cassert>
73	#include <cstdint>
74	#include <optional>
75	#include <utility>
76	#include <vector>
77
78	#define DEBUG_TYPE "instcombine"
79	#include "llvm/Transforms/Utils/InstructionWorklist.h"
80
81	using namespace llvm;
82	using namespace PatternMatch;
83
84	STATISTIC(NumSimplified, "Number of library calls simplified");
85
86	static cl::opt<unsigned> GuardWideningWindow(
87	"instcombine-guard-widening-window",
88	cl::init(Val: `3`),
89	cl::desc ("How wide an instruction window to bypass looking for "
90	"another guard"));
91
92	/// Return the specified type promoted as it would be to pass though a va_arg
93	/// area.
94	static Type getPromotedType(Type Ty) {
95	if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) {
96	if (ITy->getBitWidth() < `32`)
97	return Type::getInt32Ty(C&: Ty->getContext());
98	}
99	return Ty;
100	}
101
102	/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
103	/// TODO: This should probably be integrated with visitAllocSites, but that
104	/// requires a deeper change to allow either unread or unwritten objects.
105	static bool hasUndefSource(AnyMemTransferInst *MI) {
106	auto *Src = MI->getRawSource();
107	while (isa<GetElementPtrInst>(Val: Src) \|\| isa<BitCastInst>(Val: Src)) {
108	if (!Src->hasOneUse())
109	return false;
110	Src = cast<Instruction>(Val: Src)->getOperand(i: `0`);
111	}
112	return isa<AllocaInst>(Val: Src) && Src->hasOneUse();
113	}
114
115	Instruction InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst MI) {
116	Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
117	MaybeAlign CopyDstAlign = MI->getDestAlign();
118	if (!CopyDstAlign \|\| *CopyDstAlign < DstAlign) {
119	MI->setDestAlignment(DstAlign);
120	return MI;
121	}
122
123	Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT);
124	MaybeAlign CopySrcAlign = MI->getSourceAlign();
125	if (!CopySrcAlign \|\| *CopySrcAlign < SrcAlign) {
126	MI->setSourceAlignment(SrcAlign);
127	return MI;
128	}
129
130	// If we have a store to a location which is known constant, we can conclude
131	// that the store must be storing the constant value (else the memory
132	// wouldn't be constant), and this must be a noop.
133	if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
134	// Set the size of the copy to 0, it will be deleted on the next iteration.
135	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
136	return MI;
137	}
138
139	// If the source is provably undef, the memcpy/memmove doesn't do anything
140	// (unless the transfer is volatile).
141	if (hasUndefSource(MI) && !MI->isVolatile()) {
142	// Set the size of the copy to 0, it will be deleted on the next iteration.
143	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
144	return MI;
145	}
146
147	// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
148	// load/store.
149	ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength());
150	if (!MemOpLength) return nullptr;
151
152	// Source and destination pointer types are always "i8" for intrinsic. See*
153	// if the size is something we can handle with a single primitive load/store.
154	// A single load+store correctly handles overlapping memory in the memmove
155	// case.
156	uint64_t Size = MemOpLength->getLimitedValue();
157	assert(Size && "0-sized memory transferring should be removed already.");
158
159	if (Size > `8` \|\| (Size&(Size-`1`)))
160	return nullptr; // If not 1/2/4/8 bytes, exit.
161
162	// If it is an atomic and alignment is less than the size then we will
163	// introduce the unaligned memory access which will be later transformed
164	// into libcall in CodeGen. This is not evident performance gain so disable
165	// it now.
166	if (isa<AtomicMemTransferInst>(Val: MI))
167	if (CopyDstAlign < Size \|\| CopySrcAlign < Size)
168	return nullptr;
169
170	// Use an integer load+store unless we can find something better.
171	IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<`3`);
172
173	// If the memcpy has metadata describing the members, see if we can get the
174	// TBAA tag describing our copy.
175	AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size);
176
177	Value *Src = MI->getArgOperand(i: `1`);
178	Value *Dest = MI->getArgOperand(i: `0`);
179	LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src);
180	// Alignment from the mem intrinsic will be better, so use it.
181	L->setAlignment(*CopySrcAlign);
182	L->setAAMetadata(AACopyMD);
183	MDNode *LoopMemParallelMD =
184	MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
185	if (LoopMemParallelMD)
186	L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
187	MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group);
188	if (AccessGroupMD)
189	L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
190
191	StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest);
192	// Alignment from the mem intrinsic will be better, so use it.
193	S->setAlignment(*CopyDstAlign);
194	S->setAAMetadata(AACopyMD);
195	if (LoopMemParallelMD)
196	S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
197	if (AccessGroupMD)
198	S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
199	S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
200
201	if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) {
202	// non-atomics can be volatile
203	L->setVolatile(MT->isVolatile());
204	S->setVolatile(MT->isVolatile());
205	}
206	if (isa<AtomicMemTransferInst>(Val: MI)) {
207	// atomics have to be unordered
208	L->setOrdering(AtomicOrdering::Unordered);
209	S->setOrdering(AtomicOrdering::Unordered);
210	}
211
212	// Set the size of the copy to 0, it will be deleted on the next iteration.
213	MI->setLength(Constant::getNullValue(Ty: MemOpLength->getType()));
214	return MI;
215	}
216
217	Instruction InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst MI) {
218	const Align KnownAlignment =
219	getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
220	MaybeAlign MemSetAlign = MI->getDestAlign();
221	if (!MemSetAlign \|\| *MemSetAlign < KnownAlignment) {
222	MI->setDestAlignment(KnownAlignment);
223	return MI;
224	}
225
226	// If we have a store to a location which is known constant, we can conclude
227	// that the store must be storing the constant value (else the memory
228	// wouldn't be constant), and this must be a noop.
229	if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
230	// Set the size of the copy to 0, it will be deleted on the next iteration.
231	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
232	return MI;
233	}
234
235	// Remove memset with an undef value.
236	// FIXME: This is technically incorrect because it might overwrite a poison
237	// value. Change to PoisonValue once #52930 is resolved.
238	if (isa<UndefValue>(Val: MI->getValue())) {
239	// Set the size of the copy to 0, it will be deleted on the next iteration.
240	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
241	return MI;
242	}
243
244	// Extract the length and alignment and fill if they are constant.
245	ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength());
246	ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue());
247	if (!LenC \|\| !FillC \|\| !FillC->getType()->isIntegerTy(Bitwidth: `8`))
248	return nullptr;
249	const uint64_t Len = LenC->getLimitedValue();
250	assert(Len && "0-sized memory setting should be removed already.");
251	const Align Alignment = MI->getDestAlign().valueOrOne();
252
253	// If it is an atomic and alignment is less than the size then we will
254	// introduce the unaligned memory access which will be later transformed
255	// into libcall in CodeGen. This is not evident performance gain so disable
256	// it now.
257	if (isa<AtomicMemSetInst>(Val: MI))
258	if (Alignment < Len)
259	return nullptr;
260
261	// memset(s,c,n) -> store s, c (for n=1,2,4,8)
262	if (Len <= `8` && isPowerOf2_32(Value: (uint32_t)Len)) {
263	Type ITy = IntegerType::get(C&: MI->getContext(), NumBits: Len`8`); // n=1 -> i8.
264
265	Value *Dest = MI->getDest();
266
267	// Extract the fill value and store.
268	const uint64_t Fill = FillC->getZExtValue()*`0x0101010101010101ULL`;
269	Constant *FillVal = ConstantInt::get(Ty: ITy, V: Fill);
270	StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile());
271	S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
272	auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) {
273	if (llvm::is_contained(DbgAssign->location_ops(), FillC))
274	DbgAssign->replaceVariableLocationOp(FillC, FillVal);
275	};
276	for_each(Range: at::getAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers);
277	for_each(Range: at::getDPVAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers);
278
279	S->setAlignment(Alignment);
280	if (isa<AtomicMemSetInst>(Val: MI))
281	S->setOrdering(AtomicOrdering::Unordered);
282
283	// Set the size of the copy to 0, it will be deleted on the next iteration.
284	MI->setLength(Constant::getNullValue(Ty: LenC->getType()));
285	return MI;
286	}
287
288	return nullptr;
289	}
290
291	// TODO, Obvious Missing Transforms:
292	// Narrow width by halfs excluding zero/undef lanes*
293	Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
294	Value *LoadPtr = II.getArgOperand(i: `0`);
295	const Align Alignment =
296	cast<ConstantInt>(Val: II.getArgOperand(i: `1`))->getAlignValue();
297
298	// If the mask is all ones or undefs, this is a plain vector load of the 1st
299	// argument.
300	if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: `2`))) {
301	LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
302	Name: "unmaskedload");
303	L->copyMetadata(SrcInst: II);
304	return L;
305	}
306
307	// If we can unconditionally load from this address, replace with a
308	// load/select idiom. TODO: use DT for context sensitive query
309	if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(),
310	DL: II.getModule()->getDataLayout(), CtxI: &II, AC: &AC)) {
311	LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
312	Name: "unmaskedload");
313	LI->copyMetadata(SrcInst: II);
314	return Builder.CreateSelect(C: II.getArgOperand(i: `2`), True: LI, False: II.getArgOperand(i: `3`));
315	}
316
317	return nullptr;
318	}
319
320	// TODO, Obvious Missing Transforms:
321	// Single constant active lane -> store*
322	// Narrow width by halfs excluding zero/undef lanes*
323	Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
324	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `3`));
325	if (!ConstMask)
326	return nullptr;
327
328	// If the mask is all zeros, this instruction does nothing.
329	if (ConstMask->isNullValue())
330	return eraseInstFromFunction(I&: II);
331
332	// If the mask is all ones, this is a plain vector store of the 1st argument.
333	if (ConstMask->isAllOnesValue()) {
334	Value *StorePtr = II.getArgOperand(i: `1`);
335	Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: `2`))->getAlignValue();
336	StoreInst *S =
337	new StoreInst (II.getArgOperand(i: `0`), StorePtr, false, Alignment);
338	S->copyMetadata(SrcInst: II);
339	return S;
340	}
341
342	if (isa<ScalableVectorType>(Val: ConstMask->getType()))
343	return nullptr;
344
345	// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
346	APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
347	APInt PoisonElts(DemandedElts.getBitWidth(), `0`);
348	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `0`), DemandedElts,
349	PoisonElts))
350	return replaceOperand(I&: II, OpNum: `0`, V);
351
352	return nullptr;
353	}
354
355	// TODO, Obvious Missing Transforms:
356	// Single constant active lane load -> load*
357	// Dereferenceable address & few lanes -> scalarize speculative load/selects*
358	// Adjacent vector addresses -> masked.load*
359	// Narrow width by halfs excluding zero/undef lanes*
360	// Vector incrementing address -> vector masked load*
361	Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
362	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `2`));
363	if (!ConstMask)
364	return nullptr;
365
366	// Vector splat address w/known mask -> scalar load
367	// Fold the gather to load the source vector first lane
368	// because it is reloading the same value each time
369	if (ConstMask->isAllOnesValue())
370	if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: `0`))) {
371	auto *VecTy = cast<VectorType>(Val: II.getType());
372	const Align Alignment =
373	cast<ConstantInt>(Val: II.getArgOperand(i: `1`))->getAlignValue();
374	LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr,
375	Align: Alignment, Name: "load.scalar");
376	Value *Shuf =
377	Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast");
378	return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf));
379	}
380
381	return nullptr;
382	}
383
384	// TODO, Obvious Missing Transforms:
385	// Single constant active lane -> store*
386	// Adjacent vector addresses -> masked.store*
387	// Narrow store width by halfs excluding zero/undef lanes*
388	// Vector incrementing address -> vector masked store*
389	Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
390	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `3`));
391	if (!ConstMask)
392	return nullptr;
393
394	// If the mask is all zeros, a scatter does nothing.
395	if (ConstMask->isNullValue())
396	return eraseInstFromFunction(I&: II);
397
398	// Vector splat address -> scalar store
399	if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: `1`))) {
400	// scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
401	if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: `0`))) {
402	Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: `2`))->getAlignValue();
403	StoreInst *S =
404	new StoreInst (SplatValue, SplatPtr, /IsVolatile=/false, Alignment);
405	S->copyMetadata(SrcInst: II);
406	return S;
407	}
408	// scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
409	// lastlane), ptr
410	if (ConstMask->isAllOnesValue()) {
411	Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: `2`))->getAlignValue();
412	VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: `1`)->getType());
413	ElementCount VF = WideLoadTy->getElementCount();
414	Value *RunTimeVF = Builder.CreateElementCount(DstType: Builder.getInt32Ty(), EC: VF);
415	Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: `1`));
416	Value *Extract =
417	Builder.CreateExtractElement(Vec: II.getArgOperand(i: `0`), Idx: LastLane);
418	StoreInst *S =
419	new StoreInst (Extract, SplatPtr, /IsVolatile=/false, Alignment);
420	S->copyMetadata(SrcInst: II);
421	return S;
422	}
423	}
424	if (isa<ScalableVectorType>(Val: ConstMask->getType()))
425	return nullptr;
426
427	// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
428	APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
429	APInt PoisonElts(DemandedElts.getBitWidth(), `0`);
430	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `0`), DemandedElts,
431	PoisonElts))
432	return replaceOperand(I&: II, OpNum: `0`, V);
433	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `1`), DemandedElts,
434	PoisonElts))
435	return replaceOperand(I&: II, OpNum: `1`, V);
436
437	return nullptr;
438	}
439
440	/// This function transforms launder.invariant.group and strip.invariant.group
441	/// like:
442	/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
443	/// launder(strip(%x)) -> launder(%x)
444	/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
445	/// strip(launder(%x)) -> strip(%x)
446	/// This is legal because it preserves the most recent information about
447	/// the presence or absence of invariant.group.
448	static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
449	InstCombinerImpl &IC) {
450	auto *Arg = II.getArgOperand(i: `0`);
451	auto *StrippedArg = Arg->stripPointerCasts();
452	auto *StrippedInvariantGroupsArg = StrippedArg;
453	while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) {
454	if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
455	Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
456	break;
457	StrippedInvariantGroupsArg = Intr->getArgOperand(i: `0`)->stripPointerCasts();
458	}
459	if (StrippedArg == StrippedInvariantGroupsArg)
460	return nullptr; // No launders/strips to remove.
461
462	Value Result = nullptr*;
463
464	if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
465	Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg);
466	else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
467	Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg);
468	else
469	llvm_unreachable(
470	"simplifyInvariantGroupIntrinsic only handles launder and strip");
471	if (Result->getType()->getPointerAddressSpace() !=
472	II.getType()->getPointerAddressSpace())
473	Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType());
474
475	return cast<Instruction>(Val: Result);
476	}
477
478	static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
479	assert((II.getIntrinsicID() == Intrinsic::cttz \|\|
480	II.getIntrinsicID() == Intrinsic::ctlz) &&
481	"Expected cttz or ctlz intrinsic");
482	bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
483	Value *Op0 = II.getArgOperand(i: `0`);
484	Value *Op1 = II.getArgOperand(i: `1`);
485	Value *X;
486	// ctlz(bitreverse(x)) -> cttz(x)
487	// cttz(bitreverse(x)) -> ctlz(x)
488	if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) {
489	Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
490	Function *F = Intrinsic::getDeclaration(M: II.getModule(), id: ID, Tys: II.getType());
491	return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: `1`)});
492	}
493
494	if (II.getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
495	// ctlz/cttz i1 Op0 --> not Op0
496	if (match(V: Op1, P: m_Zero()))
497	return BinaryOperator::CreateNot(Op: Op0);
498	// If zero is poison, then the input can be assumed to be "true", so the
499	// instruction simplifies to "false".
500	assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
501	return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType()));
502	}
503
504	Constant *C;
505
506	if (IsTZ) {
507	// cttz(-x) -> cttz(x)
508	if (match(V: Op0, P: m_Neg(V: m_Value(V&: X))))
509	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
510
511	// cttz(-x & x) -> cttz(x)
512	if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
513	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
514
515	// cttz(sext(x)) -> cttz(zext(x))
516	if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
517	auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType());
518	auto *CttzZext =
519	IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
520	return IC.replaceInstUsesWith(I&: II, V: CttzZext);
521	}
522
523	// Zext doesn't change the number of trailing zeros, so narrow:
524	// cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
525	if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) {
526	auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
527	IC.Builder.getTrue());
528	auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType());
529	return IC.replaceInstUsesWith(I&: II, V: ZextCttz);
530	}
531
532	// cttz(abs(x)) -> cttz(x)
533	// cttz(nabs(x)) -> cttz(x)
534	Value *Y;
535	SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor;
536	if (SPF == SPF_ABS \|\| SPF == SPF_NABS)
537	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
538
539	if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(V&: X))))
540	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
541
542	// cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
543	if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
544	match(V: Op1, P: m_One())) {
545	Value *ConstCttz =
546	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: cttz, LHS: C, RHS: Op1);
547	return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X);
548	}
549
550	// cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
551	if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) &&
552	match(V: Op1, P: m_One())) {
553	Value *ConstCttz =
554	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: cttz, LHS: C, RHS: Op1);
555	return BinaryOperator::CreateSub(V1: ConstCttz, V2: X);
556	}
557	} else {
558	// ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
559	if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
560	match(V: Op1, P: m_One())) {
561	Value *ConstCtlz =
562	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: ctlz, LHS: C, RHS: Op1);
563	return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X);
564	}
565
566	// ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
567	if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
568	match(V: Op1, P: m_One())) {
569	Value *ConstCtlz =
570	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: ctlz, LHS: C, RHS: Op1);
571	return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X);
572	}
573	}
574
575	KnownBits Known = IC.computeKnownBits(V: Op0, Depth: `0`, CxtI: &II);
576
577	// Create a mask for bits above (ctlz) or below (cttz) the first known one.
578	unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
579	: Known.countMaxLeadingZeros();
580	unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
581	: Known.countMinLeadingZeros();
582
583	// If all bits above (ctlz) or below (cttz) the first known one are known
584	// zero, this value is constant.
585	// FIXME: This should be in InstSimplify because we're replacing an
586	// instruction with a constant.
587	if (PossibleZeros == DefiniteZeros) {
588	auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros);
589	return IC.replaceInstUsesWith(I&: II, V: C);
590	}
591
592	// If the input to cttz/ctlz is known to be non-zero,
593	// then change the 'ZeroIsPoison' parameter to 'true'
594	// because we know the zero behavior can't affect the result.
595	if (!Known.One.isZero() \|\|
596	isKnownNonZero(V: Op0, DL: IC.getDataLayout(), Depth: `0`, AC: &IC.getAssumptionCache(), CxtI: &II,
597	DT: &IC.getDominatorTree())) {
598	if (!match(V: II.getArgOperand(i: `1`), P: m_One()))
599	return IC.replaceOperand(I&: II, OpNum: `1`, V: IC.Builder.getTrue());
600	}
601
602	// Add range metadata since known bits can't completely reflect what we know.
603	auto *IT = cast<IntegerType>(Val: Op0->getType()->getScalarType());
604	if (IT && IT->getBitWidth() != `1` && !II.getMetadata(KindID: LLVMContext::MD_range)) {
605	Metadata *LowAndHigh[] = {
606	ConstantAsMetadata::get(C: ConstantInt::get(Ty: IT, V: DefiniteZeros)),
607	ConstantAsMetadata::get(C: ConstantInt::get(Ty: IT, V: PossibleZeros + `1`))};
608	II.setMetadata(KindID: LLVMContext::MD_range,
609	Node: MDNode::get(Context&: II.getContext(), MDs: LowAndHigh));
610	return &II;
611	}
612
613	return nullptr;
614	}
615
616	static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
617	assert(II.getIntrinsicID() == Intrinsic::ctpop &&
618	"Expected ctpop intrinsic");
619	Type *Ty = II.getType();
620	unsigned BitWidth = Ty->getScalarSizeInBits();
621	Value *Op0 = II.getArgOperand(i: `0`);
622	Value X, Y;
623
624	// ctpop(bitreverse(x)) -> ctpop(x)
625	// ctpop(bswap(x)) -> ctpop(x)
626	if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) \|\| match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X))))
627	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
628
629	// ctpop(rot(x)) -> ctpop(x)
630	if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) \|\|
631	match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) &&
632	X == Y)
633	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
634
635	// ctpop(x \| -x) -> bitwidth - cttz(x, false)
636	if (Op0->hasOneUse() &&
637	match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
638	Function *F =
639	Intrinsic::getDeclaration(M: II.getModule(), Intrinsic::id: cttz, Tys: Ty);
640	auto *Cttz = IC.Builder.CreateCall(Callee: F, Args: {X, IC.Builder.getFalse()});
641	auto *Bw = ConstantInt::get(Ty, V: APInt (BitWidth, BitWidth));
642	return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
643	}
644
645	// ctpop(~x & (x - 1)) -> cttz(x, false)
646	if (match(V: Op0,
647	P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
648	Function *F =
649	Intrinsic::getDeclaration(M: II.getModule(), Intrinsic::id: cttz, Tys: Ty);
650	return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()});
651	}
652
653	// Zext doesn't change the number of set bits, so narrow:
654	// ctpop (zext X) --> zext (ctpop X)
655	if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) {
656	Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ID: ctpop, V: X);
657	return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty);
658	}
659
660	KnownBits Known(BitWidth);
661	IC.computeKnownBits(V: Op0, Known, Depth: `0`, CxtI: &II);
662
663	// If all bits are zero except for exactly one fixed bit, then the result
664	// must be 0 or 1, and we can get that answer by shifting to LSB:
665	// ctpop (X & 32) --> (X & 32) >> 5
666	// TODO: Investigate removing this as its likely unnecessary given the below
667	// `isKnownToBeAPowerOfTwo` check.
668	if ((~Known.Zero).isPowerOf2())
669	return BinaryOperator::CreateLShr(
670	V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2()));
671
672	// More generally we can also handle non-constant power of 2 patterns such as
673	// shl/shr(Pow2, X), (X & -X), etc... by transforming:
674	// ctpop(Pow2OrZero) --> icmp ne X, 0
675	if (IC.isKnownToBeAPowerOfTwo(V: Op0, / OrZero / true))
676	return CastInst::Create(Instruction::ZExt,
677	S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0,
678	RHS: Constant::getNullValue(Ty)),
679	Ty);
680
681	// Add range metadata since known bits can't completely reflect what we know.
682	auto *IT = cast<IntegerType>(Val: Ty->getScalarType());
683	unsigned MinCount = Known.countMinPopulation();
684	unsigned MaxCount = Known.countMaxPopulation();
685	if (IT->getBitWidth() != `1` && !II.getMetadata(KindID: LLVMContext::MD_range)) {
686	Metadata *LowAndHigh[] = {
687	ConstantAsMetadata::get(C: ConstantInt::get(Ty: IT, V: MinCount)),
688	ConstantAsMetadata::get(C: ConstantInt::get(Ty: IT, V: MaxCount + `1`))};
689	II.setMetadata(KindID: LLVMContext::MD_range,
690	Node: MDNode::get(Context&: II.getContext(), MDs: LowAndHigh));
691	return &II;
692	}
693
694	return nullptr;
695	}
696
697	/// Convert a table lookup to shufflevector if the mask is constant.
698	/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
699	/// which case we could lower the shufflevector with rev64 instructions
700	/// as it's actually a byte reverse.
701	static Value simplifyNeonTbl1(const* IntrinsicInst &II,
702	InstCombiner::BuilderTy &Builder) {
703	// Bail out if the mask is not a constant.
704	auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: `1`));
705	if (!C)
706	return nullptr;
707
708	auto *VecTy = cast<FixedVectorType>(Val: II.getType());
709	unsigned NumElts = VecTy->getNumElements();
710
711	// Only perform this transformation for <8 x i8> vector types.
712	if (!VecTy->getElementType()->isIntegerTy(Bitwidth: `8`) \|\| NumElts != `8`)
713	return nullptr;
714
715	int Indexes[`8`];
716
717	for (unsigned I = `0`; I < NumElts; ++I) {
718	Constant *COp = C->getAggregateElement(Elt: I);
719
720	if (!COp \|\| !isa<ConstantInt>(Val: COp))
721	return nullptr;
722
723	Indexes[I] = cast<ConstantInt>(Val: COp)->getLimitedValue();
724
725	// Make sure the mask indices are in range.
726	if ((unsigned)Indexes[I] >= NumElts)
727	return nullptr;
728	}
729
730	auto *V1 = II.getArgOperand(i: `0`);
731	auto *V2 = Constant::getNullValue(Ty: V1->getType());
732	return Builder.CreateShuffleVector(V1, V2, Mask: ArrayRef(Indexes));
733	}
734
735	// Returns true iff the 2 intrinsics have the same operands, limiting the
736	// comparison to the first NumOperands.
737	static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
738	unsigned NumOperands) {
739	assert(I.arg_size() >= NumOperands && "Not enough operands");
740	assert(E.arg_size() >= NumOperands && "Not enough operands");
741	for (unsigned i = `0`; i < NumOperands; i++)
742	if (I.getArgOperand(i) != E.getArgOperand(i))
743	return false;
744	return true;
745	}
746
747	// Remove trivially empty start/end intrinsic ranges, i.e. a start
748	// immediately followed by an end (ignoring debuginfo or other
749	// start/end intrinsics in between). As this handles only the most trivial
750	// cases, tracking the nesting level is not needed:
751	//
752	// call @llvm.foo.start(i1 0)
753	// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
754	// call @llvm.foo.end(i1 0)
755	// call @llvm.foo.end(i1 0) ; &I
756	static bool
757	removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
758	std::function<bool(const IntrinsicInst &)> IsStart) {
759	// We start from the end intrinsic and scan backwards, so that InstCombine
760	// has already processed (and potentially removed) all the instructions
761	// before the end intrinsic.
762	BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
763	for (; BI != BE; ++BI) {
764	if (auto I = dyn_cast<IntrinsicInst>(Val: &BI)) {
765	if (I->isDebugOrPseudoInst() \|\|
766	I->getIntrinsicID() == EndI.getIntrinsicID())
767	continue;
768	if (IsStart (*I)) {
769	if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) {
770	IC.eraseInstFromFunction(I&: *I);
771	IC.eraseInstFromFunction(I&: EndI);
772	return true;
773	}
774	// Skip start intrinsics that don't pair with this end intrinsic.
775	continue;
776	}
777	}
778	break;
779	}
780
781	return false;
782	}
783
784	Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
785	removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
786	return I.getIntrinsicID() == Intrinsic::vastart \|\|
787	I.getIntrinsicID() == Intrinsic::vacopy;
788	});
789	return nullptr;
790	}
791
792	static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
793	assert(Call.arg_size() > `1` && "Need at least 2 args to swap");
794	Value Arg0 = Call.getArgOperand(i: `0`), Arg1 = Call.getArgOperand(i: `1`);
795	if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) {
796	Call.setArgOperand(i: `0`, v: Arg1);
797	Call.setArgOperand(i: `1`, v: Arg0);
798	return &Call;
799	}
800	return nullptr;
801	}
802
803	/// Creates a result tuple for an overflow intrinsic \p II with a given
804	/// \p Result and a constant \p Overflow value.
805	static Instruction createOverflowTuple(IntrinsicInst II, Value *Result,
806	Constant *Overflow) {
807	Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow};
808	StructType *ST = cast<StructType>(Val: II->getType());
809	Constant *Struct = ConstantStruct::get(T: ST, V);
810	return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: `0`);
811	}
812
813	Instruction *
814	InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
815	WithOverflowInst *WO = cast<WithOverflowInst>(Val: II);
816	Value OperationResult = nullptr*;
817	Constant OverflowResult = nullptr*;
818	if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(),
819	RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult))
820	return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult);
821	return nullptr;
822	}
823
824	static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
825	Ty = Ty->getScalarType();
826	return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE;
827	}
828
829	static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
830	Ty = Ty->getScalarType();
831	return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero();
832	}
833
834	/// \returns the compare predicate type if the test performed by
835	/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
836	/// floating-point environment assumed for \p F for type \p Ty
837	static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
838	const Function &F, Type *Ty) {
839	switch (static_cast<unsigned>(Mask)) {
840	case fcZero:
841	if (inputDenormalIsIEEE(F, Ty))
842	return FCmpInst::FCMP_OEQ;
843	break;
844	case fcZero \| fcSubnormal:
845	if (inputDenormalIsDAZ(F, Ty))
846	return FCmpInst::FCMP_OEQ;
847	break;
848	case fcPositive \| fcNegZero:
849	if (inputDenormalIsIEEE(F, Ty))
850	return FCmpInst::FCMP_OGE;
851	break;
852	case fcPositive \| fcNegZero \| fcNegSubnormal:
853	if (inputDenormalIsDAZ(F, Ty))
854	return FCmpInst::FCMP_OGE;
855	break;
856	case fcPosSubnormal \| fcPosNormal \| fcPosInf:
857	if (inputDenormalIsIEEE(F, Ty))
858	return FCmpInst::FCMP_OGT;
859	break;
860	case fcNegative \| fcPosZero:
861	if (inputDenormalIsIEEE(F, Ty))
862	return FCmpInst::FCMP_OLE;
863	break;
864	case fcNegative \| fcPosZero \| fcPosSubnormal:
865	if (inputDenormalIsDAZ(F, Ty))
866	return FCmpInst::FCMP_OLE;
867	break;
868	case fcNegSubnormal \| fcNegNormal \| fcNegInf:
869	if (inputDenormalIsIEEE(F, Ty))
870	return FCmpInst::FCMP_OLT;
871	break;
872	case fcPosNormal \| fcPosInf:
873	if (inputDenormalIsDAZ(F, Ty))
874	return FCmpInst::FCMP_OGT;
875	break;
876	case fcNegNormal \| fcNegInf:
877	if (inputDenormalIsDAZ(F, Ty))
878	return FCmpInst::FCMP_OLT;
879	break;
880	case ~fcZero & ~fcNan:
881	if (inputDenormalIsIEEE(F, Ty))
882	return FCmpInst::FCMP_ONE;
883	break;
884	case ~(fcZero \| fcSubnormal) & ~fcNan:
885	if (inputDenormalIsDAZ(F, Ty))
886	return FCmpInst::FCMP_ONE;
887	break;
888	default:
889	break;
890	}
891
892	return FCmpInst::BAD_FCMP_PREDICATE;
893	}
894
895	Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
896	Value *Src0 = II.getArgOperand(i: `0`);
897	Value *Src1 = II.getArgOperand(i: `1`);
898	const ConstantInt *CMask = cast<ConstantInt>(Val: Src1);
899	FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
900	const bool IsUnordered = (Mask & fcNan) == fcNan;
901	const bool IsOrdered = (Mask & fcNan) == fcNone;
902	const FPClassTest OrderedMask = Mask & ~fcNan;
903	const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
904
905	const bool IsStrict =
906	II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
907
908	Value *FNegSrc;
909	if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) {
910	// is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
911
912	II.setArgOperand(i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask)));
913	return replaceOperand(I&: II, OpNum: `0`, V: FNegSrc);
914	}
915
916	Value *FAbsSrc;
917	if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) {
918	II.setArgOperand(i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask)));
919	return replaceOperand(I&: II, OpNum: `0`, V: FAbsSrc);
920	}
921
922	if ((OrderedMask == fcInf \|\| OrderedInvertedMask == fcInf) &&
923	(IsOrdered \|\| IsUnordered) && !IsStrict) {
924	// is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
925	// is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
926	// is.fpclass(x, fcInf\|fcNan) -> fcmp ueq fabs(x), +inf
927	// is.fpclass(x, ~(fcInf\|fcNan)) -> fcmp une fabs(x), +inf
928	Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType());
929	FCmpInst::Predicate Pred =
930	IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
931	if (OrderedInvertedMask == fcInf)
932	Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
933
934	Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::ID: fabs, V: Src0);
935	Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf);
936	CmpInf->takeName(V: &II);
937	return replaceInstUsesWith(I&: II, V: CmpInf);
938	}
939
940	if ((OrderedMask == fcPosInf \|\| OrderedMask == fcNegInf) &&
941	(IsOrdered \|\| IsUnordered) && !IsStrict) {
942	// is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
943	// is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
944	// is.fpclass(x, fcPosInf\|fcNan) -> fcmp ueq x, +inf
945	// is.fpclass(x, fcNegInf\|fcNan) -> fcmp ueq x, -inf
946	Constant *Inf =
947	ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf);
948	Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf)
949	: Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf);
950
951	EqInf->takeName(V: &II);
952	return replaceInstUsesWith(I&: II, V: EqInf);
953	}
954
955	if ((OrderedInvertedMask == fcPosInf \|\| OrderedInvertedMask == fcNegInf) &&
956	(IsOrdered \|\| IsUnordered) && !IsStrict) {
957	// is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
958	// is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
959	// is.fpclass(x, ~fcPosInf\|fcNan) -> fcmp une x, +inf
960	// is.fpclass(x, ~fcNegInf\|fcNan) -> fcmp une x, -inf
961	Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(),
962	Negative: OrderedInvertedMask == fcNegInf);
963	Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf)
964	: Builder.CreateFCmpONE(LHS: Src0, RHS: Inf);
965	NeInf->takeName(V: &II);
966	return replaceInstUsesWith(I&: II, V: NeInf);
967	}
968
969	if (Mask == fcNan && !IsStrict) {
970	// Equivalent of isnan. Replace with standard fcmp if we don't care about FP
971	// exceptions.
972	Value *IsNan =
973	Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
974	IsNan->takeName(V: &II);
975	return replaceInstUsesWith(I&: II, V: IsNan);
976	}
977
978	if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
979	// Equivalent of !isnan. Replace with standard fcmp.
980	Value *FCmp =
981	Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
982	FCmp->takeName(V: &II);
983	return replaceInstUsesWith(I&: II, V: FCmp);
984	}
985
986	FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
987
988	// Try to replace with an fcmp with 0
989	//
990	// is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
991	// is.fpclass(x, fcZero \| fcNan) -> fcmp ueq x, 0.0
992	// is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
993	// is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
994	//
995	// is.fpclass(x, fcPosSubnormal \| fcPosNormal \| fcPosInf) -> fcmp ogt x, 0.0
996	// is.fpclass(x, fcPositive \| fcNegZero) -> fcmp oge x, 0.0
997	//
998	// is.fpclass(x, fcNegSubnormal \| fcNegNormal \| fcNegInf) -> fcmp olt x, 0.0
999	// is.fpclass(x, fcNegative \| fcPosZero) -> fcmp ole x, 0.0
1000	//
1001	if (!IsStrict && (IsOrdered \|\| IsUnordered) &&
1002	(PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(),
1003	Ty: Src0->getType())) !=
1004	FCmpInst::BAD_FCMP_PREDICATE) {
1005	Constant *Zero = ConstantFP::getZero(Ty: Src0->getType());
1006	// Equivalent of == 0.
1007	Value *FCmp = Builder.CreateFCmp(
1008	P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType,
1009	LHS: Src0, RHS: Zero);
1010
1011	FCmp->takeName(V: &II);
1012	return replaceInstUsesWith(I&: II, V: FCmp);
1013	}
1014
1015	KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II);
1016
1017	// Clear test bits we know must be false from the source value.
1018	// fp_class (nnan x), qnan\|snan\|other -> fp_class (nnan x), other
1019	// fp_class (ninf x), ninf\|pinf\|other -> fp_class (ninf x), other
1020	if ((Mask & Known.KnownFPClasses) != Mask) {
1021	II.setArgOperand(
1022	i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses));
1023	return &II;
1024	}
1025
1026	// If none of the tests which can return false are possible, fold to true.
1027	// fp_class (nnan x), ~(qnan\|snan) -> true
1028	// fp_class (ninf x), ~(ninf\|pinf) -> true
1029	if (Mask == Known.KnownFPClasses)
1030	return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true));
1031
1032	return nullptr;
1033	}
1034
1035	static std::optional<bool> getKnownSign(Value Op, Instruction CxtI,
1036	const DataLayout &DL, AssumptionCache *AC,
1037	DominatorTree *DT) {
1038	KnownBits Known = computeKnownBits(V: Op, DL, Depth: `0`, AC, CxtI, DT);
1039	if (Known.isNonNegative())
1040	return false;
1041	if (Known.isNegative())
1042	return true;
1043
1044	Value X, Y;
1045	if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1046	return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: CxtI, DL);
1047
1048	return isImpliedByDomCondition(
1049	Pred: ICmpInst::ICMP_SLT, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()), ContextI: CxtI, DL);
1050	}
1051
1052	static std::optional<bool> getKnownSignOrZero(Value Op, Instruction CxtI,
1053	const DataLayout &DL,
1054	AssumptionCache *AC,
1055	DominatorTree *DT) {
1056	if (std::optional<bool> Sign = getKnownSign(Op, CxtI, DL, AC, DT))
1057	return Sign;
1058
1059	Value X, Y;
1060	if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1061	return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: CxtI, DL);
1062
1063	return std::nullopt;
1064	}
1065
1066	/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1067	static bool signBitMustBeTheSame(Value Op0, Value Op1, Instruction *CxtI,
1068	const DataLayout &DL, AssumptionCache *AC,
1069	DominatorTree *DT) {
1070	std::optional<bool> Known1 = getKnownSign(Op: Op1, CxtI, DL, AC, DT);
1071	if (!Known1)
1072	return false;
1073	std::optional<bool> Known0 = getKnownSign(Op: Op0, CxtI, DL, AC, DT);
1074	if (!Known0)
1075	return false;
1076	return Known0 == Known1;
1077	}
1078
1079	/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1080	/// can trigger other combines.
1081	static Instruction moveAddAfterMinMax(IntrinsicInst II,
1082	InstCombiner::BuilderTy &Builder) {
1083	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1084	assert((MinMaxID == Intrinsic::smax \|\| MinMaxID == Intrinsic::smin \|\|
1085	MinMaxID == Intrinsic::umax \|\| MinMaxID == Intrinsic::umin) &&
1086	"Expected a min or max intrinsic");
1087
1088	// TODO: Match vectors with undef elements, but undef may not propagate.
1089	Value Op0 = II->getArgOperand(i: `0`), Op1 = II->getArgOperand(i: `1`);
1090	Value *X;
1091	const APInt C0, C1;
1092	if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) \|\|
1093	!match(V: Op1, P: m_APInt(Res&: C1)))
1094	return nullptr;
1095
1096	// Check for necessary no-wrap and overflow constraints.
1097	bool IsSigned = MinMaxID == Intrinsic::smax \|\| MinMaxID == Intrinsic::smin;
1098	auto *Add = cast<BinaryOperator>(Val: Op0);
1099	if ((IsSigned && !Add->hasNoSignedWrap()) \|\|
1100	(!IsSigned && !Add->hasNoUnsignedWrap()))
1101	return nullptr;
1102
1103	// If the constant difference overflows, then instsimplify should reduce the
1104	// min/max to the add or C1.
1105	bool Overflow;
1106	APInt CDiff =
1107	IsSigned ? C1->ssub_ov(RHS: C0, Overflow) : C1->usub_ov(RHS: C0, Overflow);
1108	assert(!Overflow && "Expected simplify of min/max");
1109
1110	// min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1111	// Note: the "mismatched" no-overflow setting does not propagate.
1112	Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff);
1113	Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC);
1114	return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: `1`))
1115	: BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: `1`));
1116	}
1117	/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1118	Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1119	Type *Ty = MinMax1.getType();
1120
1121	// We are looking for a tree of:
1122	// max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1123	// Where the min and max could be reversed
1124	Instruction *MinMax2;
1125	BinaryOperator *AddSub;
1126	const APInt MinValue, MaxValue;
1127	if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) {
1128	if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue))))
1129	return nullptr;
1130	} else if (match(V: &MinMax1,
1131	P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) {
1132	if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue))))
1133	return nullptr;
1134	} else
1135	return nullptr;
1136
1137	// Check that the constants clamp a saturate, and that the new type would be
1138	// sensible to convert to.
1139	if (!(MaxValue + `1`).isPowerOf2() \|\| -MinValue != *MaxValue + `1`)
1140	return nullptr;
1141	// In what bitwidth can this be treated as saturating arithmetics?
1142	unsigned NewBitWidth = (*MaxValue + `1`).logBase2() + `1`;
1143	// FIXME: This isn't quite right for vectors, but using the scalar type is a
1144	// good first approximation for what should be done there.
1145	if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth))
1146	return nullptr;
1147
1148	// Also make sure that the inner min/max and the add/sub have one use.
1149	if (!MinMax2->hasOneUse() \|\| !AddSub->hasOneUse())
1150	return nullptr;
1151
1152	// Create the new type (which can be a vector type)
1153	Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1154
1155	Intrinsic::ID IntrinsicID;
1156	if (AddSub->getOpcode() == Instruction::Add)
1157	IntrinsicID = Intrinsic::sadd_sat;
1158	else if (AddSub->getOpcode() == Instruction::Sub)
1159	IntrinsicID = Intrinsic::ssub_sat;
1160	else
1161	return nullptr;
1162
1163	// The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1164	// is usually achieved via a sext from a smaller type.
1165	if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: `0`), Depth: `0`, CxtI: AddSub) >
1166	NewBitWidth \|\|
1167	ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: `1`), Depth: `0`, CxtI: AddSub) > NewBitWidth)
1168	return nullptr;
1169
1170	// Finally create and return the sat intrinsic, truncated to the new type
1171	Function *F = Intrinsic::getDeclaration(M: MinMax1.getModule(), id: IntrinsicID, Tys: NewTy);
1172	Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: `0`), DestTy: NewTy);
1173	Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: `1`), DestTy: NewTy);
1174	Value *Sat = Builder.CreateCall(Callee: F, Args: {AT, BT});
1175	return CastInst::Create(Instruction::SExt, S: Sat, Ty);
1176	}
1177
1178
1179	/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1180	/// can only be one of two possible constant values -- turn that into a select
1181	/// of constants.
1182	static Instruction foldClampRangeOfTwo(IntrinsicInst II,
1183	InstCombiner::BuilderTy &Builder) {
1184	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1185	Value *X;
1186	const APInt C0, C1;
1187	if (!match(V: I1, P: m_APInt(Res&: C1)) \|\| !I0->hasOneUse())
1188	return nullptr;
1189
1190	CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1191	switch (II->getIntrinsicID()) {
1192	case Intrinsic::smax:
1193	if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C0 == C1 + `1`)
1194	Pred = ICmpInst::ICMP_SGT;
1195	break;
1196	case Intrinsic::smin:
1197	if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C1 == C0 + `1`)
1198	Pred = ICmpInst::ICMP_SLT;
1199	break;
1200	case Intrinsic::umax:
1201	if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C0 == C1 + `1`)
1202	Pred = ICmpInst::ICMP_UGT;
1203	break;
1204	case Intrinsic::umin:
1205	if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C1 == C0 + `1`)
1206	Pred = ICmpInst::ICMP_ULT;
1207	break;
1208	default:
1209	llvm_unreachable("Expected min/max intrinsic");
1210	}
1211	if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1212	return nullptr;
1213
1214	// max (min X, 42), 41 --> X > 41 ? 42 : 41
1215	// min (max X, 42), 43 --> X < 43 ? 42 : 43
1216	Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1);
1217	return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1);
1218	}
1219
1220	/// If this min/max has a constant operand and an operand that is a matching
1221	/// min/max with a constant operand, constant-fold the 2 constant operands.
1222	static Value reassociateMinMaxWithConstants(IntrinsicInst II,
1223	IRBuilderBase &Builder) {
1224	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1225	auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`));
1226	if (!LHS \|\| LHS->getIntrinsicID() != MinMaxID)
1227	return nullptr;
1228
1229	Constant C0, C1;
1230	if (!match(V: LHS->getArgOperand(i: `1`), P: m_ImmConstant(C&: C0)) \|\|
1231	!match(V: II->getArgOperand(i: `1`), P: m_ImmConstant(C&: C1)))
1232	return nullptr;
1233
1234	// max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
1235	ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID);
1236	Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1);
1237	Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1);
1238	return Builder.CreateIntrinsic(ID: MinMaxID, Types: II->getType(),
1239	Args: {LHS->getArgOperand(i: `0`), NewC});
1240	}
1241
1242	/// If this min/max has a matching min/max operand with a constant, try to push
1243	/// the constant operand into this instruction. This can enable more folds.
1244	static Instruction *
1245	reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1246	InstCombiner::BuilderTy &Builder) {
1247	// Match and capture a min/max operand candidate.
1248	Value X, Y;
1249	Constant *C;
1250	Instruction *Inner;
1251	if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd(
1252	L: m_Instruction(I&: Inner),
1253	R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))),
1254	R: m_Value(V&: Y))))
1255	return nullptr;
1256
1257	// The inner op must match. Check for constants to avoid infinite loops.
1258	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1259	auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner);
1260	if (!InnerMM \|\| InnerMM->getIntrinsicID() != MinMaxID \|\|
1261	match(V: X, P: m_ImmConstant()) \|\| match(V: Y, P: m_ImmConstant()))
1262	return nullptr;
1263
1264	// max (max X, C), Y --> max (max X, Y), C
1265	Function *MinMax =
1266	Intrinsic::getDeclaration(M: II->getModule(), id: MinMaxID, Tys: II->getType());
1267	Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y);
1268	NewInner->takeName(V: Inner);
1269	return CallInst::Create(Func: MinMax, Args: {NewInner, C});
1270	}
1271
1272	/// Reduce a sequence of min/max intrinsics with a common operand.
1273	static Instruction factorizeMinMaxTree(IntrinsicInst II) {
1274	// Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1275	auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`));
1276	auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `1`));
1277	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1278	if (!LHS \|\| !RHS \|\| LHS->getIntrinsicID() != MinMaxID \|\|
1279	RHS->getIntrinsicID() != MinMaxID \|\|
1280	(!LHS->hasOneUse() && !RHS->hasOneUse()))
1281	return nullptr;
1282
1283	Value *A = LHS->getArgOperand(i: `0`);
1284	Value *B = LHS->getArgOperand(i: `1`);
1285	Value *C = RHS->getArgOperand(i: `0`);
1286	Value *D = RHS->getArgOperand(i: `1`);
1287
1288	// Look for a common operand.
1289	Value MinMaxOp = nullptr*;
1290	Value ThirdOp = nullptr*;
1291	if (LHS->hasOneUse()) {
1292	// If the LHS is only used in this chain and the RHS is used outside of it,
1293	// reuse the RHS min/max because that will eliminate the LHS.
1294	if (D == A \|\| C == A) {
1295	// min(min(a, b), min(c, a)) --> min(min(c, a), b)
1296	// min(min(a, b), min(a, d)) --> min(min(a, d), b)
1297	MinMaxOp = RHS;
1298	ThirdOp = B;
1299	} else if (D == B \|\| C == B) {
1300	// min(min(a, b), min(c, b)) --> min(min(c, b), a)
1301	// min(min(a, b), min(b, d)) --> min(min(b, d), a)
1302	MinMaxOp = RHS;
1303	ThirdOp = A;
1304	}
1305	} else {
1306	assert(RHS->hasOneUse() && "Expected one-use operand");
1307	// Reuse the LHS. This will eliminate the RHS.
1308	if (D == A \|\| D == B) {
1309	// min(min(a, b), min(c, a)) --> min(min(a, b), c)
1310	// min(min(a, b), min(c, b)) --> min(min(a, b), c)
1311	MinMaxOp = LHS;
1312	ThirdOp = C;
1313	} else if (C == A \|\| C == B) {
1314	// min(min(a, b), min(b, d)) --> min(min(a, b), d)
1315	// min(min(a, b), min(c, b)) --> min(min(a, b), d)
1316	MinMaxOp = LHS;
1317	ThirdOp = D;
1318	}
1319	}
1320
1321	if (!MinMaxOp \|\| !ThirdOp)
1322	return nullptr;
1323
1324	Module *Mod = II->getModule();
1325	Function *MinMax = Intrinsic::getDeclaration(M: Mod, id: MinMaxID, Tys: II->getType());
1326	return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp });
1327	}
1328
1329	/// If all arguments of the intrinsic are unary shuffles with the same mask,
1330	/// try to shuffle after the intrinsic.
1331	static Instruction *
1332	foldShuffledIntrinsicOperands(IntrinsicInst *II,
1333	InstCombiner::BuilderTy &Builder) {
1334	// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
1335	// etc. Use llvm::isTriviallyVectorizable() and related to determine
1336	// which intrinsics are safe to shuffle?
1337	switch (II->getIntrinsicID()) {
1338	case Intrinsic::smax:
1339	case Intrinsic::smin:
1340	case Intrinsic::umax:
1341	case Intrinsic::umin:
1342	case Intrinsic::fma:
1343	case Intrinsic::fshl:
1344	case Intrinsic::fshr:
1345	break;
1346	default:
1347	return nullptr;
1348	}
1349
1350	Value *X;
1351	ArrayRef<int> Mask;
1352	if (!match(V: II->getArgOperand(i: `0`),
1353	P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask (Mask))))
1354	return nullptr;
1355
1356	// At least 1 operand must have 1 use because we are creating 2 instructions.
1357	if (none_of(Range: II->args(), P: [](Value V) { return* V->hasOneUse(); }))
1358	return nullptr;
1359
1360	// See if all arguments are shuffled with the same mask.
1361	SmallVector<Value *, `4`> NewArgs(II->arg_size());
1362	NewArgs [`0`] = X;
1363	Type *SrcTy = X->getType();
1364	for (unsigned i = `1`, e = II->arg_size(); i != e; ++i) {
1365	if (!match(V: II->getArgOperand(i),
1366	P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_SpecificMask (Mask))) \|\|
1367	X->getType() != SrcTy)
1368	return nullptr;
1369	NewArgs [i] = X;
1370	}
1371
1372	// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1373	Instruction FPI = isa<FPMathOperator>(Val: II) ? II : nullptr*;
1374	Value *NewIntrinsic =
1375	Builder.CreateIntrinsic(ID: II->getIntrinsicID(), Types: SrcTy, Args: NewArgs, FMFSource: FPI);
1376	return new ShuffleVectorInst (NewIntrinsic, Mask);
1377	}
1378
1379	/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1380	/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1381	/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1382	template <Intrinsic::ID IntrID>
1383	static Instruction foldBitOrderCrossLogicOp(Value V,
1384	InstCombiner::BuilderTy &Builder) {
1385	static_assert(IntrID == Intrinsic::bswap \|\| IntrID == Intrinsic::bitreverse,
1386	"This helper only supports BSWAP and BITREVERSE intrinsics");
1387
1388	Value X, Y;
1389	// Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1390	// don't match ConstantExpr that aren't meaningful for this transform.
1391	if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
1392	isa<BinaryOperator>(Val: V)) {
1393	Value OldReorderX, OldReorderY;
1394	BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode();
1395
1396	// If both X and Y are bswap/bitreverse, the transform reduces the number
1397	// of instructions even if there's multiuse.
1398	// If only one operand is bswap/bitreverse, we need to ensure the operand
1399	// have only one use.
1400	if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) &&
1401	match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) {
1402	return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY);
1403	}
1404
1405	if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) {
1406	Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y);
1407	return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder);
1408	}
1409
1410	if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) {
1411	Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X);
1412	return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY);
1413	}
1414	}
1415	return nullptr;
1416	}
1417
1418	/// CallInst simplification. This mostly only handles folding of intrinsic
1419	/// instructions. For normal calls, it allows visitCallBase to do the heavy
1420	/// lifting.
1421	Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1422	// Don't try to simplify calls without uses. It will not do anything useful,
1423	// but will result in the following folds being skipped.
1424	if (!CI.use_empty()) {
1425	SmallVector<Value *, `4`> Args;
1426	Args.reserve(N: CI.arg_size());
1427	for (Value *Op : CI.args())
1428	Args.push_back(Elt: Op);
1429	if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args,
1430	Q: SQ.getWithInstruction(I: &CI)))
1431	return replaceInstUsesWith(I&: CI, V);
1432	}
1433
1434	if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI))
1435	return visitFree(FI&: CI, FreedOp);
1436
1437	// If the caller function (i.e. us, the function that contains this CallInst)
1438	// is nounwind, mark the call as nounwind, even if the callee isn't.
1439	if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1440	CI.setDoesNotThrow();
1441	return &CI;
1442	}
1443
1444	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI);
1445	if (!II) return visitCallBase(Call&: CI);
1446
1447	// For atomic unordered mem intrinsics if len is not a positive or
1448	// not a multiple of element size then behavior is undefined.
1449	if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(Val: II))
1450	if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(Val: AMI->getLength()))
1451	if (NumBytes->isNegative() \|\|
1452	(NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != `0`)) {
1453	CreateNonTerminatorUnreachable(InsertAt: AMI);
1454	assert(AMI->getType()->isVoidTy() &&
1455	"non void atomic unordered mem intrinsic");
1456	return eraseInstFromFunction(I&: *AMI);
1457	}
1458
1459	// Intrinsics cannot occur in an invoke or a callbr, so handle them here
1460	// instead of in visitCallBase.
1461	if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) {
1462	bool Changed = false;
1463
1464	// memmove/cpy/set of zero bytes is a noop.
1465	if (Constant *NumBytes = dyn_cast<Constant>(Val: MI->getLength())) {
1466	if (NumBytes->isNullValue())
1467	return eraseInstFromFunction(I&: CI);
1468	}
1469
1470	// No other transformations apply to volatile transfers.
1471	if (auto *M = dyn_cast<MemIntrinsic>(Val: MI))
1472	if (M->isVolatile())
1473	return nullptr;
1474
1475	// If we have a memmove and the source operation is a constant global,
1476	// then the source and dest pointers can't alias, so we can change this
1477	// into a call to memcpy.
1478	if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) {
1479	if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource()))
1480	if (GVSrc->isConstant()) {
1481	Module *M = CI.getModule();
1482	Intrinsic::ID MemCpyID =
1483	isa<AtomicMemMoveInst>(Val: MMI)
1484	? Intrinsic::memcpy_element_unordered_atomic
1485	: Intrinsic::memcpy;
1486	Type *Tys[`3`] = { CI.getArgOperand(i: `0`)->getType(),
1487	CI.getArgOperand(i: `1`)->getType(),
1488	CI.getArgOperand(i: `2`)->getType() };
1489	CI.setCalledFunction(Intrinsic::getDeclaration(M, id: MemCpyID, Tys));
1490	Changed = true;
1491	}
1492	}
1493
1494	if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1495	// memmove(x,x,size) -> noop.
1496	if (MTI->getSource() == MTI->getDest())
1497	return eraseInstFromFunction(I&: CI);
1498	}
1499
1500	// If we can determine a pointer alignment that is bigger than currently
1501	// set, update the alignment.
1502	if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1503	if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI))
1504	return I;
1505	} else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) {
1506	if (Instruction *I = SimplifyAnyMemSet(MI: MSI))
1507	return I;
1508	}
1509
1510	if (Changed) return II;
1511	}
1512
1513	// For fixed width vector result intrinsics, use the generic demanded vector
1514	// support.
1515	if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) {
1516	auto VWidth = IIFVTy->getNumElements();
1517	APInt PoisonElts(VWidth, `0`);
1518	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1519	if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) {
1520	if (V != II)
1521	return replaceInstUsesWith(I&: *II, V);
1522	return II;
1523	}
1524	}
1525
1526	if (II->isCommutative()) {
1527	if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: `0`), RHS: II->getOperand(i_nocapture: `1`))) {
1528	replaceOperand(I&: *II, OpNum: `0`, V: Pair ->first);
1529	replaceOperand(I&: *II, OpNum: `1`, V: Pair ->second);
1530	return II;
1531	}
1532
1533	if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI))
1534	return NewCall;
1535	}
1536
1537	// Unused constrained FP intrinsic calls may have declared side effect, which
1538	// prevents it from being removed. In some cases however the side effect is
1539	// actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
1540	// returns a replacement, the call may be removed.
1541	if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) {
1542	if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI)))
1543	return eraseInstFromFunction(I&: CI);
1544	}
1545
1546	Intrinsic::ID IID = II->getIntrinsicID();
1547	switch (IID) {
1548	case Intrinsic::objectsize: {
1549	SmallVector<Instruction *> InsertedInstructions;
1550	if (Value V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /MustSucceed=/*false,
1551	InsertedInstructions: &InsertedInstructions)) {
1552	for (Instruction *Inserted : InsertedInstructions)
1553	Worklist.add(I: Inserted);
1554	return replaceInstUsesWith(I&: CI, V);
1555	}
1556	return nullptr;
1557	}
1558	case Intrinsic::abs: {
1559	Value *IIOperand = II->getArgOperand(i: `0`);
1560	bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: `1`))->isOneValue();
1561
1562	// abs(-x) -> abs(x)
1563	// TODO: Copy nsw if it was present on the neg?
1564	Value *X;
1565	if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X))))
1566	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1567	if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X)))))
1568	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1569	if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
1570	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1571
1572	if (std::optional<bool> Known =
1573	getKnownSignOrZero(Op: IIOperand, CxtI: II, DL, AC: &AC, DT: &DT)) {
1574	// abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
1575	// abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
1576	if (!*Known)
1577	return replaceInstUsesWith(I&: *II, V: IIOperand);
1578
1579	// abs(x) -> -x if x < 0
1580	// abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
1581	if (IntMinIsPoison)
1582	return BinaryOperator::CreateNSWNeg(Op: IIOperand);
1583	return BinaryOperator::CreateNeg(Op: IIOperand);
1584	}
1585
1586	// abs (sext X) --> zext (abs X)*
1587	// Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
1588	if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
1589	Value *NarrowAbs =
1590	Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
1591	return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType());
1592	}
1593
1594	// Match a complicated way to check if a number is odd/even:
1595	// abs (srem X, 2) --> and X, 1
1596	const APInt *C;
1597	if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == `2`)
1598	return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: `1`));
1599
1600	break;
1601	}
1602	case Intrinsic::umin: {
1603	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1604	// umin(x, 1) == zext(x != 0)
1605	if (match(V: I1, P: m_One())) {
1606	assert(II->getType()->getScalarSizeInBits() != `1` &&
1607	"Expected simplify of umin with max constant");
1608	Value *Zero = Constant::getNullValue(Ty: I0->getType());
1609	Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero);
1610	return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType());
1611	}
1612	[[fallthrough]];
1613	}
1614	case Intrinsic::umax: {
1615	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1616	Value X, Y;
1617	if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) &&
1618	(I0->hasOneUse() \|\| I1->hasOneUse()) && X->getType() == Y->getType()) {
1619	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
1620	return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
1621	}
1622	Constant *C;
1623	if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
1624	I0->hasOneUse()) {
1625	if (Constant *NarrowC = getLosslessUnsignedTrunc(C, TruncTy: X->getType())) {
1626	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
1627	return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
1628	}
1629	}
1630	// If both operands of unsigned min/max are sign-extended, it is still ok
1631	// to narrow the operation.
1632	[[fallthrough]];
1633	}
1634	case Intrinsic::smax:
1635	case Intrinsic::smin: {
1636	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1637	Value X, Y;
1638	if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) &&
1639	(I0->hasOneUse() \|\| I1->hasOneUse()) && X->getType() == Y->getType()) {
1640	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
1641	return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
1642	}
1643
1644	Constant *C;
1645	if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
1646	I0->hasOneUse()) {
1647	if (Constant *NarrowC = getLosslessSignedTrunc(C, TruncTy: X->getType())) {
1648	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
1649	return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
1650	}
1651	}
1652
1653	// umin(i1 X, i1 Y) -> and i1 X, Y
1654	// smax(i1 X, i1 Y) -> and i1 X, Y
1655	if ((IID == Intrinsic::umin \|\| IID == Intrinsic::smax) &&
1656	II->getType()->isIntOrIntVectorTy(`1`)) {
1657	return BinaryOperator::CreateAnd(V1: I0, V2: I1);
1658	}
1659
1660	// umax(i1 X, i1 Y) -> or i1 X, Y
1661	// smin(i1 X, i1 Y) -> or i1 X, Y
1662	if ((IID == Intrinsic::umax \|\| IID == Intrinsic::smin) &&
1663	II->getType()->isIntOrIntVectorTy(`1`)) {
1664	return BinaryOperator::CreateOr(V1: I0, V2: I1);
1665	}
1666
1667	if (IID == Intrinsic::smax \|\| IID == Intrinsic::smin) {
1668	// smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
1669	// smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
1670	// TODO: Canonicalize neg after min/max if I1 is constant.
1671	if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) &&
1672	(I0->hasOneUse() \|\| I1->hasOneUse())) {
1673	Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
1674	Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y);
1675	return BinaryOperator::CreateNSWNeg(Op: InvMaxMin);
1676	}
1677	}
1678
1679	// (umax X, (xor X, Pow2))
1680	// -> (or X, Pow2)
1681	// (umin X, (xor X, Pow2))
1682	// -> (and X, ~Pow2)
1683	// (smax X, (xor X, Pos_Pow2))
1684	// -> (or X, Pos_Pow2)
1685	// (smin X, (xor X, Pos_Pow2))
1686	// -> (and X, ~Pos_Pow2)
1687	// (smax X, (xor X, Neg_Pow2))
1688	// -> (and X, ~Neg_Pow2)
1689	// (smin X, (xor X, Neg_Pow2))
1690	// -> (or X, Neg_Pow2)
1691	if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) \|\|
1692	match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) &&
1693	isKnownToBeAPowerOfTwo(V: X, / OrZero / true)) {
1694	bool UseOr = IID == Intrinsic::smax \|\| IID == Intrinsic::umax;
1695	bool UseAndN = IID == Intrinsic::smin \|\| IID == Intrinsic::umin;
1696
1697	if (IID == Intrinsic::smax \|\| IID == Intrinsic::smin) {
1698	auto KnownSign = getKnownSign(Op: X, CxtI: II, DL, AC: &AC, DT: &DT);
1699	if (KnownSign == std::nullopt) {
1700	UseOr = false;
1701	UseAndN = false;
1702	} else if (KnownSign /* true is Signed. /) {
1703	UseOr ^= true;
1704	UseAndN ^= true;
1705	Type *Ty = I0->getType();
1706	// Negative power of 2 must be IntMin. It's possible to be able to
1707	// prove negative / power of 2 without actually having known bits, so
1708	// just get the value by hand.
1709	X = Constant::getIntegerValue(
1710	Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits()));
1711	}
1712	}
1713	if (UseOr)
1714	return BinaryOperator::CreateOr(V1: I0, V2: X);
1715	else if (UseAndN)
1716	return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X));
1717	}
1718
1719	// If we can eliminate ~A and Y is free to invert:
1720	// max ~A, Y --> ~(min A, ~Y)
1721	//
1722	// Examples:
1723	// max ~A, ~Y --> ~(min A, Y)
1724	// max ~A, C --> ~(min A, ~C)
1725	// max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
1726	auto moveNotAfterMinMax = [&](Value X, Value Y) -> Instruction * {
1727	Value *A;
1728	if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) &&
1729	!isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) {
1730	if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) {
1731	Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
1732	Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY);
1733	return BinaryOperator::CreateNot(Op: InvMaxMin);
1734	}
1735	}
1736	return nullptr;
1737	};
1738
1739	if (Instruction *I = moveNotAfterMinMax (I0, I1))
1740	return I;
1741	if (Instruction *I = moveNotAfterMinMax (I1, I0))
1742	return I;
1743
1744	if (Instruction *I = moveAddAfterMinMax(II, Builder))
1745	return I;
1746
1747	// smax(X, -X) --> abs(X)
1748	// smin(X, -X) --> -abs(X)
1749	// umax(X, -X) --> -abs(X)
1750	// umin(X, -X) --> abs(X)
1751	if (isKnownNegation(X: I0, Y: I1)) {
1752	// We can choose either operand as the input to abs(), but if we can
1753	// eliminate the only use of a value, that's better for subsequent
1754	// transforms/analysis.
1755	if (I0->hasOneUse() && !I1->hasOneUse())
1756	std::swap(a&: I0, b&: I1);
1757
1758	// This is some variant of abs(). See if we can propagate 'nsw' to the abs
1759	// operation and potentially its negation.
1760	bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, / NeedNSW / true);
1761	Value *Abs = Builder.CreateBinaryIntrinsic(
1762	Intrinsic::abs, I0,
1763	ConstantInt::getBool(II->getContext(), IntMinIsPoison));
1764
1765	// We don't have a "nabs" intrinsic, so negate if needed based on the
1766	// max/min operation.
1767	if (IID == Intrinsic::smin \|\| IID == Intrinsic::umax)
1768	Abs = Builder.CreateNeg(V: Abs, Name: "nabs", / NUW / HasNUW: false, HasNSW: IntMinIsPoison);
1769	return replaceInstUsesWith(I&: CI, V: Abs);
1770	}
1771
1772	if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
1773	return Sel;
1774
1775	if (Instruction SAdd = matchSAddSubSat(MinMax1&: II))
1776	return SAdd;
1777
1778	if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder))
1779	return replaceInstUsesWith(I&: *II, V: NewMinMax);
1780
1781	if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
1782	return R;
1783
1784	if (Instruction *NewMinMax = factorizeMinMaxTree(II))
1785	return NewMinMax;
1786
1787	// Try to fold minmax with constant RHS based on range information
1788	const APInt *RHSC;
1789	if (match(V: I1, P: m_APIntAllowUndef(Res&: RHSC))) {
1790	ICmpInst::Predicate Pred =
1791	ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID));
1792	bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID);
1793	ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
1794	V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II));
1795	if (!LHS_CR.isFullSet()) {
1796	if (LHS_CR.icmp(Pred, Other: *RHSC))
1797	return replaceInstUsesWith(I&: *II, V: I0);
1798	if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC))
1799	return replaceInstUsesWith(I&: *II,
1800	V: ConstantInt::get(Ty: II->getType(), V: *RHSC));
1801	}
1802	}
1803
1804	break;
1805	}
1806	case Intrinsic::bitreverse: {
1807	Value *IIOperand = II->getArgOperand(i: `0`);
1808	// bitrev (zext i1 X to ?) --> X ? SignBitC : 0
1809	Value *X;
1810	if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) &&
1811	X->getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
1812	Type *Ty = II->getType();
1813	APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits());
1814	return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit),
1815	S2: ConstantInt::getNullValue(Ty));
1816	}
1817
1818	if (Instruction *crossLogicOpFold =
1819	foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
1820	return crossLogicOpFold;
1821
1822	break;
1823	}
1824	case Intrinsic::bswap: {
1825	Value *IIOperand = II->getArgOperand(i: `0`);
1826
1827	// Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
1828	// inverse-shift-of-bswap:
1829	// bswap (shl X, Y) --> lshr (bswap X), Y
1830	// bswap (lshr X, Y) --> shl (bswap X), Y
1831	Value X, Y;
1832	if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
1833	// The transform allows undef vector elements, so try a constant match
1834	// first. If knownbits can handle that case, that clause could be removed.
1835	unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
1836	const APInt *C;
1837	if ((match(V: Y, P: m_APIntAllowUndef(Res&: C)) && (*C & `7`) == `0`) \|\|
1838	MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: `3`))) {
1839	Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
1840	BinaryOperator::BinaryOps InverseShift =
1841	cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl
1842	? Instruction::LShr
1843	: Instruction::Shl;
1844	return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y);
1845	}
1846	}
1847
1848	KnownBits Known = computeKnownBits(V: IIOperand, Depth: `0`, CxtI: II);
1849	uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: `8`);
1850	uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: `8`);
1851	unsigned BW = Known.getBitWidth();
1852
1853	// bswap(x) -> shift(x) if x has exactly one "active byte"
1854	if (BW - LZ - TZ == `8`) {
1855	assert(LZ != TZ && "active byte cannot be in the middle");
1856	if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
1857	return BinaryOperator::CreateNUWShl(
1858	V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ));
1859	// -> lshr(x) if the "active byte" is in the high part of x
1860	return BinaryOperator::CreateExactLShr(
1861	V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ));
1862	}
1863
1864	// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1865	if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) {
1866	unsigned C = X->getType()->getScalarSizeInBits() - BW;
1867	Value *CV = ConstantInt::get(Ty: X->getType(), V: C);
1868	Value *V = Builder.CreateLShr(LHS: X, RHS: CV);
1869	return new TruncInst (V, IIOperand->getType());
1870	}
1871
1872	if (Instruction *crossLogicOpFold =
1873	foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
1874	return crossLogicOpFold;
1875	}
1876
1877	// Try to fold into bitreverse if bswap is the root of the expression tree.
1878	if (Instruction BitOp = matchBSwapOrBitReverse(I&: II, /MatchBSwaps/ false,
1879	/MatchBitReversals/ true))
1880	return BitOp;
1881	break;
1882	}
1883	case Intrinsic::masked_load:
1884	if (Value SimplifiedMaskedOp = simplifyMaskedLoad(II&: II))
1885	return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp);
1886	break;
1887	case Intrinsic::masked_store:
1888	return simplifyMaskedStore(II&: *II);
1889	case Intrinsic::masked_gather:
1890	return simplifyMaskedGather(II&: *II);
1891	case Intrinsic::masked_scatter:
1892	return simplifyMaskedScatter(II&: *II);
1893	case Intrinsic::launder_invariant_group:
1894	case Intrinsic::strip_invariant_group:
1895	if (auto SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: II, IC&: *this))
1896	return replaceInstUsesWith(I&: *II, V: SkippedBarrier);
1897	break;
1898	case Intrinsic::powi:
1899	if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: `1`))) {
1900	// 0 and 1 are handled in instsimplify
1901	// powi(x, -1) -> 1/x
1902	if (Power->isMinusOne())
1903	return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: `1.0`),
1904	V2: II->getArgOperand(i: `0`), FMFSource: II);
1905	// powi(x, 2) -> x*x
1906	if (Power->equalsInt(V: `2`))
1907	return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: `0`),
1908	V2: II->getArgOperand(i: `0`), FMFSource: II);
1909
1910	if (!Power->getValue()[`0`]) {
1911	Value *X;
1912	// If power is even:
1913	// powi(-x, p) -> powi(x, p)
1914	// powi(fabs(x), p) -> powi(x, p)
1915	// powi(copysign(x, y), p) -> powi(x, p)
1916	if (match(II->getArgOperand(`0`), m_FNeg(m_Value(X))) \|\|
1917	match(II->getArgOperand(`0`), m_FAbs(m_Value(X))) \|\|
1918	match(II->getArgOperand(`0`),
1919	m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
1920	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1921	}
1922	}
1923	break;
1924
1925	case Intrinsic::cttz:
1926	case Intrinsic::ctlz:
1927	if (auto I = foldCttzCtlz(II&: II, IC&: *this))
1928	return I;
1929	break;
1930
1931	case Intrinsic::ctpop:
1932	if (auto I = foldCtpop(II&: II, IC&: *this))
1933	return I;
1934	break;
1935
1936	case Intrinsic::fshl:
1937	case Intrinsic::fshr: {
1938	Value Op0 = II->getArgOperand(i: `0`), Op1 = II->getArgOperand(i: `1`);
1939	Type *Ty = II->getType();
1940	unsigned BitWidth = Ty->getScalarSizeInBits();
1941	Constant *ShAmtC;
1942	if (match(V: II->getArgOperand(i: `2`), P: m_ImmConstant(C&: ShAmtC))) {
1943	// Canonicalize a shift amount constant operand to modulo the bit-width.
1944	Constant *WidthC = ConstantInt::get(Ty, V: BitWidth);
1945	Constant *ModuloC =
1946	ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL);
1947	if (!ModuloC)
1948	return nullptr;
1949	if (ModuloC != ShAmtC)
1950	return replaceOperand(I&: *II, OpNum: `2`, V: ModuloC);
1951
1952	assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
1953	ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
1954	"Shift amount expected to be modulo bitwidth");
1955
1956	// Canonicalize funnel shift right by constant to funnel shift left. This
1957	// is not entirely arbitrary. For historical reasons, the backend may
1958	// recognize rotate left patterns but miss rotate right patterns.
1959	if (IID == Intrinsic::fshr) {
1960	// fshr X, Y, C --> fshl X, Y, (BitWidth - C)
1961	Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC);
1962	Module *Mod = II->getModule();
1963	Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
1964	return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC });
1965	}
1966	assert(IID == Intrinsic::fshl &&
1967	"All funnel shifts by simple constants should go left");
1968
1969	// fshl(X, 0, C) --> shl X, C
1970	// fshl(X, undef, C) --> shl X, C
1971	if (match(V: Op1, P: m_ZeroInt()) \|\| match(V: Op1, P: m_Undef()))
1972	return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC);
1973
1974	// fshl(0, X, C) --> lshr X, (BW-C)
1975	// fshl(undef, X, C) --> lshr X, (BW-C)
1976	if (match(V: Op0, P: m_ZeroInt()) \|\| match(V: Op0, P: m_Undef()))
1977	return BinaryOperator::CreateLShr(V1: Op1,
1978	V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC));
1979
1980	// fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
1981	if (Op0 == Op1 && BitWidth == `16` && match(V: ShAmtC, P: m_SpecificInt(V: `8`))) {
1982	Module *Mod = II->getModule();
1983	Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
1984	return CallInst::Create(Func: Bswap, Args: { Op0 });
1985	}
1986	if (Instruction *BitOp =
1987	matchBSwapOrBitReverse(I&: II, /MatchBSwaps/* true,
1988	/MatchBitReversals/ true))
1989	return BitOp;
1990	}
1991
1992	// Left or right might be masked.
1993	if (SimplifyDemandedInstructionBits(Inst&: *II))
1994	return &CI;
1995
1996	// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
1997	// so only the low bits of the shift amount are demanded if the bitwidth is
1998	// a power-of-2.
1999	if (!isPowerOf2_32(Value: BitWidth))
2000	break;
2001	APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth));
2002	KnownBits Op2Known(BitWidth);
2003	if (SimplifyDemandedBits(I: II, Op: `2`, DemandedMask: Op2Demanded, Known&: Op2Known))
2004	return &CI;
2005	break;
2006	}
2007	case Intrinsic::ptrmask: {
2008	unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2009	KnownBits Known(BitWidth);
2010	if (SimplifyDemandedInstructionBits(Inst&: *II, Known))
2011	return II;
2012
2013	Value InnerPtr, InnerMask;
2014	bool Changed = false;
2015	// Combine:
2016	// (ptrmask (ptrmask p, A), B)
2017	// -> (ptrmask p, (and A, B))
2018	if (match(II->getArgOperand(`0`),
2019	m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
2020	m_Value(InnerMask))))) {
2021	assert(II->getArgOperand(`1`)->getType() == InnerMask->getType() &&
2022	"Mask types must match");
2023	// TODO: If InnerMask == Op1, we could copy attributes from inner
2024	// callsite -> outer callsite.
2025	Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: `1`), RHS: InnerMask);
2026	replaceOperand(I&: CI, OpNum: `0`, V: InnerPtr);
2027	replaceOperand(I&: CI, OpNum: `1`, V: NewMask);
2028	Changed = true;
2029	}
2030
2031	// See if we can deduce non-null.
2032	if (!CI.hasRetAttr(Attribute::NonNull) &&
2033	(Known.isNonZero() \|\|
2034	isKnownNonZero(II, DL, /Depth/ `0`, &AC, II, &DT))) {
2035	CI.addRetAttr(Attribute::NonNull);
2036	Changed = true;
2037	}
2038
2039	unsigned NewAlignmentLog =
2040	std::min(a: Value::MaxAlignmentExponent,
2041	b: std::min(a: BitWidth - `1`, b: Known.countMinTrailingZeros()));
2042	// Known bits will capture if we had alignment information associated with
2043	// the pointer argument.
2044	if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) {
2045	CI.addRetAttr(Attr: Attribute::getWithAlignment(
2046	Context&: CI.getContext(), Alignment: Align (uint64_t(`1`) << NewAlignmentLog)));
2047	Changed = true;
2048	}
2049	if (Changed)
2050	return &CI;
2051	break;
2052	}
2053	case Intrinsic::uadd_with_overflow:
2054	case Intrinsic::sadd_with_overflow: {
2055	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2056	return I;
2057
2058	// Given 2 constant operands whose sum does not overflow:
2059	// uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2060	// saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2061	Value *X;
2062	const APInt C0, C1;
2063	Value *Arg0 = II->getArgOperand(i: `0`);
2064	Value *Arg1 = II->getArgOperand(i: `1`);
2065	bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2066	bool HasNWAdd = IsSigned ? match(V: Arg0, P: m_NSWAdd(L: m_Value(V&: X), R: m_APInt(Res&: C0)))
2067	: match(V: Arg0, P: m_NUWAdd(L: m_Value(V&: X), R: m_APInt(Res&: C0)));
2068	if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) {
2069	bool Overflow;
2070	APInt NewC =
2071	IsSigned ? C1->sadd_ov(RHS: C0, Overflow) : C1->uadd_ov(RHS: C0, Overflow);
2072	if (!Overflow)
2073	return replaceInstUsesWith(
2074	I&: *II, V: Builder.CreateBinaryIntrinsic(
2075	ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC)));
2076	}
2077	break;
2078	}
2079
2080	case Intrinsic::umul_with_overflow:
2081	case Intrinsic::smul_with_overflow:
2082	case Intrinsic::usub_with_overflow:
2083	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2084	return I;
2085	break;
2086
2087	case Intrinsic::ssub_with_overflow: {
2088	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2089	return I;
2090
2091	Constant *C;
2092	Value *Arg0 = II->getArgOperand(i: `0`);
2093	Value *Arg1 = II->getArgOperand(i: `1`);
2094	// Given a constant C that is not the minimum signed value
2095	// for an integer of a given bit width:
2096	//
2097	// ssubo X, C -> saddo X, -C
2098	if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) {
2099	Value *NegVal = ConstantExpr::getNeg(C);
2100	// Build a saddo call that is equivalent to the discovered
2101	// ssubo call.
2102	return replaceInstUsesWith(
2103	*II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2104	Arg0, NegVal));
2105	}
2106
2107	break;
2108	}
2109
2110	case Intrinsic::uadd_sat:
2111	case Intrinsic::sadd_sat:
2112	case Intrinsic::usub_sat:
2113	case Intrinsic::ssub_sat: {
2114	SaturatingInst *SI = cast<SaturatingInst>(Val: II);
2115	Type *Ty = SI->getType();
2116	Value *Arg0 = SI->getLHS();
2117	Value *Arg1 = SI->getRHS();
2118
2119	// Make use of known overflow information.
2120	OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(),
2121	LHS: Arg0, RHS: Arg1, CxtI: SI);
2122	switch (OR) {
2123	case OverflowResult::MayOverflow:
2124	break;
2125	case OverflowResult::NeverOverflows:
2126	if (SI->isSigned())
2127	return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2128	else
2129	return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2130	case OverflowResult::AlwaysOverflowsLow: {
2131	unsigned BitWidth = Ty->getScalarSizeInBits();
2132	APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2133	return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min));
2134	}
2135	case OverflowResult::AlwaysOverflowsHigh: {
2136	unsigned BitWidth = Ty->getScalarSizeInBits();
2137	APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2138	return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max));
2139	}
2140	}
2141
2142	// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2143	Constant *C;
2144	if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2145	C->isNotMinSignedValue()) {
2146	Value *NegVal = ConstantExpr::getNeg(C);
2147	return replaceInstUsesWith(
2148	*II, Builder.CreateBinaryIntrinsic(
2149	Intrinsic::sadd_sat, Arg0, NegVal));
2150	}
2151
2152	// sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2153	// sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2154	// if Val and Val2 have the same sign
2155	if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2156	Value *X;
2157	const APInt Val, Val2;
2158	APInt NewVal;
2159	bool IsUnsigned =
2160	IID == Intrinsic::uadd_sat \|\| IID == Intrinsic::usub_sat;
2161	if (Other->getIntrinsicID() == IID &&
2162	match(V: Arg1, P: m_APInt(Res&: Val)) &&
2163	match(V: Other->getArgOperand(i: `0`), P: m_Value(V&: X)) &&
2164	match(V: Other->getArgOperand(i: `1`), P: m_APInt(Res&: Val2))) {
2165	if (IsUnsigned)
2166	NewVal = Val->uadd_sat(RHS: *Val2);
2167	else if (Val->isNonNegative() == Val2->isNonNegative()) {
2168	bool Overflow;
2169	NewVal = Val->sadd_ov(RHS: *Val2, Overflow);
2170	if (Overflow) {
2171	// Both adds together may add more than SignedMaxValue
2172	// without saturating the final result.
2173	break;
2174	}
2175	} else {
2176	// Cannot fold saturated addition with different signs.
2177	break;
2178	}
2179
2180	return replaceInstUsesWith(
2181	I&: *II, V: Builder.CreateBinaryIntrinsic(
2182	ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal)));
2183	}
2184	}
2185	break;
2186	}
2187
2188	case Intrinsic::minnum:
2189	case Intrinsic::maxnum:
2190	case Intrinsic::minimum:
2191	case Intrinsic::maximum: {
2192	Value *Arg0 = II->getArgOperand(i: `0`);
2193	Value *Arg1 = II->getArgOperand(i: `1`);
2194	Value X, Y;
2195	if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) &&
2196	(Arg0->hasOneUse() \|\| Arg1->hasOneUse())) {
2197	// If both operands are negated, invert the call and negate the result:
2198	// min(-X, -Y) --> -(max(X, Y))
2199	// max(-X, -Y) --> -(min(X, Y))
2200	Intrinsic::ID NewIID;
2201	switch (IID) {
2202	case Intrinsic::maxnum:
2203	NewIID = Intrinsic::minnum;
2204	break;
2205	case Intrinsic::minnum:
2206	NewIID = Intrinsic::maxnum;
2207	break;
2208	case Intrinsic::maximum:
2209	NewIID = Intrinsic::minimum;
2210	break;
2211	case Intrinsic::minimum:
2212	NewIID = Intrinsic::maximum;
2213	break;
2214	default:
2215	llvm_unreachable("unexpected intrinsic ID");
2216	}
2217	Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II);
2218	Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall);
2219	FNeg->copyIRFlags(V: II);
2220	return FNeg;
2221	}
2222
2223	// m(m(X, C2), C1) -> m(X, C)
2224	const APFloat C1, C2;
2225	if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2226	if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) &&
2227	((match(V: M->getArgOperand(i: `0`), P: m_Value(V&: X)) &&
2228	match(V: M->getArgOperand(i: `1`), P: m_APFloat(Res&: C2))) \|\|
2229	(match(V: M->getArgOperand(i: `1`), P: m_Value(V&: X)) &&
2230	match(V: M->getArgOperand(i: `0`), P: m_APFloat(Res&: C2))))) {
2231	APFloat Res(`0.0`);
2232	switch (IID) {
2233	case Intrinsic::maxnum:
2234	Res = maxnum(A: C1, B: C2);
2235	break;
2236	case Intrinsic::minnum:
2237	Res = minnum(A: C1, B: C2);
2238	break;
2239	case Intrinsic::maximum:
2240	Res = maximum(A: C1, B: C2);
2241	break;
2242	case Intrinsic::minimum:
2243	Res = minimum(A: C1, B: C2);
2244	break;
2245	default:
2246	llvm_unreachable("unexpected intrinsic ID");
2247	}
2248	Instruction *NewCall = Builder.CreateBinaryIntrinsic(
2249	ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res), FMFSource: II);
2250	// TODO: Conservatively intersecting FMF. If Res == C2, the transform
2251	// was a simplification (so Arg0 and its original flags could
2252	// propagate?)
2253	NewCall->andIRFlags(V: M);
2254	return replaceInstUsesWith(I&: *II, V: NewCall);
2255	}
2256	}
2257
2258	// m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2259	if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) &&
2260	match(V: Arg1, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: Y)))) &&
2261	X->getType() == Y->getType()) {
2262	Value *NewCall =
2263	Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName());
2264	return new FPExtInst (NewCall, II->getType());
2265	}
2266
2267	// max X, -X --> fabs X
2268	// min X, -X --> -(fabs X)
2269	// TODO: Remove one-use limitation? That is obviously better for max.
2270	// It would be an extra instruction for min (fnabs), but that is
2271	// still likely better for analysis and codegen.
2272	if ((match(V: Arg0, P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X)))) && Arg1 == X) \|\|
2273	(match(V: Arg1, P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X)))) && Arg0 == X)) {
2274	Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2275	if (IID == Intrinsic::minimum \|\| IID == Intrinsic::minnum)
2276	R = Builder.CreateFNegFMF(V: R, FMFSource: II);
2277	return replaceInstUsesWith(I&: *II, V: R);
2278	}
2279
2280	break;
2281	}
2282	case Intrinsic::matrix_multiply: {
2283	// Optimize negation in matrix multiplication.
2284
2285	// -A * -B -> A * B
2286	Value A, B;
2287	if (match(V: II->getArgOperand(i: `0`), P: m_FNeg(X: m_Value(V&: A))) &&
2288	match(V: II->getArgOperand(i: `1`), P: m_FNeg(X: m_Value(V&: B)))) {
2289	replaceOperand(I&: *II, OpNum: `0`, V: A);
2290	replaceOperand(I&: *II, OpNum: `1`, V: B);
2291	return II;
2292	}
2293
2294	Value *Op0 = II->getOperand(i_nocapture: `0`);
2295	Value *Op1 = II->getOperand(i_nocapture: `1`);
2296	Value OpNotNeg, NegatedOp;
2297	unsigned NegatedOpArg, OtherOpArg;
2298	if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2299	NegatedOp = Op0;
2300	NegatedOpArg = `0`;
2301	OtherOpArg = `1`;
2302	} else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2303	NegatedOp = Op1;
2304	NegatedOpArg = `1`;
2305	OtherOpArg = `0`;
2306	} else
2307	// Multiplication doesn't have a negated operand.
2308	break;
2309
2310	// Only optimize if the negated operand has only one use.
2311	if (!NegatedOp->hasOneUse())
2312	break;
2313
2314	Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg);
2315	VectorType *RetTy = cast<VectorType>(Val: II->getType());
2316	VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType());
2317	VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType());
2318	ElementCount NegatedCount = NegatedOpTy->getElementCount();
2319	ElementCount OtherCount = OtherOpTy->getElementCount();
2320	ElementCount RetCount = RetTy->getElementCount();
2321	// (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2322	if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) &&
2323	ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) {
2324	Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp);
2325	replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg);
2326	replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp);
2327	return II;
2328	}
2329	// (-A) * B -> -(A * B), if it is cheaper to negate the result
2330	if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) {
2331	SmallVector<Value *, `5`> NewArgs(II->args());
2332	NewArgs [NegatedOpArg] = OpNotNeg;
2333	Instruction *NewMul =
2334	Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II);
2335	return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II));
2336	}
2337	break;
2338	}
2339	case Intrinsic::fmuladd: {
2340	// Canonicalize fast fmuladd to the separate fmul + fadd.
2341	if (II->isFast()) {
2342	BuilderTy::FastMathFlagGuard Guard(Builder);
2343	Builder.setFastMathFlags(II->getFastMathFlags());
2344	Value *Mul = Builder.CreateFMul(L: II->getArgOperand(i: `0`),
2345	R: II->getArgOperand(i: `1`));
2346	Value *Add = Builder.CreateFAdd(L: Mul, R: II->getArgOperand(i: `2`));
2347	Add->takeName(V: II);
2348	return replaceInstUsesWith(I&: *II, V: Add);
2349	}
2350
2351	// Try to simplify the underlying FMul.
2352	if (Value *V = simplifyFMulInst(LHS: II->getArgOperand(i: `0`), RHS: II->getArgOperand(i: `1`),
2353	FMF: II->getFastMathFlags(),
2354	Q: SQ.getWithInstruction(I: II))) {
2355	auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: `2`));
2356	FAdd->copyFastMathFlags(I: II);
2357	return FAdd;
2358	}
2359
2360	[[fallthrough]];
2361	}
2362	case Intrinsic::fma: {
2363	// fma fneg(x), fneg(y), z -> fma x, y, z
2364	Value *Src0 = II->getArgOperand(i: `0`);
2365	Value *Src1 = II->getArgOperand(i: `1`);
2366	Value X, Y;
2367	if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) {
2368	replaceOperand(I&: *II, OpNum: `0`, V: X);
2369	replaceOperand(I&: *II, OpNum: `1`, V: Y);
2370	return II;
2371	}
2372
2373	// fma fabs(x), fabs(x), z -> fma x, x, z
2374	if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) &&
2375	match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) {
2376	replaceOperand(I&: *II, OpNum: `0`, V: X);
2377	replaceOperand(I&: *II, OpNum: `1`, V: X);
2378	return II;
2379	}
2380
2381	// Try to simplify the underlying FMul. We can only apply simplifications
2382	// that do not require rounding.
2383	if (Value *V = simplifyFMAFMul(LHS: II->getArgOperand(i: `0`), RHS: II->getArgOperand(i: `1`),
2384	FMF: II->getFastMathFlags(),
2385	Q: SQ.getWithInstruction(I: II))) {
2386	auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: `2`));
2387	FAdd->copyFastMathFlags(I: II);
2388	return FAdd;
2389	}
2390
2391	// fma x, y, 0 -> fmul x, y
2392	// This is always valid for -0.0, but requires nsz for +0.0 as
2393	// -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
2394	if (match(V: II->getArgOperand(i: `2`), P: m_NegZeroFP()) \|\|
2395	(match(V: II->getArgOperand(i: `2`), P: m_PosZeroFP()) &&
2396	II->getFastMathFlags().noSignedZeros()))
2397	return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II);
2398
2399	break;
2400	}
2401	case Intrinsic::copysign: {
2402	Value Mag = II->getArgOperand(i: `0`), Sign = II->getArgOperand(i: `1`);
2403	if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
2404	V: Sign, /Depth=/`0`, SQ: getSimplifyQuery().getWithInstruction(I: II))) {
2405	if (*KnownSignBit) {
2406	// If we know that the sign argument is negative, reduce to FNABS:
2407	// copysign Mag, -Sign --> fneg (fabs Mag)
2408	Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2409	return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II));
2410	}
2411
2412	// If we know that the sign argument is positive, reduce to FABS:
2413	// copysign Mag, +Sign --> fabs Mag
2414	Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2415	return replaceInstUsesWith(I&: *II, V: Fabs);
2416	}
2417
2418	// Propagate sign argument through nested calls:
2419	// copysign Mag, (copysign ?, X) --> copysign Mag, X
2420	Value *X;
2421	if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
2422	return replaceOperand(I&: *II, OpNum: `1`, V: X);
2423
2424	// Peek through changes of magnitude's sign-bit. This call rewrites those:
2425	// copysign (fabs X), Sign --> copysign X, Sign
2426	// copysign (fneg X), Sign --> copysign X, Sign
2427	if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) \|\| match(V: Mag, P: m_FNeg(X: m_Value(V&: X))))
2428	return replaceOperand(I&: *II, OpNum: `0`, V: X);
2429
2430	break;
2431	}
2432	case Intrinsic::fabs: {
2433	Value Cond, TVal, *FVal;
2434	if (match(V: II->getArgOperand(i: `0`),
2435	P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) {
2436	// fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
2437	if (isa<Constant>(Val: TVal) && isa<Constant>(Val: FVal)) {
2438	CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal});
2439	CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal});
2440	return SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF);
2441	}
2442	// fabs (select Cond, -FVal, FVal) --> fabs FVal
2443	if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal))))
2444	return replaceOperand(I&: *II, OpNum: `0`, V: FVal);
2445	// fabs (select Cond, TVal, -TVal) --> fabs TVal
2446	if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal))))
2447	return replaceOperand(I&: *II, OpNum: `0`, V: TVal);
2448	}
2449
2450	Value Magnitude, Sign;
2451	if (match(V: II->getArgOperand(i: `0`),
2452	P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) {
2453	// fabs (copysign x, y) -> (fabs x)
2454	CallInst *AbsSign =
2455	Builder.CreateCall(Callee: II->getCalledFunction(), Args: {Magnitude});
2456	AbsSign->copyFastMathFlags(I: II);
2457	return replaceInstUsesWith(I&: *II, V: AbsSign);
2458	}
2459
2460	[[fallthrough]];
2461	}
2462	case Intrinsic::ceil:
2463	case Intrinsic::floor:
2464	case Intrinsic::round:
2465	case Intrinsic::roundeven:
2466	case Intrinsic::nearbyint:
2467	case Intrinsic::rint:
2468	case Intrinsic::trunc: {
2469	Value *ExtSrc;
2470	if (match(V: II->getArgOperand(i: `0`), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) {
2471	// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2472	Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II);
2473	return new FPExtInst (NarrowII, II->getType());
2474	}
2475	break;
2476	}
2477	case Intrinsic::cos:
2478	case Intrinsic::amdgcn_cos: {
2479	Value X, Sign;
2480	Value *Src = II->getArgOperand(i: `0`);
2481	if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) \|\| match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) \|\|
2482	match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) {
2483	// cos(-x) --> cos(x)
2484	// cos(fabs(x)) --> cos(x)
2485	// cos(copysign(x, y)) --> cos(x)
2486	return replaceOperand(I&: *II, OpNum: `0`, V: X);
2487	}
2488	break;
2489	}
2490	case Intrinsic::sin: {
2491	Value *X;
2492	if (match(V: II->getArgOperand(i: `0`), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) {
2493	// sin(-x) --> -sin(x)
2494	Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
2495	Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewSin);
2496	FNeg->copyFastMathFlags(I: II);
2497	return FNeg;
2498	}
2499	break;
2500	}
2501	case Intrinsic::ldexp: {
2502	// ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
2503	//
2504	// The danger is if the first ldexp would overflow to infinity or underflow
2505	// to zero, but the combined exponent avoids it. We ignore this with
2506	// reassoc.
2507	//
2508	// It's also safe to fold if we know both exponents are >= 0 or <= 0 since
2509	// it would just double down on the overflow/underflow which would occur
2510	// anyway.
2511	//
2512	// TODO: Could do better if we had range tracking for the input value
2513	// exponent. Also could broaden sign check to cover == 0 case.
2514	Value *Src = II->getArgOperand(i: `0`);
2515	Value *Exp = II->getArgOperand(i: `1`);
2516	Value *InnerSrc;
2517	Value *InnerExp;
2518	if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
2519	m_Value(InnerSrc), m_Value(InnerExp)))) &&
2520	Exp->getType() == InnerExp->getType()) {
2521	FastMathFlags FMF = II->getFastMathFlags();
2522	FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags();
2523
2524	if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) \|\|
2525	signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, CxtI: II, DL, AC: &AC, DT: &DT)) {
2526	// TODO: Add nsw/nuw probably safe if integer type exceeds exponent
2527	// width.
2528	Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp);
2529	II->setArgOperand(i: `1`, v: NewExp);
2530	II->setFastMathFlags(InnerFlags); // Or the inner flags.
2531	return replaceOperand(I&: *II, OpNum: `0`, V: InnerSrc);
2532	}
2533	}
2534
2535	break;
2536	}
2537	case Intrinsic::ptrauth_auth:
2538	case Intrinsic::ptrauth_resign: {
2539	// (sign\|resign) + (auth\|resign) can be folded by omitting the middle
2540	// sign+auth component if the key and discriminator match.
2541	bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
2542	Value *Key = II->getArgOperand(i: `1`);
2543	Value *Disc = II->getArgOperand(i: `2`);
2544
2545	// AuthKey will be the key we need to end up authenticating against in
2546	// whatever we replace this sequence with.
2547	Value AuthKey = nullptr, AuthDisc = nullptr, *BasePtr;
2548	if (auto CI = dyn_cast<CallBase>(Val: II->getArgOperand(i: `0`))) {
2549	BasePtr = CI->getArgOperand(i: `0`);
2550	if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
2551	if (CI->getArgOperand(i: `1`) != Key \|\| CI->getArgOperand(i: `2`) != Disc)
2552	break;
2553	} else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
2554	if (CI->getArgOperand(i: `3`) != Key \|\| CI->getArgOperand(i: `4`) != Disc)
2555	break;
2556	AuthKey = CI->getArgOperand(i: `1`);
2557	AuthDisc = CI->getArgOperand(i: `2`);
2558	} else
2559	break;
2560	} else
2561	break;
2562
2563	unsigned NewIntrin;
2564	if (AuthKey && NeedSign) {
2565	// resign(0,1) + resign(1,2) = resign(0, 2)
2566	NewIntrin = Intrinsic::ptrauth_resign;
2567	} else if (AuthKey) {
2568	// resign(0,1) + auth(1) = auth(0)
2569	NewIntrin = Intrinsic::ptrauth_auth;
2570	} else if (NeedSign) {
2571	// sign(0) + resign(0, 1) = sign(1)
2572	NewIntrin = Intrinsic::ptrauth_sign;
2573	} else {
2574	// sign(0) + auth(0) = nop
2575	replaceInstUsesWith(I&: *II, V: BasePtr);
2576	eraseInstFromFunction(I&: *II);
2577	return nullptr;
2578	}
2579
2580	SmallVector<Value *, `4`> CallArgs;
2581	CallArgs.push_back(Elt: BasePtr);
2582	if (AuthKey) {
2583	CallArgs.push_back(Elt: AuthKey);
2584	CallArgs.push_back(Elt: AuthDisc);
2585	}
2586
2587	if (NeedSign) {
2588	CallArgs.push_back(Elt: II->getArgOperand(i: `3`));
2589	CallArgs.push_back(Elt: II->getArgOperand(i: `4`));
2590	}
2591
2592	Function *NewFn = Intrinsic::getDeclaration(M: II->getModule(), id: NewIntrin);
2593	return CallInst::Create(Func: NewFn, Args: CallArgs);
2594	}
2595	case Intrinsic::arm_neon_vtbl1:
2596	case Intrinsic::aarch64_neon_tbl1:
2597	if (Value V = simplifyNeonTbl1(II: II, Builder))
2598	return replaceInstUsesWith(I&: *II, V);
2599	break;
2600
2601	case Intrinsic::arm_neon_vmulls:
2602	case Intrinsic::arm_neon_vmullu:
2603	case Intrinsic::aarch64_neon_smull:
2604	case Intrinsic::aarch64_neon_umull: {
2605	Value *Arg0 = II->getArgOperand(i: `0`);
2606	Value *Arg1 = II->getArgOperand(i: `1`);
2607
2608	// Handle mul by zero first:
2609	if (isa<ConstantAggregateZero>(Val: Arg0) \|\| isa<ConstantAggregateZero>(Val: Arg1)) {
2610	return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType()));
2611	}
2612
2613	// Check for constant LHS & RHS - in this case we just simplify.
2614	bool Zext = (IID == Intrinsic::arm_neon_vmullu \|\|
2615	IID == Intrinsic::aarch64_neon_umull);
2616	VectorType *NewVT = cast<VectorType>(Val: II->getType());
2617	if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) {
2618	if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) {
2619	Value V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /isSigned=/*!Zext);
2620	Value V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /isSigned=/*!Zext);
2621	return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1));
2622	}
2623
2624	// Couldn't simplify - canonicalize constant to the RHS.
2625	std::swap(a&: Arg0, b&: Arg1);
2626	}
2627
2628	// Handle mul by one:
2629	if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1))
2630	if (ConstantInt *Splat =
2631	dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue()))
2632	if (Splat->isOne())
2633	return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(),
2634	/isSigned=/!Zext);
2635
2636	break;
2637	}
2638	case Intrinsic::arm_neon_aesd:
2639	case Intrinsic::arm_neon_aese:
2640	case Intrinsic::aarch64_crypto_aesd:
2641	case Intrinsic::aarch64_crypto_aese: {
2642	Value *DataArg = II->getArgOperand(i: `0`);
2643	Value *KeyArg = II->getArgOperand(i: `1`);
2644
2645	// Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
2646	Value Data, Key;
2647	if (match(V: KeyArg, P: m_ZeroInt()) &&
2648	match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) {
2649	replaceOperand(I&: *II, OpNum: `0`, V: Data);
2650	replaceOperand(I&: *II, OpNum: `1`, V: Key);
2651	return II;
2652	}
2653	break;
2654	}
2655	case Intrinsic::hexagon_V6_vandvrt:
2656	case Intrinsic::hexagon_V6_vandvrt_128B: {
2657	// Simplify Q -> V -> Q conversion.
2658	if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`))) {
2659	Intrinsic::ID ID0 = Op0->getIntrinsicID();
2660	if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
2661	ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
2662	break;
2663	Value Bytes = Op0->getArgOperand(i: `1`), Mask = II->getArgOperand(i: `1`);
2664	uint64_t Bytes1 = computeKnownBits(V: Bytes, Depth: `0`, CxtI: Op0).One.getZExtValue();
2665	uint64_t Mask1 = computeKnownBits(V: Mask, Depth: `0`, CxtI: II).One.getZExtValue();
2666	// Check if every byte has common bits in Bytes and Mask.
2667	uint64_t C = Bytes1 & Mask1;
2668	if ((C & `0xFF`) && (C & `0xFF00`) && (C & `0xFF0000`) && (C & `0xFF000000`))
2669	return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: `0`));
2670	}
2671	break;
2672	}
2673	case Intrinsic::stackrestore: {
2674	enum class ClassifyResult {
2675	None,
2676	Alloca,
2677	StackRestore,
2678	CallWithSideEffects,
2679	};
2680	auto Classify = [](const Instruction *I) {
2681	if (isa<AllocaInst>(Val: I))
2682	return ClassifyResult::Alloca;
2683
2684	if (auto *CI = dyn_cast<CallInst>(Val: I)) {
2685	if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) {
2686	if (II->getIntrinsicID() == Intrinsic::stackrestore)
2687	return ClassifyResult::StackRestore;
2688
2689	if (II->mayHaveSideEffects())
2690	return ClassifyResult::CallWithSideEffects;
2691	} else {
2692	// Consider all non-intrinsic calls to be side effects
2693	return ClassifyResult::CallWithSideEffects;
2694	}
2695	}
2696
2697	return ClassifyResult::None;
2698	};
2699
2700	// If the stacksave and the stackrestore are in the same BB, and there is
2701	// no intervening call, alloca, or stackrestore of a different stacksave,
2702	// remove the restore. This can happen when variable allocas are DCE'd.
2703	if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`))) {
2704	if (SS->getIntrinsicID() == Intrinsic::stacksave &&
2705	SS->getParent() == II->getParent()) {
2706	BasicBlock::iterator BI(SS);
2707	bool CannotRemove = false;
2708	for (++BI; &*BI != II; ++BI) {
2709	switch (Classify (&*BI)) {
2710	case ClassifyResult::None:
2711	// So far so good, look at next instructions.
2712	break;
2713
2714	case ClassifyResult::StackRestore:
2715	// If we found an intervening stackrestore for a different
2716	// stacksave, we can't remove the stackrestore. Otherwise, continue.
2717	if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: `0`) != SS)
2718	CannotRemove = true;
2719	break;
2720
2721	case ClassifyResult::Alloca:
2722	case ClassifyResult::CallWithSideEffects:
2723	// If we found an alloca, a non-intrinsic call, or an intrinsic
2724	// call with side effects, we can't remove the stackrestore.
2725	CannotRemove = true;
2726	break;
2727	}
2728	if (CannotRemove)
2729	break;
2730	}
2731
2732	if (!CannotRemove)
2733	return eraseInstFromFunction(I&: CI);
2734	}
2735	}
2736
2737	// Scan down this block to see if there is another stack restore in the
2738	// same block without an intervening call/alloca.
2739	BasicBlock::iterator BI(II);
2740	Instruction *TI = II->getParent()->getTerminator();
2741	bool CannotRemove = false;
2742	for (++BI; &*BI != TI; ++BI) {
2743	switch (Classify (&*BI)) {
2744	case ClassifyResult::None:
2745	// So far so good, look at next instructions.
2746	break;
2747
2748	case ClassifyResult::StackRestore:
2749	// If there is a stackrestore below this one, remove this one.
2750	return eraseInstFromFunction(I&: CI);
2751
2752	case ClassifyResult::Alloca:
2753	case ClassifyResult::CallWithSideEffects:
2754	// If we found an alloca, a non-intrinsic call, or an intrinsic call
2755	// with side effects (such as llvm.stacksave and llvm.read_register),
2756	// we can't remove the stack restore.
2757	CannotRemove = true;
2758	break;
2759	}
2760	if (CannotRemove)
2761	break;
2762	}
2763
2764	// If the stack restore is in a return, resume, or unwind block and if there
2765	// are no allocas or calls between the restore and the return, nuke the
2766	// restore.
2767	if (!CannotRemove && (isa<ReturnInst>(Val: TI) \|\| isa<ResumeInst>(Val: TI)))
2768	return eraseInstFromFunction(I&: CI);
2769	break;
2770	}
2771	case Intrinsic::lifetime_end:
2772	// Asan needs to poison memory to detect invalid access which is possible
2773	// even for empty lifetime range.
2774	if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) \|\|
2775	II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) \|\|
2776	II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2777	break;
2778
2779	if (removeTriviallyEmptyRange(II, this, [](const IntrinsicInst &I) {
2780	return I.getIntrinsicID() == Intrinsic::lifetime_start;
2781	}))
2782	return nullptr;
2783	break;
2784	case Intrinsic::assume: {
2785	Value *IIOperand = II->getArgOperand(i: `0`);
2786	SmallVector<OperandBundleDef, `4`> OpBundles;
2787	II->getOperandBundlesAsDefs(Defs&: OpBundles);
2788
2789	/// This will remove the boolean Condition from the assume given as
2790	/// argument and remove the assume if it becomes useless.
2791	/// Always returns nullptr, for use as a return value.
2792	auto RemoveConditionFromAssume = [&](Instruction Assume) -> Instruction {
2793	assert(isa<AssumeInst>(Assume));
2794	if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II)))
2795	return eraseInstFromFunction(I&: CI);
2796	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: ConstantInt::getTrue(Context&: II->getContext()));
2797	return nullptr;
2798	};
2799	// Remove an assume if it is followed by an identical assume.
2800	// TODO: Do we need this? Unless there are conflicting assumptions, the
2801	// computeKnownBits(IIOperand) below here eliminates redundant assumes.
2802	Instruction *Next = II->getNextNonDebugInstruction();
2803	if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
2804	return RemoveConditionFromAssume (Next);
2805
2806	// Canonicalize assume(a && b) -> assume(a); assume(b);
2807	// Note: New assumption intrinsics created here are registered by
2808	// the InstCombineIRInserter object.
2809	FunctionType *AssumeIntrinsicTy = II->getFunctionType();
2810	Value *AssumeIntrinsic = II->getCalledOperand();
2811	Value A, B;
2812	if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2813	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles,
2814	Name: II->getName());
2815	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName());
2816	return eraseInstFromFunction(I&: *II);
2817	}
2818	// assume(!(a \|\| b)) -> assume(!a); assume(!b);
2819	if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
2820	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
2821	Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName());
2822	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
2823	Args: Builder.CreateNot(V: B), Name: II->getName());
2824	return eraseInstFromFunction(I&: *II);
2825	}
2826
2827	// assume( (load addr) != null ) -> add 'nonnull' metadata to load
2828	// (if assume is valid at the load)
2829	CmpInst::Predicate Pred;
2830	Instruction *LHS;
2831	if (match(V: IIOperand, P: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Zero())) &&
2832	Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
2833	LHS->getType()->isPointerTy() &&
2834	isValidAssumeForContext(I: II, CxtI: LHS, DT: &DT)) {
2835	MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: std::nullopt);
2836	LHS->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD);
2837	LHS->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD);
2838	return RemoveConditionFromAssume (II);
2839
2840	// TODO: apply nonnull return attributes to calls and invokes
2841	// TODO: apply range metadata for range check patterns?
2842	}
2843
2844	// Separate storage assumptions apply to the underlying allocations, not any
2845	// particular pointer within them. When evaluating the hints for AA purposes
2846	// we getUnderlyingObject them; by precomputing the answers here we can
2847	// avoid having to do so repeatedly there.
2848	for (unsigned Idx = `0`; Idx < II->getNumOperandBundles(); Idx++) {
2849	OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx);
2850	if (OBU.getTagName() == "separate_storage") {
2851	assert(OBU.Inputs.size() == `2`);
2852	auto MaybeSimplifyHint = [&](const Use &U) {
2853	Value *Hint = U.get();
2854	// Not having a limit is safe because InstCombine removes unreachable
2855	// code.
2856	Value UnderlyingObject = getUnderlyingObject(V: Hint, /MaxLookup/* `0`);
2857	if (Hint != UnderlyingObject)
2858	replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject);
2859	};
2860	MaybeSimplifyHint (OBU.Inputs [`0`]);
2861	MaybeSimplifyHint (OBU.Inputs [`1`]);
2862	}
2863	}
2864
2865	// Convert nonnull assume like:
2866	// %A = icmp ne i32* %PTR, null
2867	// call void @llvm.assume(i1 %A)
2868	// into
2869	// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
2870	if (EnableKnowledgeRetention &&
2871	match(V: IIOperand, P: m_Cmp(Pred, L: m_Value(V&: A), R: m_Zero())) &&
2872	Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
2873	if (auto *Replacement = buildAssumeFromKnowledge(
2874	{RetainedKnowledge{Attribute::NonNull, `0`, A}}, Next, &AC, &DT)) {
2875
2876	Replacement->insertBefore(Next);
2877	AC.registerAssumption(CI: Replacement);
2878	return RemoveConditionFromAssume (II);
2879	}
2880	}
2881
2882	// Convert alignment assume like:
2883	// %B = ptrtoint i32* %A to i64
2884	// %C = and i64 %B, Constant
2885	// %D = icmp eq i64 %C, 0
2886	// call void @llvm.assume(i1 %D)
2887	// into
2888	// call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
2889	uint64_t AlignMask;
2890	if (EnableKnowledgeRetention &&
2891	match(V: IIOperand,
2892	P: m_Cmp(Pred, L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)),
2893	R: m_Zero())) &&
2894	Pred == CmpInst::ICMP_EQ) {
2895	if (isPowerOf2_64(Value: AlignMask + `1`)) {
2896	uint64_t Offset = `0`;
2897	match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset)));
2898	if (match(V: A, P: m_PtrToInt(Op: m_Value(V&: A)))) {
2899	/// Note: this doesn't preserve the offset information but merges
2900	/// offset and alignment.
2901	/// TODO: we can generate a GEP instead of merging the alignment with
2902	/// the offset.
2903	RetainedKnowledge RK{Attribute::Alignment,
2904	(unsigned)MinAlign(Offset, AlignMask + `1`), A};
2905	if (auto *Replacement =
2906	buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) {
2907
2908	Replacement->insertAfter(InsertPos: II);
2909	AC.registerAssumption(CI: Replacement);
2910	}
2911	return RemoveConditionFromAssume (II);
2912	}
2913	}
2914	}
2915
2916	/// Canonicalize Knowledge in operand bundles.
2917	if (EnableKnowledgeRetention && II->hasOperandBundles()) {
2918	for (unsigned Idx = `0`; Idx < II->getNumOperandBundles(); Idx++) {
2919	auto &BOI = II->bundle_op_info_begin()[Idx];
2920	RetainedKnowledge RK =
2921	llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI);
2922	if (BOI.End - BOI.Begin > `2`)
2923	continue; // Prevent reducing knowledge in an align with offset since
2924	// extracting a RetainedKnowledge from them loses offset
2925	// information
2926	RetainedKnowledge CanonRK =
2927	llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK,
2928	AC: &getAssumptionCache(),
2929	DT: &getDominatorTree());
2930	if (CanonRK == RK)
2931	continue;
2932	if (!CanonRK) {
2933	if (BOI.End - BOI.Begin > `0`) {
2934	Worklist.pushValue(V: II->op_begin()[BOI.Begin]);
2935	Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]);
2936	}
2937	continue;
2938	}
2939	assert(RK.AttrKind == CanonRK.AttrKind);
2940	if (BOI.End - BOI.Begin > `0`)
2941	II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
2942	if (BOI.End - BOI.Begin > `1`)
2943	II->op_begin()[BOI.Begin + `1`].set(ConstantInt::get(
2944	Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue));
2945	if (RK.WasOn)
2946	Worklist.pushValue(V: RK.WasOn);
2947	return II;
2948	}
2949	}
2950
2951	// If there is a dominating assume with the same condition as this one,
2952	// then this one is redundant, and should be removed.
2953	KnownBits Known(`1`);
2954	computeKnownBits(V: IIOperand, Known, Depth: `0`, CxtI: II);
2955	if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II)))
2956	return eraseInstFromFunction(I&: *II);
2957
2958	// assume(false) is unreachable.
2959	if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) {
2960	CreateNonTerminatorUnreachable(InsertAt: II);
2961	return eraseInstFromFunction(I&: *II);
2962	}
2963
2964	// Update the cache of affected values for this assumption (we might be
2965	// here because we just simplified the condition).
2966	AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II));
2967	break;
2968	}
2969	case Intrinsic::experimental_guard: {
2970	// Is this guard followed by another guard? We scan forward over a small
2971	// fixed window of instructions to handle common cases with conditions
2972	// computed between guards.
2973	Instruction *NextInst = II->getNextNonDebugInstruction();
2974	for (unsigned i = `0`; i < GuardWideningWindow; i++) {
2975	// Note: Using context-free form to avoid compile time blow up
2976	if (!isSafeToSpeculativelyExecute(I: NextInst))
2977	break;
2978	NextInst = NextInst->getNextNonDebugInstruction();
2979	}
2980	Value NextCond = nullptr*;
2981	if (match(NextInst,
2982	m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
2983	Value *CurrCond = II->getArgOperand(i: `0`);
2984
2985	// Remove a guard that is immediately preceded by an identical guard.
2986	// Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
2987	if (CurrCond != NextCond) {
2988	Instruction *MoveI = II->getNextNonDebugInstruction();
2989	while (MoveI != NextInst) {
2990	auto *Temp = MoveI;
2991	MoveI = MoveI->getNextNonDebugInstruction();
2992	Temp->moveBefore(MovePos: II);
2993	}
2994	replaceOperand(I&: *II, OpNum: `0`, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond));
2995	}
2996	eraseInstFromFunction(I&: *NextInst);
2997	return II;
2998	}
2999	break;
3000	}
3001	case Intrinsic::vector_insert: {
3002	Value *Vec = II->getArgOperand(i: `0`);
3003	Value *SubVec = II->getArgOperand(i: `1`);
3004	Value *Idx = II->getArgOperand(i: `2`);
3005	auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType());
3006	auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
3007	auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType());
3008
3009	// Only canonicalize if the destination vector, Vec, and SubVec are all
3010	// fixed vectors.
3011	if (DstTy && VecTy && SubVecTy) {
3012	unsigned DstNumElts = DstTy->getNumElements();
3013	unsigned VecNumElts = VecTy->getNumElements();
3014	unsigned SubVecNumElts = SubVecTy->getNumElements();
3015	unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3016
3017	// An insert that entirely overwrites Vec with SubVec is a nop.
3018	if (VecNumElts == SubVecNumElts)
3019	return replaceInstUsesWith(I&: CI, V: SubVec);
3020
3021	// Widen SubVec into a vector of the same width as Vec, since
3022	// shufflevector requires the two input vectors to be the same width.
3023	// Elements beyond the bounds of SubVec within the widened vector are
3024	// undefined.
3025	SmallVector<int, `8`> WidenMask;
3026	unsigned i;
3027	for (i = `0`; i != SubVecNumElts; ++i)
3028	WidenMask.push_back(Elt: i);
3029	for (; i != VecNumElts; ++i)
3030	WidenMask.push_back(Elt: PoisonMaskElem);
3031
3032	Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask);
3033
3034	SmallVector<int, `8`> Mask;
3035	for (unsigned i = `0`; i != IdxN; ++i)
3036	Mask.push_back(Elt: i);
3037	for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3038	Mask.push_back(Elt: i);
3039	for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3040	Mask.push_back(Elt: i);
3041
3042	Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask);
3043	return replaceInstUsesWith(I&: CI, V: Shuffle);
3044	}
3045	break;
3046	}
3047	case Intrinsic::vector_extract: {
3048	Value *Vec = II->getArgOperand(i: `0`);
3049	Value *Idx = II->getArgOperand(i: `1`);
3050
3051	Type *ReturnType = II->getType();
3052	// (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3053	// ExtractIdx)
3054	unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue();
3055	Value InsertTuple, InsertIdx, *InsertValue;
3056	if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
3057	m_Value(InsertValue),
3058	m_Value(InsertIdx))) &&
3059	InsertValue->getType() == ReturnType) {
3060	unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue();
3061	// Case where we get the same index right after setting it.
3062	// extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3063	// InsertValue
3064	if (ExtractIdx == Index)
3065	return replaceInstUsesWith(I&: CI, V: InsertValue);
3066	// If we are getting a different index than what was set in the
3067	// insert.vector intrinsic. We can just set the input tuple to the one up
3068	// in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3069	// InsertIndex), ExtractIndex)
3070	// --> extract.vector(InsertTuple, ExtractIndex)
3071	else
3072	return replaceOperand(I&: CI, OpNum: `0`, V: InsertTuple);
3073	}
3074
3075	auto *DstTy = dyn_cast<VectorType>(Val: ReturnType);
3076	auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType());
3077
3078	if (DstTy && VecTy) {
3079	auto DstEltCnt = DstTy->getElementCount();
3080	auto VecEltCnt = VecTy->getElementCount();
3081	unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3082
3083	// Extracting the entirety of Vec is a nop.
3084	if (DstEltCnt == VecTy->getElementCount()) {
3085	replaceInstUsesWith(I&: CI, V: Vec);
3086	return eraseInstFromFunction(I&: CI);
3087	}
3088
3089	// Only canonicalize to shufflevector if the destination vector and
3090	// Vec are fixed vectors.
3091	if (VecEltCnt.isScalable() \|\| DstEltCnt.isScalable())
3092	break;
3093
3094	SmallVector<int, `8`> Mask;
3095	for (unsigned i = `0`; i != DstEltCnt.getKnownMinValue(); ++i)
3096	Mask.push_back(Elt: IdxN + i);
3097
3098	Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask);
3099	return replaceInstUsesWith(I&: CI, V: Shuffle);
3100	}
3101	break;
3102	}
3103	case Intrinsic::experimental_vector_reverse: {
3104	Value BO0, BO1, X, Y;
3105	Value *Vec = II->getArgOperand(i: `0`);
3106	if (match(V: Vec, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: BO0), R: m_Value(V&: BO1))))) {
3107	auto *OldBinOp = cast<BinaryOperator>(Val: Vec);
3108	if (match(V: BO0, P: m_VecReverse(Op0: m_Value(V&: X)))) {
3109	// rev(binop rev(X), rev(Y)) --> binop X, Y
3110	if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y))))
3111	return replaceInstUsesWith(I&: CI,
3112	V: BinaryOperator::CreateWithCopiedFlags(
3113	Opc: OldBinOp->getOpcode(), V1: X, V2: Y, CopyO: OldBinOp,
3114	Name: OldBinOp->getName(), InsertBefore: II));
3115	// rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
3116	if (isSplatValue(V: BO1))
3117	return replaceInstUsesWith(I&: CI,
3118	V: BinaryOperator::CreateWithCopiedFlags(
3119	Opc: OldBinOp->getOpcode(), V1: X, V2: BO1,
3120	CopyO: OldBinOp, Name: OldBinOp->getName(), InsertBefore: II));
3121	}
3122	// rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
3123	if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y))) && isSplatValue(V: BO0))
3124	return replaceInstUsesWith(I&: CI, V: BinaryOperator::CreateWithCopiedFlags(
3125	Opc: OldBinOp->getOpcode(), V1: BO0, V2: Y,
3126	CopyO: OldBinOp, Name: OldBinOp->getName(), InsertBefore: II));
3127	}
3128	// rev(unop rev(X)) --> unop X
3129	if (match(V: Vec, P: m_OneUse(SubPattern: m_UnOp(X: m_VecReverse(Op0: m_Value(V&: X)))))) {
3130	auto *OldUnOp = cast<UnaryOperator>(Val: Vec);
3131	auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3132	Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(), InsertBefore: II);
3133	return replaceInstUsesWith(I&: CI, V: NewUnOp);
3134	}
3135	break;
3136	}
3137	case Intrinsic::vector_reduce_or:
3138	case Intrinsic::vector_reduce_and: {
3139	// Canonicalize logical or/and reductions:
3140	// Or reduction for i1 is represented as:
3141	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
3142	// %res = cmp ne iReduxWidth %val, 0
3143	// And reduction for i1 is represented as:
3144	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
3145	// %res = cmp eq iReduxWidth %val, 11111
3146	Value *Arg = II->getArgOperand(i: `0`);
3147	Value *Vect;
3148	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3149	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3150	if (FTy->getElementType() == Builder.getInt1Ty()) {
3151	Value *Res = Builder.CreateBitCast(
3152	V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
3153	if (IID == Intrinsic::vector_reduce_and) {
3154	Res = Builder.CreateICmpEQ(
3155	LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType()));
3156	} else {
3157	assert(IID == Intrinsic::vector_reduce_or &&
3158	"Expected or reduction.");
3159	Res = Builder.CreateIsNotNull(Arg: Res);
3160	}
3161	if (Arg != Vect)
3162	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3163	DestTy: II->getType());
3164	return replaceInstUsesWith(I&: CI, V: Res);
3165	}
3166	}
3167	[[fallthrough]];
3168	}
3169	case Intrinsic::vector_reduce_add: {
3170	if (IID == Intrinsic::vector_reduce_add) {
3171	// Convert vector_reduce_add(ZExt(<n x i1>)) to
3172	// ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3173	// Convert vector_reduce_add(SExt(<n x i1>)) to
3174	// -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3175	// Convert vector_reduce_add(<n x i1>) to
3176	// Trunc(ctpop(bitcast <n x i1> to in)).
3177	Value *Arg = II->getArgOperand(i: `0`);
3178	Value *Vect;
3179	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3180	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3181	if (FTy->getElementType() == Builder.getInt1Ty()) {
3182	Value *V = Builder.CreateBitCast(
3183	V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
3184	Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
3185	if (Res->getType() != II->getType())
3186	Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType());
3187	if (Arg != Vect &&
3188	cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt)
3189	Res = Builder.CreateNeg(V: Res);
3190	return replaceInstUsesWith(I&: CI, V: Res);
3191	}
3192	}
3193	}
3194	[[fallthrough]];
3195	}
3196	case Intrinsic::vector_reduce_xor: {
3197	if (IID == Intrinsic::vector_reduce_xor) {
3198	// Exclusive disjunction reduction over the vector with
3199	// (potentially-extended) i1 element type is actually a
3200	// (potentially-extended) arithmetic `add` reduction over the original
3201	// non-extended value:
3202	// vector_reduce_xor(?ext(<n x i1>))
3203	// -->
3204	// ?ext(vector_reduce_add(<n x i1>))
3205	Value *Arg = II->getArgOperand(i: `0`);
3206	Value *Vect;
3207	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3208	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3209	if (FTy->getElementType() == Builder.getInt1Ty()) {
3210	Value *Res = Builder.CreateAddReduce(Src: Vect);
3211	if (Arg != Vect)
3212	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3213	DestTy: II->getType());
3214	return replaceInstUsesWith(I&: CI, V: Res);
3215	}
3216	}
3217	}
3218	[[fallthrough]];
3219	}
3220	case Intrinsic::vector_reduce_mul: {
3221	if (IID == Intrinsic::vector_reduce_mul) {
3222	// Multiplicative reduction over the vector with (potentially-extended)
3223	// i1 element type is actually a (potentially zero-extended)
3224	// logical `and` reduction over the original non-extended value:
3225	// vector_reduce_mul(?ext(<n x i1>))
3226	// -->
3227	// zext(vector_reduce_and(<n x i1>))
3228	Value *Arg = II->getArgOperand(i: `0`);
3229	Value *Vect;
3230	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3231	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3232	if (FTy->getElementType() == Builder.getInt1Ty()) {
3233	Value *Res = Builder.CreateAndReduce(Src: Vect);
3234	if (Res->getType() != II->getType())
3235	Res = Builder.CreateZExt(V: Res, DestTy: II->getType());
3236	return replaceInstUsesWith(I&: CI, V: Res);
3237	}
3238	}
3239	}
3240	[[fallthrough]];
3241	}
3242	case Intrinsic::vector_reduce_umin:
3243	case Intrinsic::vector_reduce_umax: {
3244	if (IID == Intrinsic::vector_reduce_umin \|\|
3245	IID == Intrinsic::vector_reduce_umax) {
3246	// UMin/UMax reduction over the vector with (potentially-extended)
3247	// i1 element type is actually a (potentially-extended)
3248	// logical `and`/`or` reduction over the original non-extended value:
3249	// vector_reduce_u{min,max}(?ext(<n x i1>))
3250	// -->
3251	// ?ext(vector_reduce_{and,or}(<n x i1>))
3252	Value *Arg = II->getArgOperand(i: `0`);
3253	Value *Vect;
3254	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3255	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3256	if (FTy->getElementType() == Builder.getInt1Ty()) {
3257	Value *Res = IID == Intrinsic::vector_reduce_umin
3258	? Builder.CreateAndReduce(Vect)
3259	: Builder.CreateOrReduce(Vect);
3260	if (Arg != Vect)
3261	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3262	DestTy: II->getType());
3263	return replaceInstUsesWith(I&: CI, V: Res);
3264	}
3265	}
3266	}
3267	[[fallthrough]];
3268	}
3269	case Intrinsic::vector_reduce_smin:
3270	case Intrinsic::vector_reduce_smax: {
3271	if (IID == Intrinsic::vector_reduce_smin \|\|
3272	IID == Intrinsic::vector_reduce_smax) {
3273	// SMin/SMax reduction over the vector with (potentially-extended)
3274	// i1 element type is actually a (potentially-extended)
3275	// logical `and`/`or` reduction over the original non-extended value:
3276	// vector_reduce_s{min,max}(<n x i1>)
3277	// -->
3278	// vector_reduce_{or,and}(<n x i1>)
3279	// and
3280	// vector_reduce_s{min,max}(sext(<n x i1>))
3281	// -->
3282	// sext(vector_reduce_{or,and}(<n x i1>))
3283	// and
3284	// vector_reduce_s{min,max}(zext(<n x i1>))
3285	// -->
3286	// zext(vector_reduce_{and,or}(<n x i1>))
3287	Value *Arg = II->getArgOperand(i: `0`);
3288	Value *Vect;
3289	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3290	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3291	if (FTy->getElementType() == Builder.getInt1Ty()) {
3292	Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
3293	if (Arg != Vect)
3294	ExtOpc = cast<CastInst>(Val: Arg)->getOpcode();
3295	Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
3296	(ExtOpc == Instruction::CastOps::ZExt))
3297	? Builder.CreateAndReduce(Vect)
3298	: Builder.CreateOrReduce(Vect);
3299	if (Arg != Vect)
3300	Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType());
3301	return replaceInstUsesWith(I&: CI, V: Res);
3302	}
3303	}
3304	}
3305	[[fallthrough]];
3306	}
3307	case Intrinsic::vector_reduce_fmax:
3308	case Intrinsic::vector_reduce_fmin:
3309	case Intrinsic::vector_reduce_fadd:
3310	case Intrinsic::vector_reduce_fmul: {
3311	bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
3312	IID != Intrinsic::vector_reduce_fmul) \|\|
3313	II->hasAllowReassoc();
3314	const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd \|\|
3315	IID == Intrinsic::vector_reduce_fmul)
3316	? `1`
3317	: `0`;
3318	Value *Arg = II->getArgOperand(i: ArgIdx);
3319	Value *V;
3320	ArrayRef<int> Mask;
3321	if (!isa<FixedVectorType>(Val: Arg->getType()) \|\| !CanBeReassociated \|\|
3322	!match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask (Mask))) \|\|
3323	!cast<ShuffleVectorInst>(Val: Arg)->isSingleSource())
3324	break;
3325	int Sz = Mask.size();
3326	SmallBitVector UsedIndices(Sz);
3327	for (int Idx : Mask) {
3328	if (Idx == PoisonMaskElem \|\| UsedIndices.test(Idx))
3329	break;
3330	UsedIndices.set(Idx);
3331	}
3332	// Can remove shuffle iff just shuffled elements, no repeats, undefs, or
3333	// other changes.
3334	if (UsedIndices.all()) {
3335	replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: V);
3336	return nullptr;
3337	}
3338	break;
3339	}
3340	case Intrinsic::is_fpclass: {
3341	if (Instruction I = foldIntrinsicIsFPClass(II&: II))
3342	return I;
3343	break;
3344	}
3345	default: {
3346	// Handle target specific intrinsics
3347	std::optional<Instruction > V = targetInstCombineIntrinsic(II&: II);
3348	if (V)
3349	return *V;
3350	break;
3351	}
3352	}
3353
3354	// Try to fold intrinsic into select operands. This is legal if:
3355	// The intrinsic is speculatable.*
3356	// The select condition is not a vector, or the intrinsic does not*
3357	// perform cross-lane operations.
3358	switch (IID) {
3359	case Intrinsic::ctlz:
3360	case Intrinsic::cttz:
3361	case Intrinsic::ctpop:
3362	case Intrinsic::umin:
3363	case Intrinsic::umax:
3364	case Intrinsic::smin:
3365	case Intrinsic::smax:
3366	case Intrinsic::usub_sat:
3367	case Intrinsic::uadd_sat:
3368	case Intrinsic::ssub_sat:
3369	case Intrinsic::sadd_sat:
3370	for (Value *Op : II->args())
3371	if (auto *Sel = dyn_cast<SelectInst>(Val: Op))
3372	if (Instruction R = FoldOpIntoSelect(Op&: II, SI: Sel))
3373	return R;
3374	[[fallthrough]];
3375	default:
3376	break;
3377	}
3378
3379	if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
3380	return Shuf;
3381
3382	// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
3383	// context, so it is handled in visitCallBase and we should trigger it.
3384	return visitCallBase(Call&: *II);
3385	}
3386
3387	// Fence instruction simplification
3388	Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
3389	auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNonDebugInstruction());
3390	// This check is solely here to handle arbitrary target-dependent syncscopes.
3391	// TODO: Can remove if does not matter in practice.
3392	if (NFI && FI.isIdenticalTo(I: NFI))
3393	return eraseInstFromFunction(I&: FI);
3394
3395	// Returns true if FI1 is identical or stronger fence than FI2.
3396	auto isIdenticalOrStrongerFence = [](FenceInst FI1, FenceInst FI2) {
3397	auto FI1SyncScope = FI1->getSyncScopeID();
3398	// Consider same scope, where scope is global or single-thread.
3399	if (FI1SyncScope != FI2->getSyncScopeID() \|\|
3400	(FI1SyncScope != SyncScope::System &&
3401	FI1SyncScope != SyncScope::SingleThread))
3402	return false;
3403
3404	return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering());
3405	};
3406	if (NFI && isIdenticalOrStrongerFence (NFI, &FI))
3407	return eraseInstFromFunction(I&: FI);
3408
3409	if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNonDebugInstruction()))
3410	if (isIdenticalOrStrongerFence (PFI, &FI))
3411	return eraseInstFromFunction(I&: FI);
3412	return nullptr;
3413	}
3414
3415	// InvokeInst simplification
3416	Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
3417	return visitCallBase(Call&: II);
3418	}
3419
3420	// CallBrInst simplification
3421	Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
3422	return visitCallBase(Call&: CBI);
3423	}
3424
3425	Instruction InstCombinerImpl::tryOptimizeCall(CallInst CI) {
3426	if (!CI->getCalledFunction()) return nullptr;
3427
3428	// Skip optimizing notail and musttail calls so
3429	// LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
3430	// LibCallSimplifier::optimizeCall should try to preseve tail calls though.
3431	if (CI->isMustTailCall() \|\| CI->isNoTailCall())
3432	return nullptr;
3433
3434	auto InstCombineRAUW = [this](Instruction From, Value With) {
3435	replaceInstUsesWith(I&: *From, V: With);
3436	};
3437	auto InstCombineErase = [this](Instruction *I) {
3438	eraseInstFromFunction(I&: *I);
3439	};
3440	LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
3441	InstCombineErase);
3442	if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) {
3443	++NumSimplified;
3444	return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
3445	}
3446
3447	return nullptr;
3448	}
3449
3450	static IntrinsicInst findInitTrampolineFromAlloca(Value TrampMem) {
3451	// Strip off at most one level of pointer casts, looking for an alloca. This
3452	// is good enough in practice and simpler than handling any number of casts.
3453	Value *Underlying = TrampMem->stripPointerCasts();
3454	if (Underlying != TrampMem &&
3455	(!Underlying->hasOneUse() \|\| Underlying->user_back() != TrampMem))
3456	return nullptr;
3457	if (!isa<AllocaInst>(Val: Underlying))
3458	return nullptr;
3459
3460	IntrinsicInst InitTrampoline = nullptr*;
3461	for (User *U : TrampMem->users()) {
3462	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
3463	if (!II)
3464	return nullptr;
3465	if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3466	if (InitTrampoline)
3467	// More than one init_trampoline writes to this value. Give up.
3468	return nullptr;
3469	InitTrampoline = II;
3470	continue;
3471	}
3472	if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3473	// Allow any number of calls to adjust.trampoline.
3474	continue;
3475	return nullptr;
3476	}
3477
3478	// No call to init.trampoline found.
3479	if (!InitTrampoline)
3480	return nullptr;
3481
3482	// Check that the alloca is being used in the expected way.
3483	if (InitTrampoline->getOperand(i_nocapture: `0`) != TrampMem)
3484	return nullptr;
3485
3486	return InitTrampoline;
3487	}
3488
3489	static IntrinsicInst findInitTrampolineFromBB(IntrinsicInst AdjustTramp,
3490	Value *TrampMem) {
3491	// Visit all the previous instructions in the basic block, and try to find a
3492	// init.trampoline which has a direct path to the adjust.trampoline.
3493	for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3494	E = AdjustTramp->getParent()->begin();
3495	I != E;) {
3496	Instruction Inst = &--I;
3497	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I))
3498	if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3499	II->getOperand(`0`) == TrampMem)
3500	return II;
3501	if (Inst->mayWriteToMemory())
3502	return nullptr;
3503	}
3504	return nullptr;
3505	}
3506
3507	// Given a call to llvm.adjust.trampoline, find and return the corresponding
3508	// call to llvm.init.trampoline if the call to the trampoline can be optimized
3509	// to a direct call to a function. Otherwise return NULL.
3510	static IntrinsicInst findInitTrampoline(Value Callee) {
3511	Callee = Callee->stripPointerCasts();
3512	IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee);
3513	if (!AdjustTramp \|\|
3514	AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3515	return nullptr;
3516
3517	Value *TrampMem = AdjustTramp->getOperand(i_nocapture: `0`);
3518
3519	if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
3520	return IT;
3521	if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3522	return IT;
3523	return nullptr;
3524	}
3525
3526	bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
3527	const TargetLibraryInfo *TLI) {
3528	// Note: We only handle cases which can't be driven from generic attributes
3529	// here. So, for example, nonnull and noalias (which are common properties
3530	// of some allocation functions) are expected to be handled via annotation
3531	// of the respective allocator declaration with generic attributes.
3532	bool Changed = false;
3533
3534	if (!Call.getType()->isPointerTy())
3535	return Changed;
3536
3537	std::optional<APInt> Size = getAllocSize(CB: &Call, TLI);
3538	if (Size && *Size != `0`) {
3539	// TODO: We really should just emit deref_or_null here and then
3540	// let the generic inference code combine that with nonnull.
3541	if (Call.hasRetAttr(Attribute::NonNull)) {
3542	Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
3543	Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes(
3544	Context&: Call.getContext(), Bytes: Size ->getLimitedValue()));
3545	} else {
3546	Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
3547	Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes(
3548	Context&: Call.getContext(), Bytes: Size ->getLimitedValue()));
3549	}
3550	}
3551
3552	// Add alignment attribute if alignment is a power of two constant.
3553	Value *Alignment = getAllocAlignment(V: &Call, TLI);
3554	if (!Alignment)
3555	return Changed;
3556
3557	ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment);
3558	if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) {
3559	uint64_t AlignmentVal = AlignOpC->getZExtValue();
3560	if (llvm::isPowerOf2_64(Value: AlignmentVal)) {
3561	Align ExistingAlign = Call.getRetAlign().valueOrOne();
3562	Align NewAlign = Align (AlignmentVal);
3563	if (NewAlign > ExistingAlign) {
3564	Call.addRetAttr(
3565	Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign));
3566	Changed = true;
3567	}
3568	}
3569	}
3570	return Changed;
3571	}
3572
3573	/// Improvements for call, callbr and invoke instructions.
3574	Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
3575	bool Changed = annotateAnyAllocSite(Call, TLI: &TLI);
3576
3577	// Mark any parameters that are known to be non-null with the nonnull
3578	// attribute. This is helpful for inlining calls to functions with null
3579	// checks on their arguments.
3580	SmallVector<unsigned, `4`> ArgNos;
3581	unsigned ArgNo = `0`;
3582
3583	for (Value *V : Call.args()) {
3584	if (V->getType()->isPointerTy() &&
3585	!Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
3586	isKnownNonZero(V, DL, `0`, &AC, &Call, &DT))
3587	ArgNos.push_back(Elt: ArgNo);
3588	ArgNo++;
3589	}
3590
3591	assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
3592
3593	if (!ArgNos.empty()) {
3594	AttributeList AS = Call.getAttributes();
3595	LLVMContext &Ctx = Call.getContext();
3596	AS = AS.addParamAttribute(Ctx, ArgNos,
3597	Attribute::get(Ctx, Attribute::NonNull));
3598	Call.setAttributes(AS);
3599	Changed = true;
3600	}
3601
3602	// If the callee is a pointer to a function, attempt to move any casts to the
3603	// arguments of the call/callbr/invoke.
3604	Value *Callee = Call.getCalledOperand();
3605	Function *CalleeF = dyn_cast<Function>(Val: Callee);
3606	if ((!CalleeF \|\| CalleeF->getFunctionType() != Call.getFunctionType()) &&
3607	transformConstExprCastCall(Call))
3608	return nullptr;
3609
3610	if (CalleeF) {
3611	// Remove the convergent attr on calls when the callee is not convergent.
3612	if (Call.isConvergent() && !CalleeF->isConvergent() &&
3613	!CalleeF->isIntrinsic()) {
3614	LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
3615	<< "\n");
3616	Call.setNotConvergent();
3617	return &Call;
3618	}
3619
3620	// If the call and callee calling conventions don't match, and neither one
3621	// of the calling conventions is compatible with C calling convention
3622	// this call must be unreachable, as the call is undefined.
3623	if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
3624	!(CalleeF->getCallingConv() == llvm::CallingConv::C &&
3625	TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) &&
3626	!(Call.getCallingConv() == llvm::CallingConv::C &&
3627	TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) &&
3628	// Only do this for calls to a function with a body. A prototype may
3629	// not actually end up matching the implementation's calling conv for a
3630	// variety of reasons (e.g. it may be written in assembly).
3631	!CalleeF->isDeclaration()) {
3632	Instruction *OldCall = &Call;
3633	CreateNonTerminatorUnreachable(InsertAt: OldCall);
3634	// If OldCall does not return void then replaceInstUsesWith poison.
3635	// This allows ValueHandlers and custom metadata to adjust itself.
3636	if (!OldCall->getType()->isVoidTy())
3637	replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType()));
3638	if (isa<CallInst>(Val: OldCall))
3639	return eraseInstFromFunction(I&: *OldCall);
3640
3641	// We cannot remove an invoke or a callbr, because it would change thexi
3642	// CFG, just change the callee to a null pointer.
3643	cast<CallBase>(Val: OldCall)->setCalledFunction(
3644	FTy: CalleeF->getFunctionType(),
3645	Fn: Constant::getNullValue(Ty: CalleeF->getType()));
3646	return nullptr;
3647	}
3648	}
3649
3650	// Calling a null function pointer is undefined if a null address isn't
3651	// dereferenceable.
3652	if ((isa<ConstantPointerNull>(Val: Callee) &&
3653	!NullPointerIsDefined(F: Call.getFunction())) \|\|
3654	isa<UndefValue>(Val: Callee)) {
3655	// If Call does not return void then replaceInstUsesWith poison.
3656	// This allows ValueHandlers and custom metadata to adjust itself.
3657	if (!Call.getType()->isVoidTy())
3658	replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType()));
3659
3660	if (Call.isTerminator()) {
3661	// Can't remove an invoke or callbr because we cannot change the CFG.
3662	return nullptr;
3663	}
3664
3665	// This instruction is not reachable, just remove it.
3666	CreateNonTerminatorUnreachable(InsertAt: &Call);
3667	return eraseInstFromFunction(I&: Call);
3668	}
3669
3670	if (IntrinsicInst *II = findInitTrampoline(Callee))
3671	return transformCallThroughTrampoline(Call, Tramp&: *II);
3672
3673	if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) {
3674	InlineAsm *IA = cast<InlineAsm>(Val: Callee);
3675	if (!IA->canThrow()) {
3676	// Normal inline asm calls cannot throw - mark them
3677	// 'nounwind'.
3678	Call.setDoesNotThrow();
3679	Changed = true;
3680	}
3681	}
3682
3683	// Try to optimize the call if possible, we require DataLayout for most of
3684	// this. None of these calls are seen as possibly dead so go ahead and
3685	// delete the instruction now.
3686	if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) {
3687	Instruction *I = tryOptimizeCall(CI);
3688	// If we changed something return the result, etc. Otherwise let
3689	// the fallthrough check.
3690	if (I) return eraseInstFromFunction(I&: *I);
3691	}
3692
3693	if (!Call.use_empty() && !Call.isMustTailCall())
3694	if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
3695	Type *CallTy = Call.getType();
3696	Type *RetArgTy = ReturnedArg->getType();
3697	if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy))
3698	return replaceInstUsesWith(
3699	I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy));
3700	}
3701
3702	// Drop unnecessary kcfi operand bundles from calls that were converted
3703	// into direct calls.
3704	auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi);
3705	if (Bundle && !Call.isIndirectCall()) {
3706	DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
3707	if (CalleeF) {
3708	ConstantInt FunctionType = nullptr*;
3709	ConstantInt *ExpectedType = cast<ConstantInt>(Bundle ->Inputs[`0`]);
3710
3711	if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
3712	FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(`0`));
3713
3714	if (FunctionType &&
3715	FunctionType->getZExtValue() != ExpectedType->getZExtValue())
3716	dbgs() << Call.getModule()->getName()
3717	<< ": warning: kcfi: " << Call.getCaller()->getName()
3718	<< ": call to " << CalleeF->getName()
3719	<< " using a mismatching function pointer type\n";
3720	}
3721	});
3722
3723	return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi);
3724	}
3725
3726	if (isRemovableAlloc(V: &Call, TLI: &TLI))
3727	return visitAllocSite(FI&: Call);
3728
3729	// Handle intrinsics which can be used in both call and invoke context.
3730	switch (Call.getIntrinsicID()) {
3731	case Intrinsic::experimental_gc_statepoint: {
3732	GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call);
3733	SmallPtrSet<Value *, `32`> LiveGcValues;
3734	for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
3735	GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
3736
3737	// Remove the relocation if unused.
3738	if (GCR.use_empty()) {
3739	eraseInstFromFunction(I&: GCR);
3740	continue;
3741	}
3742
3743	Value *DerivedPtr = GCR.getDerivedPtr();
3744	Value *BasePtr = GCR.getBasePtr();
3745
3746	// Undef is undef, even after relocation.
3747	if (isa<UndefValue>(Val: DerivedPtr) \|\| isa<UndefValue>(Val: BasePtr)) {
3748	replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType()));
3749	eraseInstFromFunction(I&: GCR);
3750	continue;
3751	}
3752
3753	if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) {
3754	// The relocation of null will be null for most any collector.
3755	// TODO: provide a hook for this in GCStrategy. There might be some
3756	// weird collector this property does not hold for.
3757	if (isa<ConstantPointerNull>(Val: DerivedPtr)) {
3758	// Use null-pointer of gc_relocate's type to replace it.
3759	replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT));
3760	eraseInstFromFunction(I&: GCR);
3761	continue;
3762	}
3763
3764	// isKnownNonNull -> nonnull attribute
3765	if (!GCR.hasRetAttr(Attribute::NonNull) &&
3766	isKnownNonZero(DerivedPtr, DL, `0`, &AC, &Call, &DT)) {
3767	GCR.addRetAttr(Attribute::NonNull);
3768	// We discovered new fact, re-check users.
3769	Worklist.pushUsersToWorkList(I&: GCR);
3770	}
3771	}
3772
3773	// If we have two copies of the same pointer in the statepoint argument
3774	// list, canonicalize to one. This may let us common gc.relocates.
3775	if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
3776	GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
3777	auto *OpIntTy = GCR.getOperand(i_nocapture: `2`)->getType();
3778	GCR.setOperand(i_nocapture: `2`, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex()));
3779	}
3780
3781	// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3782	// Canonicalize on the type from the uses to the defs
3783
3784	// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3785	LiveGcValues.insert(Ptr: BasePtr);
3786	LiveGcValues.insert(Ptr: DerivedPtr);
3787	}
3788	std::optional<OperandBundleUse> Bundle =
3789	GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live);
3790	unsigned NumOfGCLives = LiveGcValues.size();
3791	if (!Bundle \|\| NumOfGCLives == Bundle ->Inputs.size())
3792	break;
3793	// We can reduce the size of gc live bundle.
3794	DenseMap<Value , unsigned*> Val2Idx;
3795	std::vector<Value *> NewLiveGc;
3796	for (Value *V : Bundle ->Inputs) {
3797	if (Val2Idx.count(Val: V))
3798	continue;
3799	if (LiveGcValues.count(Ptr: V)) {
3800	Val2Idx [V] = NewLiveGc.size();
3801	NewLiveGc.push_back(x: V);
3802	} else
3803	Val2Idx [V] = NumOfGCLives;
3804	}
3805	// Update all gc.relocates
3806	for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
3807	GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
3808	Value *BasePtr = GCR.getBasePtr();
3809	assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
3810	"Missed live gc for base pointer");
3811	auto *OpIntTy1 = GCR.getOperand(i_nocapture: `1`)->getType();
3812	GCR.setOperand(i_nocapture: `1`, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx [BasePtr]));
3813	Value *DerivedPtr = GCR.getDerivedPtr();
3814	assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
3815	"Missed live gc for derived pointer");
3816	auto *OpIntTy2 = GCR.getOperand(i_nocapture: `2`)->getType();
3817	GCR.setOperand(i_nocapture: `2`, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx [DerivedPtr]));
3818	}
3819	// Create new statepoint instruction.
3820	OperandBundleDef NewBundle("gc-live", NewLiveGc);
3821	return CallBase::Create(CB: &Call, Bundle: NewBundle);
3822	}
3823	default: { break; }
3824	}
3825
3826	return Changed ? &Call : nullptr;
3827	}
3828
3829	/// If the callee is a constexpr cast of a function, attempt to move the cast to
3830	/// the arguments of the call/invoke.
3831	/// CallBrInst is not supported.
3832	bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
3833	auto *Callee =
3834	dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts());
3835	if (!Callee)
3836	return false;
3837
3838	assert(!isa<CallBrInst>(Call) &&
3839	"CallBr's don't have a single point after a def to insert at");
3840
3841	// If this is a call to a thunk function, don't remove the cast. Thunks are
3842	// used to transparently forward all incoming parameters and outgoing return
3843	// values, so it's important to leave the cast in place.
3844	if (Callee->hasFnAttribute(Kind: "thunk"))
3845	return false;
3846
3847	// If this is a call to a naked function, the assembly might be
3848	// using an argument, or otherwise rely on the frame layout,
3849	// the function prototype will mismatch.
3850	if (Callee->hasFnAttribute(Attribute::Naked))
3851	return false;
3852
3853	// If this is a musttail call, the callee's prototype must match the caller's
3854	// prototype with the exception of pointee types. The code below doesn't
3855	// implement that, so we can't do this transform.
3856	// TODO: Do the transform if it only requires adding pointer casts.
3857	if (Call.isMustTailCall())
3858	return false;
3859
3860	Instruction *Caller = &Call;
3861	const AttributeList &CallerPAL = Call.getAttributes();
3862
3863	// Okay, this is a cast from a function to a different type. Unless doing so
3864	// would cause a type conversion of one of our arguments, change this call to
3865	// be a direct call with arguments casted to the appropriate types.
3866	FunctionType *FT = Callee->getFunctionType();
3867	Type *OldRetTy = Caller->getType();
3868	Type *NewRetTy = FT->getReturnType();
3869
3870	// Check to see if we are changing the return type...
3871	if (OldRetTy != NewRetTy) {
3872
3873	if (NewRetTy->isStructTy())
3874	return false; // TODO: Handle multiple return values.
3875
3876	if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) {
3877	if (Callee->isDeclaration())
3878	return false; // Cannot transform this return value.
3879
3880	if (!Caller->use_empty() &&
3881	// void -> non-void is handled specially
3882	!NewRetTy->isVoidTy())
3883	return false; // Cannot transform this return value.
3884	}
3885
3886	if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
3887	AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
3888	if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy)))
3889	return false; // Attribute not compatible with transformed value.
3890	}
3891
3892	// If the callbase is an invoke instruction, and the return value is
3893	// used by a PHI node in a successor, we cannot change the return type of
3894	// the call because there is no place to put the cast instruction (without
3895	// breaking the critical edge). Bail out in this case.
3896	if (!Caller->use_empty()) {
3897	BasicBlock PhisNotSupportedBlock = nullptr*;
3898	if (auto *II = dyn_cast<InvokeInst>(Val: Caller))
3899	PhisNotSupportedBlock = II->getNormalDest();
3900	if (PhisNotSupportedBlock)
3901	for (User *U : Caller->users())
3902	if (PHINode *PN = dyn_cast<PHINode>(Val: U))
3903	if (PN->getParent() == PhisNotSupportedBlock)
3904	return false;
3905	}
3906	}
3907
3908	unsigned NumActualArgs = Call.arg_size();
3909	unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs);
3910
3911	// Prevent us turning:
3912	// declare void @takes_i32_inalloca(i32 inalloca)*
3913	// call void bitcast (void (i32)* @takes_i32_inalloca to void (i32))(i32 0)
3914	//
3915	// into:
3916	// call void @takes_i32_inalloca(i32 null)*
3917	//
3918	// Similarly, avoid folding away bitcasts of byval calls.
3919	if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) \|\|
3920	Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
3921	return false;
3922
3923	auto AI = Call.arg_begin();
3924	for (unsigned i = `0`, e = NumCommonArgs; i != e; ++i, ++AI) {
3925	Type *ParamTy = FT->getParamType(i);
3926	Type ActTy = (AI)->getType();
3927
3928	if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL))
3929	return false; // Cannot transform this parameter value.
3930
3931	// Check if there are any incompatible attributes we cannot drop safely.
3932	if (AttrBuilder (FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i))
3933	.overlaps(AM: AttributeFuncs::typeIncompatible(
3934	Ty: ParamTy, ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP)))
3935	return false; // Attribute not compatible with transformed value.
3936
3937	if (Call.isInAllocaArgument(i) \|\|
3938	CallerPAL.hasParamAttr(i, Attribute::Preallocated))
3939	return false; // Cannot transform to and from inalloca/preallocated.
3940
3941	if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
3942	return false;
3943
3944	if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
3945	Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
3946	return false; // Cannot transform to or from byval.
3947	}
3948
3949	if (Callee->isDeclaration()) {
3950	// Do not delete arguments unless we have a function body.
3951	if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
3952	return false;
3953
3954	// If the callee is just a declaration, don't change the varargsness of the
3955	// call. We don't want to introduce a varargs call where one doesn't
3956	// already exist.
3957	if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
3958	return false;
3959
3960	// If both the callee and the cast type are varargs, we still have to make
3961	// sure the number of fixed parameters are the same or we have the same
3962	// ABI issues as if we introduce a varargs call.
3963	if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
3964	FT->getNumParams() != Call.getFunctionType()->getNumParams())
3965	return false;
3966	}
3967
3968	if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
3969	!CallerPAL.isEmpty()) {
3970	// In this case we have more arguments than the new function type, but we
3971	// won't be dropping them. Check that these extra arguments have attributes
3972	// that are compatible with being a vararg call argument.
3973	unsigned SRetIdx;
3974	if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
3975	SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
3976	return false;
3977	}
3978
3979	// Okay, we decided that this is a safe thing to do: go ahead and start
3980	// inserting cast instructions as necessary.
3981	SmallVector<Value *, `8`> Args;
3982	SmallVector<AttributeSet, `8`> ArgAttrs;
3983	Args.reserve(N: NumActualArgs);
3984	ArgAttrs.reserve(N: NumActualArgs);
3985
3986	// Get any return attributes.
3987	AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
3988
3989	// If the return value is not being used, the type may not be compatible
3990	// with the existing attributes. Wipe out any problematic attributes.
3991	RAttrs.remove(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy));
3992
3993	LLVMContext &Ctx = Call.getContext();
3994	AI = Call.arg_begin();
3995	for (unsigned i = `0`; i != NumCommonArgs; ++i, ++AI) {
3996	Type *ParamTy = FT->getParamType(i);
3997
3998	Value NewArg = AI;
3999	if ((*AI)->getType() != ParamTy)
4000	NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy);
4001	Args.push_back(Elt: NewArg);
4002
4003	// Add any parameter attributes except the ones incompatible with the new
4004	// type. Note that we made sure all incompatible ones are safe to drop.
4005	AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
4006	Ty: ParamTy, ASK: AttributeFuncs::ASK_SAFE_TO_DROP);
4007	ArgAttrs.push_back(
4008	Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs));
4009	}
4010
4011	// If the function takes more arguments than the call was taking, add them
4012	// now.
4013	for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4014	Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i)));
4015	ArgAttrs.push_back(Elt: AttributeSet ());
4016	}
4017
4018	// If we are removing arguments to the function, emit an obnoxious warning.
4019	if (FT->getNumParams() < NumActualArgs) {
4020	// TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4021	if (FT->isVarArg()) {
4022	// Add all of the arguments in their promoted form to the arg list.
4023	for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4024	Type PTy = getPromotedType(Ty: (AI)->getType());
4025	Value NewArg = AI;
4026	if (PTy != (*AI)->getType()) {
4027	// Must promote to pass through va_arg area!
4028	Instruction::CastOps opcode =
4029	CastInst::getCastOpcode(Val: AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false*);
4030	NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy);
4031	}
4032	Args.push_back(Elt: NewArg);
4033
4034	// Add any parameter attributes.
4035	ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i));
4036	}
4037	}
4038	}
4039
4040	AttributeSet FnAttrs = CallerPAL.getFnAttrs();
4041
4042	if (NewRetTy->isVoidTy())
4043	Caller->setName(""); // Void type should not have a name.
4044
4045	assert((ArgAttrs.size() == FT->getNumParams() \|\| FT->isVarArg()) &&
4046	"missing argument attributes");
4047	AttributeList NewCallerPAL = AttributeList::get(
4048	C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs);
4049
4050	SmallVector<OperandBundleDef, `1`> OpBundles;
4051	Call.getOperandBundlesAsDefs(Defs&: OpBundles);
4052
4053	CallBase *NewCall;
4054	if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) {
4055	NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(),
4056	UnwindDest: II->getUnwindDest(), Args, OpBundles);
4057	} else {
4058	NewCall = Builder.CreateCall(Callee, Args, OpBundles);
4059	cast<CallInst>(Val: NewCall)->setTailCallKind(
4060	cast<CallInst>(Val: Caller)->getTailCallKind());
4061	}
4062	NewCall->takeName(V: Caller);
4063	NewCall->setCallingConv(Call.getCallingConv());
4064	NewCall->setAttributes(NewCallerPAL);
4065
4066	// Preserve prof metadata if any.
4067	NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof});
4068
4069	// Insert a cast of the return type as necessary.
4070	Instruction *NC = NewCall;
4071	Value *NV = NC;
4072	if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4073	if (!NV->getType()->isVoidTy()) {
4074	NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy);
4075	NC->setDebugLoc(Caller->getDebugLoc());
4076
4077	auto OptInsertPt = NewCall->getInsertionPointAfterDef();
4078	assert(OptInsertPt && "No place to insert cast");
4079	InsertNewInstBefore(New: NC, Old: *OptInsertPt);
4080	Worklist.pushUsersToWorkList(I&: *Caller);
4081	} else {
4082	NV = PoisonValue::get(T: Caller->getType());
4083	}
4084	}
4085
4086	if (!Caller->use_empty())
4087	replaceInstUsesWith(I&: *Caller, V: NV);
4088	else if (Caller->hasValueHandle()) {
4089	if (OldRetTy == NV->getType())
4090	ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV);
4091	else
4092	// We cannot call ValueIsRAUWd with a different type, and the
4093	// actual tracked value will disappear.
4094	ValueHandleBase::ValueIsDeleted(V: Caller);
4095	}
4096
4097	eraseInstFromFunction(I&: *Caller);
4098	return true;
4099	}
4100
4101	/// Turn a call to a function created by init_trampoline / adjust_trampoline
4102	/// intrinsic pair into a direct call to the underlying function.
4103	Instruction *
4104	InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
4105	IntrinsicInst &Tramp) {
4106	FunctionType *FTy = Call.getFunctionType();
4107	AttributeList Attrs = Call.getAttributes();
4108
4109	// If the call already has the 'nest' attribute somewhere then give up -
4110	// otherwise 'nest' would occur twice after splicing in the chain.
4111	if (Attrs.hasAttrSomewhere(Attribute::Nest))
4112	return nullptr;
4113
4114	Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: `1`)->stripPointerCasts());
4115	FunctionType *NestFTy = NestF->getFunctionType();
4116
4117	AttributeList NestAttrs = NestF->getAttributes();
4118	if (!NestAttrs.isEmpty()) {
4119	unsigned NestArgNo = `0`;
4120	Type NestTy = nullptr*;
4121	AttributeSet NestAttr;
4122
4123	// Look for a parameter marked with the 'nest' attribute.
4124	for (FunctionType::param_iterator I = NestFTy->param_begin(),
4125	E = NestFTy->param_end();
4126	I != E; ++NestArgNo, ++I) {
4127	AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo);
4128	if (AS.hasAttribute(Attribute::Nest)) {
4129	// Record the parameter type and any other attributes.
4130	NestTy = *I;
4131	NestAttr = AS;
4132	break;
4133	}
4134	}
4135
4136	if (NestTy) {
4137	std::vector<Value*> NewArgs;
4138	std::vector<AttributeSet> NewArgAttrs;
4139	NewArgs.reserve(n: Call.arg_size() + `1`);
4140	NewArgAttrs.reserve(n: Call.arg_size());
4141
4142	// Insert the nest argument into the call argument list, which may
4143	// mean appending it. Likewise for attributes.
4144
4145	{
4146	unsigned ArgNo = `0`;
4147	auto I = Call.arg_begin(), E = Call.arg_end();
4148	do {
4149	if (ArgNo == NestArgNo) {
4150	// Add the chain argument and attributes.
4151	Value *NestVal = Tramp.getArgOperand(i: `2`);
4152	if (NestVal->getType() != NestTy)
4153	NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest");
4154	NewArgs.push_back(x: NestVal);
4155	NewArgAttrs.push_back(x: NestAttr);
4156	}
4157
4158	if (I == E)
4159	break;
4160
4161	// Add the original argument and attributes.
4162	NewArgs.push_back(x: *I);
4163	NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo));
4164
4165	++ArgNo;
4166	++I;
4167	} while (true);
4168	}
4169
4170	// The trampoline may have been bitcast to a bogus type (FTy).
4171	// Handle this by synthesizing a new function type, equal to FTy
4172	// with the chain parameter inserted.
4173
4174	std::vector<Type*> NewTypes;
4175	NewTypes.reserve(n: FTy->getNumParams()+`1`);
4176
4177	// Insert the chain's type into the list of parameter types, which may
4178	// mean appending it.
4179	{
4180	unsigned ArgNo = `0`;
4181	FunctionType::param_iterator I = FTy->param_begin(),
4182	E = FTy->param_end();
4183
4184	do {
4185	if (ArgNo == NestArgNo)
4186	// Add the chain's type.
4187	NewTypes.push_back(x: NestTy);
4188
4189	if (I == E)
4190	break;
4191
4192	// Add the original type.
4193	NewTypes.push_back(x: *I);
4194
4195	++ArgNo;
4196	++I;
4197	} while (true);
4198	}
4199
4200	// Replace the trampoline call with a direct call. Let the generic
4201	// code sort out any function type mismatches.
4202	FunctionType *NewFTy =
4203	FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg());
4204	AttributeList NewPAL =
4205	AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(),
4206	RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs);
4207
4208	SmallVector<OperandBundleDef, `1`> OpBundles;
4209	Call.getOperandBundlesAsDefs(Defs&: OpBundles);
4210
4211	Instruction *NewCaller;
4212	if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) {
4213	NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(),
4214	IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles);
4215	cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv());
4216	cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL);
4217	} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
4218	NewCaller =
4219	CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(),
4220	IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles);
4221	cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv());
4222	cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL);
4223	} else {
4224	NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles);
4225	cast<CallInst>(Val: NewCaller)->setTailCallKind(
4226	cast<CallInst>(Val&: Call).getTailCallKind());
4227	cast<CallInst>(Val: NewCaller)->setCallingConv(
4228	cast<CallInst>(Val&: Call).getCallingConv());
4229	cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL);
4230	}
4231	NewCaller->setDebugLoc(Call.getDebugLoc());
4232
4233	return NewCaller;
4234	}
4235	}
4236
4237	// Replace the trampoline call with a direct call. Since there is no 'nest'
4238	// parameter, there is no need to adjust the argument list. Let the generic
4239	// code sort out any function type mismatches.
4240	Call.setCalledFunction(FTy, Fn: NestF);
4241	return &Call;
4242	}
4243

source code of llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp