TargetLowering.cpp source code [llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp]

1	//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This implements the TargetLowering class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/CodeGen/TargetLowering.h"
14	#include "llvm/ADT/STLExtras.h"
15	#include "llvm/Analysis/VectorUtils.h"
16	#include "llvm/CodeGen/CallingConvLower.h"
17	#include "llvm/CodeGen/CodeGenCommonISel.h"
18	#include "llvm/CodeGen/MachineFrameInfo.h"
19	#include "llvm/CodeGen/MachineFunction.h"
20	#include "llvm/CodeGen/MachineJumpTableInfo.h"
21	#include "llvm/CodeGen/MachineModuleInfoImpls.h"
22	#include "llvm/CodeGen/MachineRegisterInfo.h"
23	#include "llvm/CodeGen/SelectionDAG.h"
24	#include "llvm/CodeGen/TargetRegisterInfo.h"
25	#include "llvm/IR/DataLayout.h"
26	#include "llvm/IR/DerivedTypes.h"
27	#include "llvm/IR/GlobalVariable.h"
28	#include "llvm/IR/LLVMContext.h"
29	#include "llvm/MC/MCAsmInfo.h"
30	#include "llvm/MC/MCExpr.h"
31	#include "llvm/Support/DivisionByConstantInfo.h"
32	#include "llvm/Support/ErrorHandling.h"
33	#include "llvm/Support/KnownBits.h"
34	#include "llvm/Support/MathExtras.h"
35	#include "llvm/Target/TargetMachine.h"
36	#include <cctype>
37	using namespace llvm;
38
39	/// NOTE: The TargetMachine owns TLOF.
40	TargetLowering::TargetLowering(const TargetMachine &tm)
41	: TargetLoweringBase (tm) {}
42
43	const char TargetLowering::getTargetNodeName(unsigned* Opcode) const {
44	return nullptr;
45	}
46
47	bool TargetLowering::isPositionIndependent() const {
48	return getTargetMachine().isPositionIndependent();
49	}
50
51	/// Check whether a given call node is in tail position within its function. If
52	/// so, it sets Chain to the input chain of the tail call.
53	bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54	SDValue &Chain) const {
55	const Function &F = DAG.getMachineFunction().getFunction();
56
57	// First, check if tail calls have been disabled in this function.
58	if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
59	return false;
60
61	// Conservatively require the attributes of the call to match those of
62	// the return. Ignore following attributes because they don't affect the
63	// call sequence.
64	AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65	for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66	Attribute::DereferenceableOrNull, Attribute::NoAlias,
67	Attribute::NonNull, Attribute::NoUndef})
68	CallerAttrs.removeAttribute(Attr);
69
70	if (CallerAttrs.hasAttributes())
71	return false;
72
73	// It's not safe to eliminate the sign / zero extension of the return value.
74	if (CallerAttrs.contains(Attribute::ZExt) \|\|
75	CallerAttrs.contains(Attribute::SExt))
76	return false;
77
78	// Check if the only use is a function return node.
79	return isUsedByReturnOnly(Node, Chain);
80	}
81
82	bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
83	const uint32_t *CallerPreservedMask,
84	const SmallVectorImpl<CCValAssign> &ArgLocs,
85	const SmallVectorImpl<SDValue> &OutVals) const {
86	for (unsigned I = `0`, E = ArgLocs.size(); I != E; ++I) {
87	const CCValAssign &ArgLoc = ArgLocs [I];
88	if (!ArgLoc.isRegLoc())
89	continue;
90	MCRegister Reg = ArgLoc.getLocReg();
91	// Only look at callee saved registers.
92	if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
93	continue;
94	// Check that we pass the value used for the caller.
95	// (We look for a CopyFromReg reading a virtual register that is used
96	// for the function live-in value of register Reg)
97	SDValue Value = OutVals [I];
98	if (Value ->getOpcode() == ISD::AssertZext)
99	Value = Value.getOperand(i: `0`);
100	if (Value ->getOpcode() != ISD::CopyFromReg)
101	return false;
102	Register ArgReg = cast<RegisterSDNode>(Val: Value ->getOperand(Num: `1`))->getReg();
103	if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
104	return false;
105	}
106	return true;
107	}
108
109	/// Set CallLoweringInfo attribute flags based on a call instruction
110	/// and called function attributes.
111	void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
112	unsigned ArgIdx) {
113	IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SExt);
114	IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: ZExt);
115	IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: InReg);
116	IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: StructRet);
117	IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: Nest);
118	IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: ByVal);
119	IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: Preallocated);
120	IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: InAlloca);
121	IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: Returned);
122	IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SwiftSelf);
123	IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SwiftAsync);
124	IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: SwiftError);
125	Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
126	IndirectType = nullptr;
127	assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= `1` &&
128	"multiple ABI attributes?");
129	if (IsByVal) {
130	IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
131	if (!Alignment)
132	Alignment = Call->getParamAlign(ArgNo: ArgIdx);
133	}
134	if (IsPreallocated)
135	IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
136	if (IsInAlloca)
137	IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
138	if (IsSRet)
139	IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
140	}
141
142	/// Generate a libcall taking the given operands as arguments and returning a
143	/// result of type RetVT.
144	std::pair<SDValue, SDValue>
145	TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
146	ArrayRef<SDValue> Ops,
147	MakeLibCallOptions CallOptions,
148	const SDLoc &dl,
149	SDValue InChain) const {
150	if (!InChain)
151	InChain = DAG.getEntryNode();
152
153	TargetLowering::ArgListTy Args;
154	Args.reserve(n: Ops.size());
155
156	TargetLowering::ArgListEntry Entry;
157	for (unsigned i = `0`; i < Ops.size(); ++i) {
158	SDValue NewOp = Ops [i];
159	Entry.Node = NewOp;
160	Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
161	Entry.IsSExt = shouldSignExtendTypeInLibCall(Type: NewOp.getValueType(),
162	IsSigned: CallOptions.IsSExt);
163	Entry.IsZExt = !Entry.IsSExt;
164
165	if (CallOptions.IsSoften &&
166	!shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften [i])) {
167	Entry.IsSExt = Entry.IsZExt = false;
168	}
169	Args.push_back(x: Entry);
170	}
171
172	if (LC == RTLIB::UNKNOWN_LIBCALL)
173	report_fatal_error(reason: "Unsupported library call operation!");
174	SDValue Callee = DAG.getExternalSymbol(Sym: getLibcallName(Call: LC),
175	VT: getPointerTy(DL: DAG.getDataLayout()));
176
177	Type RetTy = RetVT.getTypeForEVT(Context&: DAG.getContext());
178	TargetLowering::CallLoweringInfo CLI(DAG);
179	bool signExtend = shouldSignExtendTypeInLibCall(Type: RetVT, IsSigned: CallOptions.IsSExt);
180	bool zeroExtend = !signExtend;
181
182	if (CallOptions.IsSoften &&
183	!shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften)) {
184	signExtend = zeroExtend = false;
185	}
186
187	CLI.setDebugLoc(dl)
188	.setChain(InChain)
189	.setLibCallee(CC: getLibcallCallingConv(Call: LC), ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
190	.setNoReturn(CallOptions.DoesNotReturn)
191	.setDiscardResult(!CallOptions.IsReturnValueUsed)
192	.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
193	.setSExtResult(signExtend)
194	.setZExtResult(zeroExtend);
195	return LowerCallTo(CLI);
196	}
197
198	bool TargetLowering::findOptimalMemOpLowering(
199	std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
200	unsigned SrcAS, const AttributeList &FuncAttributes) const {
201	if (Limit != ~unsigned(`0`) && Op.isMemcpyWithFixedDstAlign() &&
202	Op.getSrcAlign() < Op.getDstAlign())
203	return false;
204
205	EVT VT = getOptimalMemOpType(Op, FuncAttributes);
206
207	if (VT == MVT::Other) {
208	// Use the largest integer type whose alignment constraints are satisfied.
209	// We only need to check DstAlign here as SrcAlign is always greater or
210	// equal to DstAlign (or zero).
211	VT = MVT::i64;
212	if (Op.isFixedDstAlign())
213	while (Op.getDstAlign() < (VT.getSizeInBits() / `8`) &&
214	!allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
215	VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - `1`);
216	assert(VT.isInteger());
217
218	// Find the largest legal integer type.
219	MVT LVT = MVT::i64;
220	while (!isTypeLegal(VT: LVT))
221	LVT = (MVT::SimpleValueType)(LVT.SimpleTy - `1`);
222	assert(LVT.isInteger());
223
224	// If the type we've chosen is larger than the largest legal integer type
225	// then use that instead.
226	if (VT.bitsGT(VT: LVT))
227	VT = LVT;
228	}
229
230	unsigned NumMemOps = `0`;
231	uint64_t Size = Op.size();
232	while (Size) {
233	unsigned VTSize = VT.getSizeInBits() / `8`;
234	while (VTSize > Size) {
235	// For now, only use non-vector load / store's for the left-over pieces.
236	EVT NewVT = VT;
237	unsigned NewVTSize;
238
239	bool Found = false;
240	if (VT.isVector() \|\| VT.isFloatingPoint()) {
241	NewVT = (VT.getSizeInBits() > `64`) ? MVT::i64 : MVT::i32;
242	if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
243	isSafeMemOpType(NewVT.getSimpleVT()))
244	Found = true;
245	else if (NewVT == MVT::i64 &&
246	isOperationLegalOrCustom(Op: ISD::STORE, MVT::VT: f64) &&
247	isSafeMemOpType(MVT::f64)) {
248	// i64 is usually not legal on 32-bit targets, but f64 may be.
249	NewVT = MVT::f64;
250	Found = true;
251	}
252	}
253
254	if (!Found) {
255	do {
256	NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - `1`);
257	if (NewVT == MVT::i8)
258	break;
259	} while (!isSafeMemOpType(NewVT.getSimpleVT()));
260	}
261	NewVTSize = NewVT.getSizeInBits() / `8`;
262
263	// If the new VT cannot cover all of the remaining bits, then consider
264	// issuing a (or a pair of) unaligned and overlapping load / store.
265	unsigned Fast;
266	if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
267	allowsMisalignedMemoryAccesses(
268	VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align (`1`),
269	Flags: MachineMemOperand::MONone, &Fast) &&
270	Fast)
271	VTSize = Size;
272	else {
273	VT = NewVT;
274	VTSize = NewVTSize;
275	}
276	}
277
278	if (++NumMemOps > Limit)
279	return false;
280
281	MemOps.push_back(x: VT);
282	Size -= VTSize;
283	}
284
285	return true;
286	}
287
288	/// Soften the operands of a comparison. This code is shared among BR_CC,
289	/// SELECT_CC, and SETCC handlers.
290	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
291	SDValue &NewLHS, SDValue &NewRHS,
292	ISD::CondCode &CCCode,
293	const SDLoc &dl, const SDValue OldLHS,
294	const SDValue OldRHS) const {
295	SDValue Chain;
296	return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
297	OldRHS, Chain);
298	}
299
300	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
301	SDValue &NewLHS, SDValue &NewRHS,
302	ISD::CondCode &CCCode,
303	const SDLoc &dl, const SDValue OldLHS,
304	const SDValue OldRHS,
305	SDValue &Chain,
306	bool IsSignaling) const {
307	// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
308	// not supporting it. We can update this code when libgcc provides such
309	// functions.
310
311	assert((VT == MVT::f32 \|\| VT == MVT::f64 \|\| VT == MVT::f128 \|\| VT == MVT::ppcf128)
312	&& "Unsupported setcc type!");
313
314	// Expand into one or more soft-fp libcall(s).
315	RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
316	bool ShouldInvertCC = false;
317	switch (CCCode) {
318	case ISD::SETEQ:
319	case ISD::SETOEQ:
320	LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
321	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
322	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
323	break;
324	case ISD::SETNE:
325	case ISD::SETUNE:
326	LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
327	(VT == MVT::f64) ? RTLIB::UNE_F64 :
328	(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
329	break;
330	case ISD::SETGE:
331	case ISD::SETOGE:
332	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
333	(VT == MVT::f64) ? RTLIB::OGE_F64 :
334	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
335	break;
336	case ISD::SETLT:
337	case ISD::SETOLT:
338	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
339	(VT == MVT::f64) ? RTLIB::OLT_F64 :
340	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
341	break;
342	case ISD::SETLE:
343	case ISD::SETOLE:
344	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
345	(VT == MVT::f64) ? RTLIB::OLE_F64 :
346	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
347	break;
348	case ISD::SETGT:
349	case ISD::SETOGT:
350	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
351	(VT == MVT::f64) ? RTLIB::OGT_F64 :
352	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
353	break;
354	case ISD::SETO:
355	ShouldInvertCC = true;
356	[[fallthrough]];
357	case ISD::SETUO:
358	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
359	(VT == MVT::f64) ? RTLIB::UO_F64 :
360	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
361	break;
362	case ISD::SETONE:
363	// SETONE = O && UNE
364	ShouldInvertCC = true;
365	[[fallthrough]];
366	case ISD::SETUEQ:
367	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
368	(VT == MVT::f64) ? RTLIB::UO_F64 :
369	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
370	LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
371	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
372	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
373	break;
374	default:
375	// Invert CC for unordered comparisons
376	ShouldInvertCC = true;
377	switch (CCCode) {
378	case ISD::SETULT:
379	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
380	(VT == MVT::f64) ? RTLIB::OGE_F64 :
381	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
382	break;
383	case ISD::SETULE:
384	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
385	(VT == MVT::f64) ? RTLIB::OGT_F64 :
386	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
387	break;
388	case ISD::SETUGT:
389	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
390	(VT == MVT::f64) ? RTLIB::OLE_F64 :
391	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
392	break;
393	case ISD::SETUGE:
394	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
395	(VT == MVT::f64) ? RTLIB::OLT_F64 :
396	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
397	break;
398	default: llvm_unreachable("Do not know how to soften this setcc!");
399	}
400	}
401
402	// Use the target specific return value for comparison lib calls.
403	EVT RetVT = getCmpLibcallReturnType();
404	SDValue Ops[`2`] = {NewLHS, NewRHS};
405	TargetLowering::MakeLibCallOptions CallOptions;
406	EVT OpsVT[`2`] = { OldLHS.getValueType(),
407	OldRHS.getValueType() };
408	CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, Value: true);
409	auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, InChain: Chain);
410	NewLHS = Call.first;
411	NewRHS = DAG.getConstant(Val: `0`, DL: dl, VT: RetVT);
412
413	CCCode = getCmpLibcallCC(Call: LC1);
414	if (ShouldInvertCC) {
415	assert(RetVT.isInteger());
416	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
417	}
418
419	if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
420	// Update Chain.
421	Chain = Call.second;
422	} else {
423	EVT SetCCVT =
424	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
425	SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
426	auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, InChain: Chain);
427	CCCode = getCmpLibcallCC(Call: LC2);
428	if (ShouldInvertCC)
429	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
430	NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
431	if (Chain)
432	Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
433	Call2.second);
434	NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
435	VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
436	NewRHS = SDValue ();
437	}
438	}
439
440	/// Return the entry encoding for a jump table in the current function. The
441	/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
442	unsigned TargetLowering::getJumpTableEncoding() const {
443	// In non-pic modes, just use the address of a block.
444	if (!isPositionIndependent())
445	return MachineJumpTableInfo::EK_BlockAddress;
446
447	// In PIC mode, if the target supports a GPRel32 directive, use it.
448	if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
449	return MachineJumpTableInfo::EK_GPRel32BlockAddress;
450
451	// Otherwise, use a label difference.
452	return MachineJumpTableInfo::EK_LabelDifference32;
453	}
454
455	SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
456	SelectionDAG &DAG) const {
457	// If our PIC model is GP relative, use the global offset table as the base.
458	unsigned JTEncoding = getJumpTableEncoding();
459
460	if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) \|\|
461	(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
462	return DAG.getGLOBAL_OFFSET_TABLE(VT: getPointerTy(DL: DAG.getDataLayout()));
463
464	return Table;
465	}
466
467	/// This returns the relocation base for the given PIC jumptable, the same as
468	/// getPICJumpTableRelocBase, but as an MCExpr.
469	const MCExpr *
470	TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
471	unsigned JTI,MCContext &Ctx) const{
472	// The normal PIC reloc base is the label at the start of the jump table.
473	return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
474	}
475
476	SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
477	SDValue Addr, int JTI,
478	SelectionDAG &DAG) const {
479	SDValue Chain = Value;
480	// Jump table debug info is only needed if CodeView is enabled.
481	if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
482	Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
483	}
484	return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
485	}
486
487	bool
488	TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode GA) const* {
489	const TargetMachine &TM = getTargetMachine();
490	const GlobalValue *GV = GA->getGlobal();
491
492	// If the address is not even local to this DSO we will have to load it from
493	// a got and then add the offset.
494	if (!TM.shouldAssumeDSOLocal(GV))
495	return false;
496
497	// If the code is position independent we will have to add a base register.
498	if (isPositionIndependent())
499	return false;
500
501	// Otherwise we can do it.
502	return true;
503	}
504
505	//===----------------------------------------------------------------------===//
506	// Optimization Methods
507	//===----------------------------------------------------------------------===//
508
509	/// If the specified instruction has a constant integer operand and there are
510	/// bits set in that constant that are not demanded, then clear those bits and
511	/// return true.
512	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
513	const APInt &DemandedBits,
514	const APInt &DemandedElts,
515	TargetLoweringOpt &TLO) const {
516	SDLoc DL(Op);
517	unsigned Opcode = Op.getOpcode();
518
519	// Early-out if we've ended up calling an undemanded node, leave this to
520	// constant folding.
521	if (DemandedBits.isZero() \|\| DemandedElts.isZero())
522	return false;
523
524	// Do target-specific constant optimization.
525	if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
526	return TLO.New.getNode();
527
528	// FIXME: ISD::SELECT, ISD::SELECT_CC
529	switch (Opcode) {
530	default:
531	break;
532	case ISD::XOR:
533	case ISD::AND:
534	case ISD::OR: {
535	auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`));
536	if (!Op1C \|\| Op1C->isOpaque())
537	return false;
538
539	// If this is a 'not' op, don't touch it because that's a canonical form.
540	const APInt &C = Op1C->getAPIntValue();
541	if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
542	return false;
543
544	if (!C.isSubsetOf(RHS: DemandedBits)) {
545	EVT VT = Op.getValueType();
546	SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
547	SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: `0`), N2: NewC,
548	Flags: Op ->getFlags());
549	return TLO.CombineTo(O: Op, N: NewOp);
550	}
551
552	break;
553	}
554	}
555
556	return false;
557	}
558
559	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
560	const APInt &DemandedBits,
561	TargetLoweringOpt &TLO) const {
562	EVT VT = Op.getValueType();
563	APInt DemandedElts = VT.isVector()
564	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
565	: APInt (`1`, `1`);
566	return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
567	}
568
569	/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
570	/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
571	/// but it could be generalized for targets with other types of implicit
572	/// widening casts.
573	bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
574	const APInt &DemandedBits,
575	TargetLoweringOpt &TLO) const {
576	assert(Op.getNumOperands() == `2` &&
577	"ShrinkDemandedOp only supports binary operators!");
578	assert(Op.getNode()->getNumValues() == `1` &&
579	"ShrinkDemandedOp only supports nodes with one result!");
580
581	EVT VT = Op.getValueType();
582	SelectionDAG &DAG = TLO.DAG;
583	SDLoc dl(Op);
584
585	// Early return, as this function cannot handle vector types.
586	if (VT.isVector())
587	return false;
588
589	// Don't do this if the node has another user, which may require the
590	// full value.
591	if (!Op.getNode()->hasOneUse())
592	return false;
593
594	// Search for the smallest integer type with free casts to and from
595	// Op's type. For expedience, just check power-of-2 integer types.
596	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
597	unsigned DemandedSize = DemandedBits.getActiveBits();
598	for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
599	SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
600	EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
601	if (TLI.isTruncateFree(FromVT: VT, ToVT: SmallVT) && TLI.isZExtFree(FromTy: SmallVT, ToTy: VT)) {
602	// We found a type with free casts.
603	SDValue X = DAG.getNode(
604	Opcode: Op.getOpcode(), DL: dl, VT: SmallVT,
605	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `0`)),
606	N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `1`)));
607	assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
608	SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
609	return TLO.CombineTo(O: Op, N: Z);
610	}
611	}
612	return false;
613	}
614
615	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
616	DAGCombinerInfo &DCI) const {
617	SelectionDAG &DAG = DCI.DAG;
618	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
619	!DCI.isBeforeLegalizeOps());
620	KnownBits Known;
621
622	bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
623	if (Simplified) {
624	DCI.AddToWorklist(N: Op.getNode());
625	DCI.CommitTargetLoweringOpt(TLO);
626	}
627	return Simplified;
628	}
629
630	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
631	const APInt &DemandedElts,
632	DAGCombinerInfo &DCI) const {
633	SelectionDAG &DAG = DCI.DAG;
634	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
635	!DCI.isBeforeLegalizeOps());
636	KnownBits Known;
637
638	bool Simplified =
639	SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
640	if (Simplified) {
641	DCI.AddToWorklist(N: Op.getNode());
642	DCI.CommitTargetLoweringOpt(TLO);
643	}
644	return Simplified;
645	}
646
647	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
648	KnownBits &Known,
649	TargetLoweringOpt &TLO,
650	unsigned Depth,
651	bool AssumeSingleUse) const {
652	EVT VT = Op.getValueType();
653
654	// Since the number of lanes in a scalable vector is unknown at compile time,
655	// we track one bit which is implicitly broadcast to all lanes. This means
656	// that all lanes in a scalable vector are considered demanded.
657	APInt DemandedElts = VT.isFixedLengthVector()
658	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
659	: APInt (`1`, `1`);
660	return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
661	AssumeSingleUse);
662	}
663
664	// TODO: Under what circumstances can we create nodes? Constant folding?
665	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
666	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
667	SelectionDAG &DAG, unsigned Depth) const {
668	EVT VT = Op.getValueType();
669
670	// Limit search depth.
671	if (Depth >= SelectionDAG::MaxRecursionDepth)
672	return SDValue ();
673
674	// Ignore UNDEFs.
675	if (Op.isUndef())
676	return SDValue ();
677
678	// Not demanding any bits/elts from Op.
679	if (DemandedBits == `0` \|\| DemandedElts == `0`)
680	return DAG.getUNDEF(VT);
681
682	bool IsLE = DAG.getDataLayout().isLittleEndian();
683	unsigned NumElts = DemandedElts.getBitWidth();
684	unsigned BitWidth = DemandedBits.getBitWidth();
685	KnownBits LHSKnown, RHSKnown;
686	switch (Op.getOpcode()) {
687	case ISD::BITCAST: {
688	if (VT.isScalableVector())
689	return SDValue ();
690
691	SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: `0`));
692	EVT SrcVT = Src.getValueType();
693	EVT DstVT = Op.getValueType();
694	if (SrcVT == DstVT)
695	return Src;
696
697	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
698	unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
699	if (NumSrcEltBits == NumDstEltBits)
700	if (SDValue V = SimplifyMultipleUseDemandedBits(
701	Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + `1`))
702	return DAG.getBitcast(VT: DstVT, V);
703
704	if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == `0`) {
705	unsigned Scale = NumDstEltBits / NumSrcEltBits;
706	unsigned NumSrcElts = SrcVT.getVectorNumElements();
707	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
708	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
709	for (unsigned i = `0`; i != Scale; ++i) {
710	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
711	unsigned BitOffset = EltOffset * NumSrcEltBits;
712	APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
713	if (!Sub.isZero()) {
714	DemandedSrcBits \|= Sub;
715	for (unsigned j = `0`; j != NumElts; ++j)
716	if (DemandedElts [j])
717	DemandedSrcElts.setBit((j * Scale) + i);
718	}
719	}
720
721	if (SDValue V = SimplifyMultipleUseDemandedBits(
722	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
723	return DAG.getBitcast(VT: DstVT, V);
724	}
725
726	// TODO - bigendian once we have test coverage.
727	if (IsLE && (NumSrcEltBits % NumDstEltBits) == `0`) {
728	unsigned Scale = NumSrcEltBits / NumDstEltBits;
729	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
730	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
731	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
732	for (unsigned i = `0`; i != NumElts; ++i)
733	if (DemandedElts [i]) {
734	unsigned Offset = (i % Scale) * NumDstEltBits;
735	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
736	DemandedSrcElts.setBit(i / Scale);
737	}
738
739	if (SDValue V = SimplifyMultipleUseDemandedBits(
740	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
741	return DAG.getBitcast(VT: DstVT, V);
742	}
743
744	break;
745	}
746	case ISD::FREEZE: {
747	SDValue N0 = Op.getOperand(i: `0`);
748	if (DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
749	/PoisonOnly=/false))
750	return N0;
751	break;
752	}
753	case ISD::AND: {
754	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
755	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
756
757	// If all of the demanded bits are known 1 on one side, return the other.
758	// These bits cannot contribute to the result of the 'and' in this
759	// context.
760	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero \| RHSKnown.One))
761	return Op.getOperand(i: `0`);
762	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero \| LHSKnown.One))
763	return Op.getOperand(i: `1`);
764	break;
765	}
766	case ISD::OR: {
767	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
768	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
769
770	// If all of the demanded bits are known zero on one side, return the
771	// other. These bits cannot contribute to the result of the 'or' in this
772	// context.
773	if (DemandedBits.isSubsetOf(RHS: LHSKnown.One \| RHSKnown.Zero))
774	return Op.getOperand(i: `0`);
775	if (DemandedBits.isSubsetOf(RHS: RHSKnown.One \| LHSKnown.Zero))
776	return Op.getOperand(i: `1`);
777	break;
778	}
779	case ISD::XOR: {
780	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
781	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
782
783	// If all of the demanded bits are known zero on one side, return the
784	// other.
785	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
786	return Op.getOperand(i: `0`);
787	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
788	return Op.getOperand(i: `1`);
789	break;
790	}
791	case ISD::SHL: {
792	// If we are only demanding sign bits then we can use the shift source
793	// directly.
794	if (const APInt *MaxSA =
795	DAG.getValidMaximumShiftAmountConstant(V: Op, DemandedElts)) {
796	SDValue Op0 = Op.getOperand(i: `0`);
797	unsigned ShAmt = MaxSA->getZExtValue();
798	unsigned NumSignBits =
799	DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
800	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
801	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
802	return Op0;
803	}
804	break;
805	}
806	case ISD::SETCC: {
807	SDValue Op0 = Op.getOperand(i: `0`);
808	SDValue Op1 = Op.getOperand(i: `1`);
809	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
810	// If (1) we only need the sign-bit, (2) the setcc operands are the same
811	// width as the setcc result, and (3) the result of a setcc conforms to 0 or
812	// -1, we may be able to bypass the setcc.
813	if (DemandedBits.isSignMask() &&
814	Op0.getScalarValueSizeInBits() == BitWidth &&
815	getBooleanContents(Type: Op0.getValueType()) ==
816	BooleanContent::ZeroOrNegativeOneBooleanContent) {
817	// If we're testing X < 0, then this compare isn't needed - just use X!
818	// FIXME: We're limiting to integer types here, but this should also work
819	// if we don't care about FP signed-zero. The use of SETLT with FP means
820	// that we don't care about NaNs.
821	if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
822	(isNullConstant(V: Op1) \|\| ISD::isBuildVectorAllZeros(N: Op1.getNode())))
823	return Op0;
824	}
825	break;
826	}
827	case ISD::SIGN_EXTEND_INREG: {
828	// If none of the extended bits are demanded, eliminate the sextinreg.
829	SDValue Op0 = Op.getOperand(i: `0`);
830	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
831	unsigned ExBits = ExVT.getScalarSizeInBits();
832	if (DemandedBits.getActiveBits() <= ExBits &&
833	shouldRemoveRedundantExtend(Op))
834	return Op0;
835	// If the input is already sign extended, just drop the extension.
836	unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
837	if (NumSignBits >= (BitWidth - ExBits + `1`))
838	return Op0;
839	break;
840	}
841	case ISD::ANY_EXTEND_VECTOR_INREG:
842	case ISD::SIGN_EXTEND_VECTOR_INREG:
843	case ISD::ZERO_EXTEND_VECTOR_INREG: {
844	if (VT.isScalableVector())
845	return SDValue ();
846
847	// If we only want the lowest element and none of extended bits, then we can
848	// return the bitcasted source vector.
849	SDValue Src = Op.getOperand(i: `0`);
850	EVT SrcVT = Src.getValueType();
851	EVT DstVT = Op.getValueType();
852	if (IsLE && DemandedElts == `1` &&
853	DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
854	DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
855	return DAG.getBitcast(VT: DstVT, V: Src);
856	}
857	break;
858	}
859	case ISD::INSERT_VECTOR_ELT: {
860	if (VT.isScalableVector())
861	return SDValue ();
862
863	// If we don't demand the inserted element, return the base vector.
864	SDValue Vec = Op.getOperand(i: `0`);
865	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
866	EVT VecVT = Vec.getValueType();
867	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
868	!DemandedElts [CIdx->getZExtValue()])
869	return Vec;
870	break;
871	}
872	case ISD::INSERT_SUBVECTOR: {
873	if (VT.isScalableVector())
874	return SDValue ();
875
876	SDValue Vec = Op.getOperand(i: `0`);
877	SDValue Sub = Op.getOperand(i: `1`);
878	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
879	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
880	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
881	// If we don't demand the inserted subvector, return the base vector.
882	if (DemandedSubElts == `0`)
883	return Vec;
884	break;
885	}
886	case ISD::VECTOR_SHUFFLE: {
887	assert(!VT.isScalableVector());
888	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
889
890	// If all the demanded elts are from one operand and are inline,
891	// then we can use the operand directly.
892	bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
893	for (unsigned i = `0`; i != NumElts; ++i) {
894	int M = ShuffleMask [i];
895	if (M < `0` \|\| !DemandedElts [i])
896	continue;
897	AllUndef = false;
898	IdentityLHS &= (M == (int)i);
899	IdentityRHS &= ((M - NumElts) == i);
900	}
901
902	if (AllUndef)
903	return DAG.getUNDEF(VT: Op.getValueType());
904	if (IdentityLHS)
905	return Op.getOperand(i: `0`);
906	if (IdentityRHS)
907	return Op.getOperand(i: `1`);
908	break;
909	}
910	default:
911	// TODO: Probably okay to remove after audit; here to reduce change size
912	// in initial enablement patch for scalable vectors
913	if (VT.isScalableVector())
914	return SDValue ();
915
916	if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
917	if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
918	Op, DemandedBits, DemandedElts, DAG, Depth))
919	return V;
920	break;
921	}
922	return SDValue ();
923	}
924
925	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
926	SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
927	unsigned Depth) const {
928	EVT VT = Op.getValueType();
929	// Since the number of lanes in a scalable vector is unknown at compile time,
930	// we track one bit which is implicitly broadcast to all lanes. This means
931	// that all lanes in a scalable vector are considered demanded.
932	APInt DemandedElts = VT.isFixedLengthVector()
933	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
934	: APInt (`1`, `1`);
935	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
936	Depth);
937	}
938
939	SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
940	SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
941	unsigned Depth) const {
942	APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
943	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
944	Depth);
945	}
946
947	// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
948	// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
949	static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
950	const TargetLowering &TLI,
951	const APInt &DemandedBits,
952	const APInt &DemandedElts,
953	unsigned Depth) {
954	assert((Op.getOpcode() == ISD::SRL \|\| Op.getOpcode() == ISD::SRA) &&
955	"SRL or SRA node is required here!");
956	// Is the right shift using an immediate value of 1?
957	ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
958	if (!N1C \|\| !N1C->isOne())
959	return SDValue ();
960
961	// We are looking for an avgfloor
962	// add(ext, ext)
963	// or one of these as a avgceil
964	// add(add(ext, ext), 1)
965	// add(add(ext, 1), ext)
966	// add(ext, add(ext, 1))
967	SDValue Add = Op.getOperand(i: `0`);
968	if (Add.getOpcode() != ISD::ADD)
969	return SDValue ();
970
971	SDValue ExtOpA = Add.getOperand(i: `0`);
972	SDValue ExtOpB = Add.getOperand(i: `1`);
973	SDValue Add2;
974	auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
975	ConstantSDNode *ConstOp;
976	if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
977	ConstOp->isOne()) {
978	ExtOpA = Op1;
979	ExtOpB = Op3;
980	Add2 = A;
981	return true;
982	}
983	if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
984	ConstOp->isOne()) {
985	ExtOpA = Op1;
986	ExtOpB = Op2;
987	Add2 = A;
988	return true;
989	}
990	return false;
991	};
992	bool IsCeil =
993	(ExtOpA.getOpcode() == ISD::ADD &&
994	MatchOperands (ExtOpA.getOperand(i: `0`), ExtOpA.getOperand(i: `1`), ExtOpB, ExtOpA)) \|\|
995	(ExtOpB.getOpcode() == ISD::ADD &&
996	MatchOperands (ExtOpB.getOperand(i: `0`), ExtOpB.getOperand(i: `1`), ExtOpA, ExtOpB));
997
998	// If the shift is signed (sra):
999	// - Needs >= 2 sign bit for both operands.
1000	// - Needs >= 2 zero bits.
1001	// If the shift is unsigned (srl):
1002	// - Needs >= 1 zero bit for both operands.
1003	// - Needs 1 demanded bit zero and >= 2 sign bits.
1004	unsigned ShiftOpc = Op.getOpcode();
1005	bool IsSigned = false;
1006	unsigned KnownBits;
1007	unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
1008	unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
1009	unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - `1`;
1010	unsigned NumZeroA =
1011	DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1012	unsigned NumZeroB =
1013	DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1014	unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);
1015
1016	switch (ShiftOpc) {
1017	default:
1018	llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1019	case ISD::SRA: {
1020	if (NumZero >= `2` && NumSigned < NumZero) {
1021	IsSigned = false;
1022	KnownBits = NumZero;
1023	break;
1024	}
1025	if (NumSigned >= `1`) {
1026	IsSigned = true;
1027	KnownBits = NumSigned;
1028	break;
1029	}
1030	return SDValue ();
1031	}
1032	case ISD::SRL: {
1033	if (NumZero >= `1` && NumSigned < NumZero) {
1034	IsSigned = false;
1035	KnownBits = NumZero;
1036	break;
1037	}
1038	if (NumSigned >= `1` && DemandedBits.isSignBitClear()) {
1039	IsSigned = true;
1040	KnownBits = NumSigned;
1041	break;
1042	}
1043	return SDValue ();
1044	}
1045	}
1046
1047	unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1048	: (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1049
1050	// Find the smallest power-2 type that is legal for this vector size and
1051	// operation, given the original type size and the number of known sign/zero
1052	// bits.
1053	EVT VT = Op.getValueType();
1054	unsigned MinWidth =
1055	std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: `8`);
1056	EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
1057	if (VT.isVector())
1058	NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
1059	if (!TLI.isOperationLegalOrCustom(Op: AVGOpc, VT: NVT)) {
1060	// If we could not transform, and (both) adds are nuw/nsw, we can use the
1061	// larger type size to do the transform.
1062	if (!TLI.isOperationLegalOrCustom(Op: AVGOpc, VT))
1063	return SDValue ();
1064	if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: `0`),
1065	N1: Add.getOperand(i: `1`)) &&
1066	(!Add2 \|\| DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: `0`),
1067	N1: Add2.getOperand(i: `1`))))
1068	NVT = VT;
1069	else
1070	return SDValue ();
1071	}
1072
1073	SDLoc DL(Op);
1074	SDValue ResultAVG =
1075	DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
1076	N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
1077	return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
1078	}
1079
1080	/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1081	/// result of Op are ever used downstream. If we can use this information to
1082	/// simplify Op, create a new simplified DAG node and return true, returning the
1083	/// original and new nodes in Old and New. Otherwise, analyze the expression and
1084	/// return a mask of Known bits for the expression (used to simplify the
1085	/// caller). The Known bits may only be accurate for those bits in the
1086	/// OriginalDemandedBits and OriginalDemandedElts.
1087	bool TargetLowering::SimplifyDemandedBits(
1088	SDValue Op, const APInt &OriginalDemandedBits,
1089	const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1090	unsigned Depth, bool AssumeSingleUse) const {
1091	unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1092	assert(Op.getScalarValueSizeInBits() == BitWidth &&
1093	"Mask size mismatches value type size!");
1094
1095	// Don't know anything.
1096	Known = KnownBits (BitWidth);
1097
1098	EVT VT = Op.getValueType();
1099	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1100	unsigned NumElts = OriginalDemandedElts.getBitWidth();
1101	assert((!VT.isFixedLengthVector() \|\| NumElts == VT.getVectorNumElements()) &&
1102	"Unexpected vector size");
1103
1104	APInt DemandedBits = OriginalDemandedBits;
1105	APInt DemandedElts = OriginalDemandedElts;
1106	SDLoc dl(Op);
1107
1108	// Undef operand.
1109	if (Op.isUndef())
1110	return false;
1111
1112	// We can't simplify target constants.
1113	if (Op.getOpcode() == ISD::TargetConstant)
1114	return false;
1115
1116	if (Op.getOpcode() == ISD::Constant) {
1117	// We know all of the bits for a constant!
1118	Known = KnownBits::makeConstant(C: Op ->getAsAPIntVal());
1119	return false;
1120	}
1121
1122	if (Op.getOpcode() == ISD::ConstantFP) {
1123	// We know all of the bits for a floating point constant!
1124	Known = KnownBits::makeConstant(
1125	C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1126	return false;
1127	}
1128
1129	// Other users may use these bits.
1130	bool HasMultiUse = false;
1131	if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1132	if (Depth >= SelectionDAG::MaxRecursionDepth) {
1133	// Limit search depth.
1134	return false;
1135	}
1136	// Allow multiple uses, just set the DemandedBits/Elts to all bits.
1137	DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1138	DemandedElts = APInt::getAllOnes(numBits: NumElts);
1139	HasMultiUse = true;
1140	} else if (OriginalDemandedBits == `0` \|\| OriginalDemandedElts == `0`) {
1141	// Not demanding any bits/elts from Op.
1142	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1143	} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1144	// Limit search depth.
1145	return false;
1146	}
1147
1148	KnownBits Known2;
1149	switch (Op.getOpcode()) {
1150	case ISD::SCALAR_TO_VECTOR: {
1151	if (VT.isScalableVector())
1152	return false;
1153	if (!DemandedElts [`0`])
1154	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1155
1156	KnownBits SrcKnown;
1157	SDValue Src = Op.getOperand(i: `0`);
1158	unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1159	APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1160	if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + `1`))
1161	return true;
1162
1163	// Upper elements are undef, so only get the knownbits if we just demand
1164	// the bottom element.
1165	if (DemandedElts == `1`)
1166	Known = SrcKnown.anyextOrTrunc(BitWidth);
1167	break;
1168	}
1169	case ISD::BUILD_VECTOR:
1170	// Collect the known bits that are shared by every demanded element.
1171	// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1172	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1173	return false; // Don't fall through, will infinitely loop.
1174	case ISD::SPLAT_VECTOR: {
1175	SDValue Scl = Op.getOperand(i: `0`);
1176	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1177	KnownBits KnownScl;
1178	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1179	return true;
1180
1181	// Implicitly truncate the bits to match the official semantics of
1182	// SPLAT_VECTOR.
1183	Known = KnownScl.trunc(BitWidth);
1184	break;
1185	}
1186	case ISD::LOAD: {
1187	auto *LD = cast<LoadSDNode>(Val&: Op);
1188	if (getTargetConstantFromLoad(LD)) {
1189	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1190	return false; // Don't fall through, will infinitely loop.
1191	}
1192	if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == `0`) {
1193	// If this is a ZEXTLoad and we are looking at the loaded value.
1194	EVT MemVT = LD->getMemoryVT();
1195	unsigned MemBits = MemVT.getScalarSizeInBits();
1196	Known.Zero.setBitsFrom(MemBits);
1197	return false; // Don't fall through, will infinitely loop.
1198	}
1199	break;
1200	}
1201	case ISD::INSERT_VECTOR_ELT: {
1202	if (VT.isScalableVector())
1203	return false;
1204	SDValue Vec = Op.getOperand(i: `0`);
1205	SDValue Scl = Op.getOperand(i: `1`);
1206	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
1207	EVT VecVT = Vec.getValueType();
1208
1209	// If index isn't constant, assume we need all vector elements AND the
1210	// inserted element.
1211	APInt DemandedVecElts(DemandedElts);
1212	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1213	unsigned Idx = CIdx->getZExtValue();
1214	DemandedVecElts.clearBit(BitPosition: Idx);
1215
1216	// Inserted element is not required.
1217	if (!DemandedElts [Idx])
1218	return TLO.CombineTo(O: Op, N: Vec);
1219	}
1220
1221	KnownBits KnownScl;
1222	unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1223	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1224	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1225	return true;
1226
1227	Known = KnownScl.anyextOrTrunc(BitWidth);
1228
1229	KnownBits KnownVec;
1230	if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1231	Depth: Depth + `1`))
1232	return true;
1233
1234	if (!!DemandedVecElts)
1235	Known = Known.intersectWith(RHS: KnownVec);
1236
1237	return false;
1238	}
1239	case ISD::INSERT_SUBVECTOR: {
1240	if (VT.isScalableVector())
1241	return false;
1242	// Demand any elements from the subvector and the remainder from the src its
1243	// inserted into.
1244	SDValue Src = Op.getOperand(i: `0`);
1245	SDValue Sub = Op.getOperand(i: `1`);
1246	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
1247	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1248	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1249	APInt DemandedSrcElts = DemandedElts;
1250	DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);
1251
1252	KnownBits KnownSub, KnownSrc;
1253	if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1254	Depth: Depth + `1`))
1255	return true;
1256	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1257	Depth: Depth + `1`))
1258	return true;
1259
1260	Known.Zero.setAllBits();
1261	Known.One.setAllBits();
1262	if (!!DemandedSubElts)
1263	Known = Known.intersectWith(RHS: KnownSub);
1264	if (!!DemandedSrcElts)
1265	Known = Known.intersectWith(RHS: KnownSrc);
1266
1267	// Attempt to avoid multi-use src if we don't need anything from it.
1268	if (!DemandedBits.isAllOnes() \|\| !DemandedSubElts.isAllOnes() \|\|
1269	!DemandedSrcElts.isAllOnes()) {
1270	SDValue NewSub = SimplifyMultipleUseDemandedBits(
1271	Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1272	SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1273	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1274	if (NewSub \|\| NewSrc) {
1275	NewSub = NewSub ? NewSub : Sub;
1276	NewSrc = NewSrc ? NewSrc : Src;
1277	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1278	N3: Op.getOperand(i: `2`));
1279	return TLO.CombineTo(O: Op, N: NewOp);
1280	}
1281	}
1282	break;
1283	}
1284	case ISD::EXTRACT_SUBVECTOR: {
1285	if (VT.isScalableVector())
1286	return false;
1287	// Offset the demanded elts by the subvector index.
1288	SDValue Src = Op.getOperand(i: `0`);
1289	if (Src.getValueType().isScalableVector())
1290	break;
1291	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
1292	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1293	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1294
1295	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1296	Depth: Depth + `1`))
1297	return true;
1298
1299	// Attempt to avoid multi-use src if we don't need anything from it.
1300	if (!DemandedBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
1301	SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1302	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1303	if (DemandedSrc) {
1304	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1305	N2: Op.getOperand(i: `1`));
1306	return TLO.CombineTo(O: Op, N: NewOp);
1307	}
1308	}
1309	break;
1310	}
1311	case ISD::CONCAT_VECTORS: {
1312	if (VT.isScalableVector())
1313	return false;
1314	Known.Zero.setAllBits();
1315	Known.One.setAllBits();
1316	EVT SubVT = Op.getOperand(i: `0`).getValueType();
1317	unsigned NumSubVecs = Op.getNumOperands();
1318	unsigned NumSubElts = SubVT.getVectorNumElements();
1319	for (unsigned i = `0`; i != NumSubVecs; ++i) {
1320	APInt DemandedSubElts =
1321	DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1322	if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1323	Known&: Known2, TLO, Depth: Depth + `1`))
1324	return true;
1325	// Known bits are shared by every demanded subvector element.
1326	if (!!DemandedSubElts)
1327	Known = Known.intersectWith(RHS: Known2);
1328	}
1329	break;
1330	}
1331	case ISD::VECTOR_SHUFFLE: {
1332	assert(!VT.isScalableVector());
1333	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1334
1335	// Collect demanded elements from shuffle operands..
1336	APInt DemandedLHS, DemandedRHS;
1337	if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1338	DemandedRHS))
1339	break;
1340
1341	if (!!DemandedLHS \|\| !!DemandedRHS) {
1342	SDValue Op0 = Op.getOperand(i: `0`);
1343	SDValue Op1 = Op.getOperand(i: `1`);
1344
1345	Known.Zero.setAllBits();
1346	Known.One.setAllBits();
1347	if (!!DemandedLHS) {
1348	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1349	Depth: Depth + `1`))
1350	return true;
1351	Known = Known.intersectWith(RHS: Known2);
1352	}
1353	if (!!DemandedRHS) {
1354	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1355	Depth: Depth + `1`))
1356	return true;
1357	Known = Known.intersectWith(RHS: Known2);
1358	}
1359
1360	// Attempt to avoid multi-use ops if we don't need anything from them.
1361	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1362	Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1363	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1364	Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1365	if (DemandedOp0 \|\| DemandedOp1) {
1366	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1367	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1368	SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1369	return TLO.CombineTo(O: Op, N: NewOp);
1370	}
1371	}
1372	break;
1373	}
1374	case ISD::AND: {
1375	SDValue Op0 = Op.getOperand(i: `0`);
1376	SDValue Op1 = Op.getOperand(i: `1`);
1377
1378	// If the RHS is a constant, check to see if the LHS would be zero without
1379	// using the bits from the RHS. Below, we use knowledge about the RHS to
1380	// simplify the LHS, here we're using information from the LHS to simplify
1381	// the RHS.
1382	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1)) {
1383	// Do not increment Depth here; that can cause an infinite loop.
1384	KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1385	// If the LHS already has zeros where RHSC does, this 'and' is dead.
1386	if ((LHSKnown.Zero & DemandedBits) ==
1387	(~RHSC->getAPIntValue() & DemandedBits))
1388	return TLO.CombineTo(O: Op, N: Op0);
1389
1390	// If any of the set bits in the RHS are known zero on the LHS, shrink
1391	// the constant.
1392	if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1393	DemandedElts, TLO))
1394	return true;
1395
1396	// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1397	// constant, but if this 'and' is only clearing bits that were just set by
1398	// the xor, then this 'and' can be eliminated by shrinking the mask of
1399	// the xor. For example, for a 32-bit X:
1400	// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1401	if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1402	LHSKnown.One == ~RHSC->getAPIntValue()) {
1403	SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1404	return TLO.CombineTo(O: Op, N: Xor);
1405	}
1406	}
1407
1408	// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1409	// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1410	if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1411	(Op0.getOperand(i: `0`).isUndef() \|\|
1412	ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: `0`).getNode())) &&
1413	Op0 ->hasOneUse()) {
1414	unsigned NumSubElts =
1415	Op0.getOperand(i: `1`).getValueType().getVectorNumElements();
1416	unsigned SubIdx = Op0.getConstantOperandVal(i: `2`);
1417	APInt DemandedSub =
1418	APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1419	KnownBits KnownSubMask =
1420	TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + `1`);
1421	if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1422	SDValue NewAnd =
1423	TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1424	SDValue NewInsert =
1425	TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1426	N2: Op0.getOperand(i: `1`), N3: Op0.getOperand(i: `2`));
1427	return TLO.CombineTo(O: Op, N: NewInsert);
1428	}
1429	}
1430
1431	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1432	Depth: Depth + `1`))
1433	return true;
1434	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1435	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1436	Known&: Known2, TLO, Depth: Depth + `1`))
1437	return true;
1438	assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1439
1440	// If all of the demanded bits are known one on one side, return the other.
1441	// These bits cannot contribute to the result of the 'and'.
1442	if (DemandedBits.isSubsetOf(RHS: Known2.Zero \| Known.One))
1443	return TLO.CombineTo(O: Op, N: Op0);
1444	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.One))
1445	return TLO.CombineTo(O: Op, N: Op1);
1446	// If all of the demanded bits in the inputs are known zeros, return zero.
1447	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1448	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: dl, VT));
1449	// If the RHS is a constant, see if we can simplify it.
1450	if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1451	TLO))
1452	return true;
1453	// If the operation can be done in a smaller type, do so.
1454	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1455	return true;
1456
1457	// Attempt to avoid multi-use ops if we don't need anything from them.
1458	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1459	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1460	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1461	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1462	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1463	if (DemandedOp0 \|\| DemandedOp1) {
1464	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1465	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1466	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1467	return TLO.CombineTo(O: Op, N: NewOp);
1468	}
1469	}
1470
1471	Known &= Known2;
1472	break;
1473	}
1474	case ISD::OR: {
1475	SDValue Op0 = Op.getOperand(i: `0`);
1476	SDValue Op1 = Op.getOperand(i: `1`);
1477	SDNodeFlags Flags = Op.getNode()->getFlags();
1478	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1479	Depth: Depth + `1`)) {
1480	if (Flags.hasDisjoint()) {
1481	Flags.setDisjoint(false);
1482	Op ->setFlags(Flags);
1483	}
1484	return true;
1485	}
1486	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1487	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1488	Known&: Known2, TLO, Depth: Depth + `1`)) {
1489	if (Flags.hasDisjoint()) {
1490	Flags.setDisjoint(false);
1491	Op ->setFlags(Flags);
1492	}
1493	return true;
1494	}
1495	assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1496
1497	// If all of the demanded bits are known zero on one side, return the other.
1498	// These bits cannot contribute to the result of the 'or'.
1499	if (DemandedBits.isSubsetOf(RHS: Known2.One \| Known.Zero))
1500	return TLO.CombineTo(O: Op, N: Op0);
1501	if (DemandedBits.isSubsetOf(RHS: Known.One \| Known2.Zero))
1502	return TLO.CombineTo(O: Op, N: Op1);
1503	// If the RHS is a constant, see if we can simplify it.
1504	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1505	return true;
1506	// If the operation can be done in a smaller type, do so.
1507	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1508	return true;
1509
1510	// Attempt to avoid multi-use ops if we don't need anything from them.
1511	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1512	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1513	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1514	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1515	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1516	if (DemandedOp0 \|\| DemandedOp1) {
1517	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1518	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1519	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1520	return TLO.CombineTo(O: Op, N: NewOp);
1521	}
1522	}
1523
1524	// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1\|C2), (and Y, C2))
1525	// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1526	if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1527	Op0 ->hasOneUse() && Op1 ->hasOneUse()) {
1528	// Attempt to match all commutations - m_c_Or would've been useful!
1529	for (int I = `0`; I != `2`; ++I) {
1530	SDValue X = Op.getOperand(i: I).getOperand(i: `0`);
1531	SDValue C1 = Op.getOperand(i: I).getOperand(i: `1`);
1532	SDValue Alt = Op.getOperand(i: `1` - I).getOperand(i: `0`);
1533	SDValue C2 = Op.getOperand(i: `1` - I).getOperand(i: `1`);
1534	if (Alt.getOpcode() == ISD::OR) {
1535	for (int J = `0`; J != `2`; ++J) {
1536	if (X == Alt.getOperand(i: J)) {
1537	SDValue Y = Alt.getOperand(i: `1` - J);
1538	if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1539	Ops: {C1, C2})) {
1540	SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1541	SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1542	return TLO.CombineTo(
1543	O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1544	}
1545	}
1546	}
1547	}
1548	}
1549	}
1550
1551	Known \|= Known2;
1552	break;
1553	}
1554	case ISD::XOR: {
1555	SDValue Op0 = Op.getOperand(i: `0`);
1556	SDValue Op1 = Op.getOperand(i: `1`);
1557
1558	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1559	Depth: Depth + `1`))
1560	return true;
1561	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1562	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1563	Depth: Depth + `1`))
1564	return true;
1565	assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1566
1567	// If all of the demanded bits are known zero on one side, return the other.
1568	// These bits cannot contribute to the result of the 'xor'.
1569	if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1570	return TLO.CombineTo(O: Op, N: Op0);
1571	if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1572	return TLO.CombineTo(O: Op, N: Op1);
1573	// If the operation can be done in a smaller type, do so.
1574	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1575	return true;
1576
1577	// If all of the unknown bits are known to be zero on one side or the other
1578	// turn this into an inclusive* or.*
1579	// e.g. (A & C1)^(B & C2) -> (A & C1)\|(B & C2) iff C1&C2 == 0
1580	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1581	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1582
1583	ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1584	if (C) {
1585	// If one side is a constant, and all of the set bits in the constant are
1586	// also known set on the other side, turn this into an AND, as we know
1587	// the bits will be cleared.
1588	// e.g. (X \| C1) ^ C2 --> (X \| C1) & ~C2 iff (C1&C2) == C2
1589	// NB: it is okay if more bits are known than are requested
1590	if (C->getAPIntValue() == Known2.One) {
1591	SDValue ANDC =
1592	TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1593	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1594	}
1595
1596	// If the RHS is a constant, see if we can change it. Don't alter a -1
1597	// constant because that's a 'not' op, and that is better for combining
1598	// and codegen.
1599	if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1600	// We're flipping all demanded bits. Flip the undemanded bits too.
1601	SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1602	return TLO.CombineTo(O: Op, N: New);
1603	}
1604
1605	unsigned Op0Opcode = Op0.getOpcode();
1606	if ((Op0Opcode == ISD::SRL \|\| Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1607	if (ConstantSDNode *ShiftC =
1608	isConstOrConstSplat(N: Op0.getOperand(i: `1`), DemandedElts)) {
1609	// Don't crash on an oversized shift. We can not guarantee that a
1610	// bogus shift has been simplified to undef.
1611	if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1612	uint64_t ShiftAmt = ShiftC->getZExtValue();
1613	APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1614	Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1615	: Ones.lshr(shiftAmt: ShiftAmt);
1616	const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1617	if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1618	TLI.isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1619	// If the xor constant is a demanded mask, do a 'not' before the
1620	// shift:
1621	// xor (X << ShiftC), XorC --> (not X) << ShiftC
1622	// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1623	SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: `0`), VT);
1624	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1625	N2: Op0.getOperand(i: `1`)));
1626	}
1627	}
1628	}
1629	}
1630	}
1631
1632	// If we can't turn this into a 'not', try to shrink the constant.
1633	if (!C \|\| !C->isAllOnes())
1634	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1635	return true;
1636
1637	// Attempt to avoid multi-use ops if we don't need anything from them.
1638	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1639	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1640	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1641	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1642	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1643	if (DemandedOp0 \|\| DemandedOp1) {
1644	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1645	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1646	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1647	return TLO.CombineTo(O: Op, N: NewOp);
1648	}
1649	}
1650
1651	Known ^= Known2;
1652	break;
1653	}
1654	case ISD::SELECT:
1655	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1656	Known, TLO, Depth: Depth + `1`))
1657	return true;
1658	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1659	Known&: Known2, TLO, Depth: Depth + `1`))
1660	return true;
1661	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1662	assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1663
1664	// If the operands are constants, see if we can simplify them.
1665	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1666	return true;
1667
1668	// Only known if known in both the LHS and RHS.
1669	Known = Known.intersectWith(RHS: Known2);
1670	break;
1671	case ISD::VSELECT:
1672	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1673	Known, TLO, Depth: Depth + `1`))
1674	return true;
1675	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1676	Known&: Known2, TLO, Depth: Depth + `1`))
1677	return true;
1678	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1679	assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1680
1681	// Only known if known in both the LHS and RHS.
1682	Known = Known.intersectWith(RHS: Known2);
1683	break;
1684	case ISD::SELECT_CC:
1685	if (SimplifyDemandedBits(Op: Op.getOperand(i: `3`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1686	Known, TLO, Depth: Depth + `1`))
1687	return true;
1688	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1689	Known&: Known2, TLO, Depth: Depth + `1`))
1690	return true;
1691	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1692	assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1693
1694	// If the operands are constants, see if we can simplify them.
1695	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1696	return true;
1697
1698	// Only known if known in both the LHS and RHS.
1699	Known = Known.intersectWith(RHS: Known2);
1700	break;
1701	case ISD::SETCC: {
1702	SDValue Op0 = Op.getOperand(i: `0`);
1703	SDValue Op1 = Op.getOperand(i: `1`);
1704	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
1705	// If (1) we only need the sign-bit, (2) the setcc operands are the same
1706	// width as the setcc result, and (3) the result of a setcc conforms to 0 or
1707	// -1, we may be able to bypass the setcc.
1708	if (DemandedBits.isSignMask() &&
1709	Op0.getScalarValueSizeInBits() == BitWidth &&
1710	getBooleanContents(Type: Op0.getValueType()) ==
1711	BooleanContent::ZeroOrNegativeOneBooleanContent) {
1712	// If we're testing X < 0, then this compare isn't needed - just use X!
1713	// FIXME: We're limiting to integer types here, but this should also work
1714	// if we don't care about FP signed-zero. The use of SETLT with FP means
1715	// that we don't care about NaNs.
1716	if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1717	(isNullConstant(V: Op1) \|\| ISD::isBuildVectorAllZeros(N: Op1.getNode())))
1718	return TLO.CombineTo(O: Op, N: Op0);
1719
1720	// TODO: Should we check for other forms of sign-bit comparisons?
1721	// Examples: X <= -1, X >= 0
1722	}
1723	if (getBooleanContents(Type: Op0.getValueType()) ==
1724	TargetLowering::ZeroOrOneBooleanContent &&
1725	BitWidth > `1`)
1726	Known.Zero.setBitsFrom(`1`);
1727	break;
1728	}
1729	case ISD::SHL: {
1730	SDValue Op0 = Op.getOperand(i: `0`);
1731	SDValue Op1 = Op.getOperand(i: `1`);
1732	EVT ShiftVT = Op1.getValueType();
1733
1734	if (const APInt *SA =
1735	TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1736	unsigned ShAmt = SA->getZExtValue();
1737	if (ShAmt == `0`)
1738	return TLO.CombineTo(O: Op, N: Op0);
1739
1740	// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1741	// single shift. We can do this if the bottom bits (which are shifted
1742	// out) are never demanded.
1743	// TODO - support non-uniform vector amounts.
1744	if (Op0.getOpcode() == ISD::SRL) {
1745	if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1746	if (const APInt *SA2 =
1747	TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1748	unsigned C1 = SA2->getZExtValue();
1749	unsigned Opc = ISD::SHL;
1750	int Diff = ShAmt - C1;
1751	if (Diff < `0`) {
1752	Diff = -Diff;
1753	Opc = ISD::SRL;
1754	}
1755	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1756	return TLO.CombineTo(
1757	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
1758	}
1759	}
1760	}
1761
1762	// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1763	// are not demanded. This will likely allow the anyext to be folded away.
1764	// TODO - support non-uniform vector amounts.
1765	if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1766	SDValue InnerOp = Op0.getOperand(i: `0`);
1767	EVT InnerVT = InnerOp.getValueType();
1768	unsigned InnerBits = InnerVT.getScalarSizeInBits();
1769	if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1770	isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1771	SDValue NarrowShl = TLO.DAG.getNode(
1772	Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1773	N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1774	return TLO.CombineTo(
1775	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1776	}
1777
1778	// Repeat the SHL optimization above in cases where an extension
1779	// intervenes: (shl (anyext (shr x, c1)), c2) to
1780	// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1781	// aren't demanded (as above) and that the shifted upper c1 bits of
1782	// x aren't demanded.
1783	// TODO - support non-uniform vector amounts.
1784	if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1785	InnerOp.hasOneUse()) {
1786	if (const APInt *SA2 =
1787	TLO.DAG.getValidShiftAmountConstant(V: InnerOp, DemandedElts)) {
1788	unsigned InnerShAmt = SA2->getZExtValue();
1789	if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1790	DemandedBits.getActiveBits() <=
1791	(InnerBits - InnerShAmt + ShAmt) &&
1792	DemandedBits.countr_zero() >= ShAmt) {
1793	SDValue NewSA =
1794	TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1795	SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1796	Operand: InnerOp.getOperand(i: `0`));
1797	return TLO.CombineTo(
1798	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1799	}
1800	}
1801	}
1802	}
1803
1804	APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1805	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1806	Depth: Depth + `1`)) {
1807	SDNodeFlags Flags = Op.getNode()->getFlags();
1808	if (Flags.hasNoSignedWrap() \|\| Flags.hasNoUnsignedWrap()) {
1809	// Disable the nsw and nuw flags. We can no longer guarantee that we
1810	// won't wrap after simplification.
1811	Flags.setNoSignedWrap(false);
1812	Flags.setNoUnsignedWrap(false);
1813	Op ->setFlags(Flags);
1814	}
1815	return true;
1816	}
1817	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1818	Known.Zero <<= ShAmt;
1819	Known.One <<= ShAmt;
1820	// low bits known zero.
1821	Known.Zero.setLowBits(ShAmt);
1822
1823	// Attempt to avoid multi-use ops if we don't need anything from them.
1824	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1825	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1826	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1827	if (DemandedOp0) {
1828	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1829	return TLO.CombineTo(O: Op, N: NewOp);
1830	}
1831	}
1832
1833	// Try shrinking the operation as long as the shift amount will still be
1834	// in range.
1835	if ((ShAmt < DemandedBits.getActiveBits()) &&
1836	ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1837	return true;
1838
1839	// Narrow shift to lower half - similar to ShrinkDemandedOp.
1840	// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1841	// Only do this if we demand the upper half so the knownbits are correct.
1842	unsigned HalfWidth = BitWidth / `2`;
1843	if ((BitWidth % `2`) == `0` && !VT.isVector() && ShAmt < HalfWidth &&
1844	DemandedBits.countLeadingOnes() >= HalfWidth) {
1845	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1846	if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1847	isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1848	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1849	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1850	// If we're demanding the upper bits at all, we must ensure
1851	// that the upper bits of the shift result are known to be zero,
1852	// which is equivalent to the narrow shift being NUW.
1853	if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1854	bool IsNSW = Known.countMinSignBits() > HalfWidth;
1855	SDNodeFlags Flags;
1856	Flags.setNoSignedWrap(IsNSW);
1857	Flags.setNoUnsignedWrap(IsNUW);
1858	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1859	SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1860	Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1861	SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1862	N2: NewShiftAmt, Flags);
1863	SDValue NewExt =
1864	TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1865	return TLO.CombineTo(O: Op, N: NewExt);
1866	}
1867	}
1868	}
1869	} else {
1870	// This is a variable shift, so we can't shift the demand mask by a known
1871	// amount. But if we are not demanding high bits, then we are not
1872	// demanding those bits from the pre-shifted operand either.
1873	if (unsigned CTLZ = DemandedBits.countl_zero()) {
1874	APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1875	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1876	Depth: Depth + `1`)) {
1877	SDNodeFlags Flags = Op.getNode()->getFlags();
1878	if (Flags.hasNoSignedWrap() \|\| Flags.hasNoUnsignedWrap()) {
1879	// Disable the nsw and nuw flags. We can no longer guarantee that we
1880	// won't wrap after simplification.
1881	Flags.setNoSignedWrap(false);
1882	Flags.setNoUnsignedWrap(false);
1883	Op ->setFlags(Flags);
1884	}
1885	return true;
1886	}
1887	Known.resetAll();
1888	}
1889	}
1890
1891	// If we are only demanding sign bits then we can use the shift source
1892	// directly.
1893	if (const APInt *MaxSA =
1894	TLO.DAG.getValidMaximumShiftAmountConstant(V: Op, DemandedElts)) {
1895	unsigned ShAmt = MaxSA->getZExtValue();
1896	unsigned NumSignBits =
1897	TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
1898	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1899	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1900	return TLO.CombineTo(O: Op, N: Op0);
1901	}
1902	break;
1903	}
1904	case ISD::SRL: {
1905	SDValue Op0 = Op.getOperand(i: `0`);
1906	SDValue Op1 = Op.getOperand(i: `1`);
1907	EVT ShiftVT = Op1.getValueType();
1908
1909	// Try to match AVG patterns.
1910	if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
1911	DemandedElts, Depth: Depth + `1`))
1912	return TLO.CombineTo(O: Op, N: AVG);
1913
1914	if (const APInt *SA =
1915	TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1916	unsigned ShAmt = SA->getZExtValue();
1917	if (ShAmt == `0`)
1918	return TLO.CombineTo(O: Op, N: Op0);
1919
1920	// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1921	// single shift. We can do this if the top bits (which are shifted out)
1922	// are never demanded.
1923	// TODO - support non-uniform vector amounts.
1924	if (Op0.getOpcode() == ISD::SHL) {
1925	if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1926	if (const APInt *SA2 =
1927	TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1928	unsigned C1 = SA2->getZExtValue();
1929	unsigned Opc = ISD::SRL;
1930	int Diff = ShAmt - C1;
1931	if (Diff < `0`) {
1932	Diff = -Diff;
1933	Opc = ISD::SHL;
1934	}
1935	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1936	return TLO.CombineTo(
1937	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
1938	}
1939	}
1940	}
1941
1942	APInt InDemandedMask = (DemandedBits << ShAmt);
1943
1944	// If the shift is exact, then it does demand the low bits (and knows that
1945	// they are zero).
1946	if (Op ->getFlags().hasExact())
1947	InDemandedMask.setLowBits(ShAmt);
1948
1949	// Narrow shift to lower half - similar to ShrinkDemandedOp.
1950	// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1951	if ((BitWidth % `2`) == `0` && !VT.isVector()) {
1952	APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / `2`);
1953	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / `2`);
1954	if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1955	isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
1956	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1957	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
1958	((InDemandedMask.countLeadingZeros() >= (BitWidth / `2`)) \|\|
1959	TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
1960	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1961	SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1962	Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1963	SDValue NewShift =
1964	TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
1965	return TLO.CombineTo(
1966	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
1967	}
1968	}
1969
1970	// Compute the new bits that are at the top now.
1971	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1972	Depth: Depth + `1`))
1973	return true;
1974	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1975	Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
1976	Known.One.lshrInPlace(ShiftAmt: ShAmt);
1977	// High bits known zero.
1978	Known.Zero.setHighBits(ShAmt);
1979
1980	// Attempt to avoid multi-use ops if we don't need anything from them.
1981	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1982	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1983	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1984	if (DemandedOp0) {
1985	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1986	return TLO.CombineTo(O: Op, N: NewOp);
1987	}
1988	}
1989	} else {
1990	// Use generic knownbits computation as it has support for non-uniform
1991	// shift amounts.
1992	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1993	}
1994	break;
1995	}
1996	case ISD::SRA: {
1997	SDValue Op0 = Op.getOperand(i: `0`);
1998	SDValue Op1 = Op.getOperand(i: `1`);
1999	EVT ShiftVT = Op1.getValueType();
2000
2001	// If we only want bits that already match the signbit then we don't need
2002	// to shift.
2003	unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2004	if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`) >=
2005	NumHiDemandedBits)
2006	return TLO.CombineTo(O: Op, N: Op0);
2007
2008	// If this is an arithmetic shift right and only the low-bit is set, we can
2009	// always convert this into a logical shr, even if the shift amount is
2010	// variable. The low bit of the shift cannot be an input sign bit unless
2011	// the shift amount is >= the size of the datatype, which is undefined.
2012	if (DemandedBits.isOne())
2013	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2014
2015	// Try to match AVG patterns.
2016	if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
2017	DemandedElts, Depth: Depth + `1`))
2018	return TLO.CombineTo(O: Op, N: AVG);
2019
2020	if (const APInt *SA =
2021	TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
2022	unsigned ShAmt = SA->getZExtValue();
2023	if (ShAmt == `0`)
2024	return TLO.CombineTo(O: Op, N: Op0);
2025
2026	// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2027	// supports sext_inreg.
2028	if (Op0.getOpcode() == ISD::SHL) {
2029	if (const APInt *InnerSA =
2030	TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
2031	unsigned LowBits = BitWidth - ShAmt;
2032	EVT ExtVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits);
2033	if (VT.isVector())
2034	ExtVT = EVT::getVectorVT(Context&: *TLO.DAG.getContext(), VT: ExtVT,
2035	EC: VT.getVectorElementCount());
2036
2037	if (*InnerSA == ShAmt) {
2038	if (!TLO.LegalOperations() \|\|
2039	getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2040	return TLO.CombineTo(
2041	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2042	N1: Op0.getOperand(i: `0`),
2043	N2: TLO.DAG.getValueType(ExtVT)));
2044
2045	// Even if we can't convert to sext_inreg, we might be able to
2046	// remove this shift pair if the input is already sign extended.
2047	unsigned NumSignBits =
2048	TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: `0`), DemandedElts);
2049	if (NumSignBits > ShAmt)
2050	return TLO.CombineTo(O: Op, N: Op0.getOperand(i: `0`));
2051	}
2052	}
2053	}
2054
2055	APInt InDemandedMask = (DemandedBits << ShAmt);
2056
2057	// If the shift is exact, then it does demand the low bits (and knows that
2058	// they are zero).
2059	if (Op ->getFlags().hasExact())
2060	InDemandedMask.setLowBits(ShAmt);
2061
2062	// If any of the demanded bits are produced by the sign extension, we also
2063	// demand the input sign bit.
2064	if (DemandedBits.countl_zero() < ShAmt)
2065	InDemandedMask.setSignBit();
2066
2067	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2068	Depth: Depth + `1`))
2069	return true;
2070	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2071	Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2072	Known.One.lshrInPlace(ShiftAmt: ShAmt);
2073
2074	// If the input sign bit is known to be zero, or if none of the top bits
2075	// are demanded, turn this into an unsigned shift right.
2076	if (Known.Zero [BitWidth - ShAmt - `1`] \|\|
2077	DemandedBits.countl_zero() >= ShAmt) {
2078	SDNodeFlags Flags;
2079	Flags.setExact(Op ->getFlags().hasExact());
2080	return TLO.CombineTo(
2081	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2082	}
2083
2084	int Log2 = DemandedBits.exactLogBase2();
2085	if (Log2 >= `0`) {
2086	// The bit must come from the sign.
2087	SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - `1` - Log2, DL: dl, VT: ShiftVT);
2088	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2089	}
2090
2091	if (Known.One [BitWidth - ShAmt - `1`])
2092	// New bits are known one.
2093	Known.One.setHighBits(ShAmt);
2094
2095	// Attempt to avoid multi-use ops if we don't need anything from them.
2096	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2097	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2098	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2099	if (DemandedOp0) {
2100	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2101	return TLO.CombineTo(O: Op, N: NewOp);
2102	}
2103	}
2104	}
2105	break;
2106	}
2107	case ISD::FSHL:
2108	case ISD::FSHR: {
2109	SDValue Op0 = Op.getOperand(i: `0`);
2110	SDValue Op1 = Op.getOperand(i: `1`);
2111	SDValue Op2 = Op.getOperand(i: `2`);
2112	bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2113
2114	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2115	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2116
2117	// For fshl, 0-shift returns the 1st arg.
2118	// For fshr, 0-shift returns the 2nd arg.
2119	if (Amt == `0`) {
2120	if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2121	Known, TLO, Depth: Depth + `1`))
2122	return true;
2123	break;
2124	}
2125
2126	// fshl: (Op0 << Amt) \| (Op1 >> (BW - Amt))
2127	// fshr: (Op0 << (BW - Amt)) \| (Op1 >> Amt)
2128	APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2129	APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2130	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2131	Depth: Depth + `1`))
2132	return true;
2133	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2134	Depth: Depth + `1`))
2135	return true;
2136
2137	Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2138	Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2139	Known.One.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2140	Known.Zero.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2141	Known = Known.unionWith(RHS: Known2);
2142
2143	// Attempt to avoid multi-use ops if we don't need anything from them.
2144	if (!Demanded0.isAllOnes() \|\| !Demanded1.isAllOnes() \|\|
2145	!DemandedElts.isAllOnes()) {
2146	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2147	Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2148	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2149	Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2150	if (DemandedOp0 \|\| DemandedOp1) {
2151	DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2152	DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2153	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2154	N2: DemandedOp1, N3: Op2);
2155	return TLO.CombineTo(O: Op, N: NewOp);
2156	}
2157	}
2158	}
2159
2160	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2161	if (isPowerOf2_32(Value: BitWidth)) {
2162	APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - `1`);
2163	if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2164	Known&: Known2, TLO, Depth: Depth + `1`))
2165	return true;
2166	}
2167	break;
2168	}
2169	case ISD::ROTL:
2170	case ISD::ROTR: {
2171	SDValue Op0 = Op.getOperand(i: `0`);
2172	SDValue Op1 = Op.getOperand(i: `1`);
2173	bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2174
2175	// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2176	if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`))
2177	return TLO.CombineTo(O: Op, N: Op0);
2178
2179	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2180	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2181	unsigned RevAmt = BitWidth - Amt;
2182
2183	// rotl: (Op0 << Amt) \| (Op0 >> (BW - Amt))
2184	// rotr: (Op0 << (BW - Amt)) \| (Op0 >> Amt)
2185	APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2186	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2187	Depth: Depth + `1`))
2188	return true;
2189
2190	// rot*(x, 0) --> x
2191	if (Amt == `0`)
2192	return TLO.CombineTo(O: Op, N: Op0);
2193
2194	// See if we don't demand either half of the rotated bits.
2195	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT)) &&
2196	DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2197	Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2198	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2199	}
2200	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT)) &&
2201	DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2202	Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2203	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2204	}
2205	}
2206
2207	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2208	if (isPowerOf2_32(Value: BitWidth)) {
2209	APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - `1`);
2210	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2211	Depth: Depth + `1`))
2212	return true;
2213	}
2214	break;
2215	}
2216	case ISD::SMIN:
2217	case ISD::SMAX:
2218	case ISD::UMIN:
2219	case ISD::UMAX: {
2220	unsigned Opc = Op.getOpcode();
2221	SDValue Op0 = Op.getOperand(i: `0`);
2222	SDValue Op1 = Op.getOperand(i: `1`);
2223
2224	// If we're only demanding signbits, then we can simplify to OR/AND node.
2225	unsigned BitOp =
2226	(Opc == ISD::SMIN \|\| Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2227	unsigned NumSignBits =
2228	std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`),
2229	b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + `1`));
2230	unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2231	if (NumSignBits >= NumDemandedUpperBits)
2232	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc (Op), VT, N1: Op0, N2: Op1));
2233
2234	// Check if one arg is always less/greater than (or equal) to the other arg.
2235	KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2236	KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + `1`);
2237	switch (Opc) {
2238	case ISD::SMIN:
2239	if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2240	return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2241	if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2242	return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2243	Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2244	break;
2245	case ISD::SMAX:
2246	if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2247	return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2248	if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2249	return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2250	Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2251	break;
2252	case ISD::UMIN:
2253	if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2254	return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2255	if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2256	return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2257	Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2258	break;
2259	case ISD::UMAX:
2260	if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2261	return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2262	if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2263	return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2264	Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2265	break;
2266	}
2267	break;
2268	}
2269	case ISD::BITREVERSE: {
2270	SDValue Src = Op.getOperand(i: `0`);
2271	APInt DemandedSrcBits = DemandedBits.reverseBits();
2272	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2273	Depth: Depth + `1`))
2274	return true;
2275	Known.One = Known2.One.reverseBits();
2276	Known.Zero = Known2.Zero.reverseBits();
2277	break;
2278	}
2279	case ISD::BSWAP: {
2280	SDValue Src = Op.getOperand(i: `0`);
2281
2282	// If the only bits demanded come from one byte of the bswap result,
2283	// just shift the input byte into position to eliminate the bswap.
2284	unsigned NLZ = DemandedBits.countl_zero();
2285	unsigned NTZ = DemandedBits.countr_zero();
2286
2287	// Round NTZ down to the next byte. If we have 11 trailing zeros, then
2288	// we need all the bits down to bit 8. Likewise, round NLZ. If we
2289	// have 14 leading zeros, round to 8.
2290	NLZ = alignDown(Value: NLZ, Align: `8`);
2291	NTZ = alignDown(Value: NTZ, Align: `8`);
2292	// If we need exactly one byte, we can do this transformation.
2293	if (BitWidth - NLZ - NTZ == `8`) {
2294	// Replace this with either a left or right shift to get the byte into
2295	// the right place.
2296	unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2297	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: ShiftOpcode, VT)) {
2298	unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2299	SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2300	SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2301	return TLO.CombineTo(O: Op, N: NewOp);
2302	}
2303	}
2304
2305	APInt DemandedSrcBits = DemandedBits.byteSwap();
2306	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2307	Depth: Depth + `1`))
2308	return true;
2309	Known.One = Known2.One.byteSwap();
2310	Known.Zero = Known2.Zero.byteSwap();
2311	break;
2312	}
2313	case ISD::CTPOP: {
2314	// If only 1 bit is demanded, replace with PARITY as long as we're before
2315	// op legalization.
2316	// FIXME: Limit to scalars for now.
2317	if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2318	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2319	Operand: Op.getOperand(i: `0`)));
2320
2321	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2322	break;
2323	}
2324	case ISD::SIGN_EXTEND_INREG: {
2325	SDValue Op0 = Op.getOperand(i: `0`);
2326	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2327	unsigned ExVTBits = ExVT.getScalarSizeInBits();
2328
2329	// If we only care about the highest bit, don't bother shifting right.
2330	if (DemandedBits.isSignMask()) {
2331	unsigned MinSignedBits =
2332	TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2333	bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2334	// However if the input is already sign extended we expect the sign
2335	// extension to be dropped altogether later and do not simplify.
2336	if (!AlreadySignExtended) {
2337	// Compute the correct shift amount type, which must be getShiftAmountTy
2338	// for scalar types after legalization.
2339	SDValue ShiftAmt =
2340	TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2341	return TLO.CombineTo(O: Op,
2342	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2343	}
2344	}
2345
2346	// If none of the extended bits are demanded, eliminate the sextinreg.
2347	if (DemandedBits.getActiveBits() <= ExVTBits)
2348	return TLO.CombineTo(O: Op, N: Op0);
2349
2350	APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2351
2352	// Since the sign extended bits are demanded, we know that the sign
2353	// bit is demanded.
2354	InputDemandedBits.setBit(ExVTBits - `1`);
2355
2356	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2357	Depth: Depth + `1`))
2358	return true;
2359	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2360
2361	// If the sign bit of the input is known set or clear, then we know the
2362	// top bits of the result.
2363
2364	// If the input sign bit is known zero, convert this into a zero extension.
2365	if (Known.Zero [ExVTBits - `1`])
2366	return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2367
2368	APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2369	if (Known.One [ExVTBits - `1`]) { // Input sign bit known set
2370	Known.One.setBitsFrom(ExVTBits);
2371	Known.Zero &= Mask;
2372	} else { // Input sign bit unknown
2373	Known.Zero &= Mask;
2374	Known.One &= Mask;
2375	}
2376	break;
2377	}
2378	case ISD::BUILD_PAIR: {
2379	EVT HalfVT = Op.getOperand(i: `0`).getValueType();
2380	unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2381
2382	APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2383	APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2384
2385	KnownBits KnownLo, KnownHi;
2386
2387	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + `1`))
2388	return true;
2389
2390	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + `1`))
2391	return true;
2392
2393	Known = KnownHi.concat(Lo: KnownLo);
2394	break;
2395	}
2396	case ISD::ZERO_EXTEND_VECTOR_INREG:
2397	if (VT.isScalableVector())
2398	return false;
2399	[[fallthrough]];
2400	case ISD::ZERO_EXTEND: {
2401	SDValue Src = Op.getOperand(i: `0`);
2402	EVT SrcVT = Src.getValueType();
2403	unsigned InBits = SrcVT.getScalarSizeInBits();
2404	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2405	bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2406
2407	// If none of the top bits are demanded, convert this into an any_extend.
2408	if (DemandedBits.getActiveBits() <= InBits) {
2409	// If we only need the non-extended bits of the bottom element
2410	// then we can just bitcast to the result.
2411	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2412	VT.getSizeInBits() == SrcVT.getSizeInBits())
2413	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2414
2415	unsigned Opc =
2416	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2417	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2418	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2419	}
2420
2421	SDNodeFlags Flags = Op ->getFlags();
2422	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2423	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2424	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2425	Depth: Depth + `1`)) {
2426	if (Flags.hasNonNeg()) {
2427	Flags.setNonNeg(false);
2428	Op ->setFlags(Flags);
2429	}
2430	return true;
2431	}
2432	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2433	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2434	Known = Known.zext(BitWidth);
2435
2436	// Attempt to avoid multi-use ops if we don't need anything from them.
2437	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2438	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2439	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2440	break;
2441	}
2442	case ISD::SIGN_EXTEND_VECTOR_INREG:
2443	if (VT.isScalableVector())
2444	return false;
2445	[[fallthrough]];
2446	case ISD::SIGN_EXTEND: {
2447	SDValue Src = Op.getOperand(i: `0`);
2448	EVT SrcVT = Src.getValueType();
2449	unsigned InBits = SrcVT.getScalarSizeInBits();
2450	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2451	bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2452
2453	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2454	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2455
2456	// Since some of the sign extended bits are demanded, we know that the sign
2457	// bit is demanded.
2458	InDemandedBits.setBit(InBits - `1`);
2459
2460	// If none of the top bits are demanded, convert this into an any_extend.
2461	if (DemandedBits.getActiveBits() <= InBits) {
2462	// If we only need the non-extended bits of the bottom element
2463	// then we can just bitcast to the result.
2464	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2465	VT.getSizeInBits() == SrcVT.getSizeInBits())
2466	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2467
2468	// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2469	if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent \|\|
2470	TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + `1`) !=
2471	InBits) {
2472	unsigned Opc =
2473	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2474	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2475	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2476	}
2477	}
2478
2479	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2480	Depth: Depth + `1`))
2481	return true;
2482	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2483	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2484
2485	// If the sign bit is known one, the top bits match.
2486	Known = Known.sext(BitWidth);
2487
2488	// If the sign bit is known zero, convert this to a zero extend.
2489	if (Known.isNonNegative()) {
2490	unsigned Opc =
2491	IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2492	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT)) {
2493	SDNodeFlags Flags;
2494	if (!IsVecInReg)
2495	Flags.setNonNeg(true);
2496	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2497	}
2498	}
2499
2500	// Attempt to avoid multi-use ops if we don't need anything from them.
2501	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2503	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2504	break;
2505	}
2506	case ISD::ANY_EXTEND_VECTOR_INREG:
2507	if (VT.isScalableVector())
2508	return false;
2509	[[fallthrough]];
2510	case ISD::ANY_EXTEND: {
2511	SDValue Src = Op.getOperand(i: `0`);
2512	EVT SrcVT = Src.getValueType();
2513	unsigned InBits = SrcVT.getScalarSizeInBits();
2514	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2515	bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2516
2517	// If we only need the bottom element then we can just bitcast.
2518	// TODO: Handle ANY_EXTEND?
2519	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2520	VT.getSizeInBits() == SrcVT.getSizeInBits())
2521	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2522
2523	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2524	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2525	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2526	Depth: Depth + `1`))
2527	return true;
2528	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2529	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2530	Known = Known.anyext(BitWidth);
2531
2532	// Attempt to avoid multi-use ops if we don't need anything from them.
2533	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2534	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2535	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2536	break;
2537	}
2538	case ISD::TRUNCATE: {
2539	SDValue Src = Op.getOperand(i: `0`);
2540
2541	// Simplify the input, using demanded bit information, and compute the known
2542	// zero/one bits live out.
2543	unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2544	APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2545	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2546	Depth: Depth + `1`))
2547	return true;
2548	Known = Known.trunc(BitWidth);
2549
2550	// Attempt to avoid multi-use ops if we don't need anything from them.
2551	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2552	Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2553	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2554
2555	// If the input is only used by this truncate, see if we can shrink it based
2556	// on the known demanded bits.
2557	switch (Src.getOpcode()) {
2558	default:
2559	break;
2560	case ISD::SRL:
2561	// Shrink SRL by a constant if none of the high bits shifted in are
2562	// demanded.
2563	if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2564	// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2565	// undesirable.
2566	break;
2567
2568	if (Src.getNode()->hasOneUse()) {
2569	const APInt *ShAmtC =
2570	TLO.DAG.getValidShiftAmountConstant(V: Src, DemandedElts);
2571	if (!ShAmtC \|\| ShAmtC->uge(RHS: BitWidth))
2572	break;
2573	uint64_t ShVal = ShAmtC->getZExtValue();
2574
2575	APInt HighBits =
2576	APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2577	HighBits.lshrInPlace(ShiftAmt: ShVal);
2578	HighBits = HighBits.trunc(width: BitWidth);
2579
2580	if (!(HighBits & DemandedBits)) {
2581	// None of the shifted in bits are needed. Add a truncate of the
2582	// shift input, then shift it.
2583	SDValue NewShAmt =
2584	TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl, LegalTypes: TLO.LegalTypes());
2585	SDValue NewTrunc =
2586	TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: `0`));
2587	return TLO.CombineTo(
2588	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2589	}
2590	}
2591	break;
2592	}
2593
2594	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2595	break;
2596	}
2597	case ISD::AssertZext: {
2598	// AssertZext demands all of the high bits, plus any of the low bits
2599	// demanded by its users.
2600	EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2601	APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2602	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: ~InMask \| DemandedBits, Known,
2603	TLO, Depth: Depth + `1`))
2604	return true;
2605	assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2606
2607	Known.Zero \|= ~InMask;
2608	Known.One &= (~Known.Zero);
2609	break;
2610	}
2611	case ISD::EXTRACT_VECTOR_ELT: {
2612	SDValue Src = Op.getOperand(i: `0`);
2613	SDValue Idx = Op.getOperand(i: `1`);
2614	ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2615	unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2616
2617	if (SrcEltCnt.isScalable())
2618	return false;
2619
2620	// Demand the bits from every vector element without a constant index.
2621	unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2622	APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2623	if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2624	if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2625	DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2626
2627	// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2628	// anything about the extended bits.
2629	APInt DemandedSrcBits = DemandedBits;
2630	if (BitWidth > EltBitWidth)
2631	DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2632
2633	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2634	Depth: Depth + `1`))
2635	return true;
2636
2637	// Attempt to avoid multi-use ops if we don't need anything from them.
2638	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2639	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2640	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2641	SDValue NewOp =
2642	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2643	return TLO.CombineTo(O: Op, N: NewOp);
2644	}
2645	}
2646
2647	Known = Known2;
2648	if (BitWidth > EltBitWidth)
2649	Known = Known.anyext(BitWidth);
2650	break;
2651	}
2652	case ISD::BITCAST: {
2653	if (VT.isScalableVector())
2654	return false;
2655	SDValue Src = Op.getOperand(i: `0`);
2656	EVT SrcVT = Src.getValueType();
2657	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2658
2659	// If this is an FP->Int bitcast and if the sign bit is the only
2660	// thing demanded, turn this into a FGETSIGN.
2661	if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2662	DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2663	SrcVT.isFloatingPoint()) {
2664	bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2665	bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2666	if ((OpVTLegal \|\| i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2667	SrcVT != MVT::f128) {
2668	// Cannot eliminate/lower SHL for f128 yet.
2669	EVT Ty = OpVTLegal ? VT : MVT::i32;
2670	// Make a FGETSIGN + SHL to move the sign bit into the appropriate
2671	// place. We expect the SHL to be eliminated by other optimizations.
2672	SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2673	unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2674	if (!OpVTLegal && OpVTSizeInBits > `32`)
2675	Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2676	unsigned ShVal = Op.getValueSizeInBits() - `1`;
2677	SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2678	return TLO.CombineTo(O: Op,
2679	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2680	}
2681	}
2682
2683	// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2684	// Demand the elt/bit if any of the original elts/bits are demanded.
2685	if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == `0`) {
2686	unsigned Scale = BitWidth / NumSrcEltBits;
2687	unsigned NumSrcElts = SrcVT.getVectorNumElements();
2688	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2689	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2690	for (unsigned i = `0`; i != Scale; ++i) {
2691	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
2692	unsigned BitOffset = EltOffset * NumSrcEltBits;
2693	APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2694	if (!Sub.isZero()) {
2695	DemandedSrcBits \|= Sub;
2696	for (unsigned j = `0`; j != NumElts; ++j)
2697	if (DemandedElts [j])
2698	DemandedSrcElts.setBit((j * Scale) + i);
2699	}
2700	}
2701
2702	APInt KnownSrcUndef, KnownSrcZero;
2703	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2704	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2705	return true;
2706
2707	KnownBits KnownSrcBits;
2708	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2709	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2710	return true;
2711	} else if (IsLE && (NumSrcEltBits % BitWidth) == `0`) {
2712	// TODO - bigendian once we have test coverage.
2713	unsigned Scale = NumSrcEltBits / BitWidth;
2714	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
2715	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2716	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2717	for (unsigned i = `0`; i != NumElts; ++i)
2718	if (DemandedElts [i]) {
2719	unsigned Offset = (i % Scale) * BitWidth;
2720	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2721	DemandedSrcElts.setBit(i / Scale);
2722	}
2723
2724	if (SrcVT.isVector()) {
2725	APInt KnownSrcUndef, KnownSrcZero;
2726	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2727	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2728	return true;
2729	}
2730
2731	KnownBits KnownSrcBits;
2732	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2733	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2734	return true;
2735
2736	// Attempt to avoid multi-use ops if we don't need anything from them.
2737	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2738	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2739	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2740	SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2741	return TLO.CombineTo(O: Op, N: NewOp);
2742	}
2743	}
2744	}
2745
2746	// If this is a bitcast, let computeKnownBits handle it. Only do this on a
2747	// recursive call where Known may be useful to the caller.
2748	if (Depth > `0`) {
2749	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2750	return false;
2751	}
2752	break;
2753	}
2754	case ISD::MUL:
2755	if (DemandedBits.isPowerOf2()) {
2756	// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2757	// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2758	// odd (has LSB set), then the left-shifted low bit of X is the answer.
2759	unsigned CTZ = DemandedBits.countr_zero();
2760	ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
2761	if (C && C->getAPIntValue().countr_zero() == CTZ) {
2762	SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2763	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: AmtC);
2764	return TLO.CombineTo(O: Op, N: Shl);
2765	}
2766	}
2767	// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2768	// X * X is odd iff X is odd.
2769	// 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2770	if (Op.getOperand(i: `0`) == Op.getOperand(i: `1`) && DemandedBits.ult(RHS: `4`)) {
2771	SDValue One = TLO.DAG.getConstant(Val: `1`, DL: dl, VT);
2772	SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: One);
2773	return TLO.CombineTo(O: Op, N: And1);
2774	}
2775	[[fallthrough]];
2776	case ISD::ADD:
2777	case ISD::SUB: {
2778	// Add, Sub, and Mul don't demand any bits in positions beyond that
2779	// of the highest bit demanded of them.
2780	SDValue Op0 = Op.getOperand(i: `0`), Op1 = Op.getOperand(i: `1`);
2781	SDNodeFlags Flags = Op.getNode()->getFlags();
2782	unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2783	APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2784	KnownBits KnownOp0, KnownOp1;
2785	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO,
2786	Depth: Depth + `1`) \|\|
2787	SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2788	Depth: Depth + `1`) \|\|
2789	// See if the operation should be performed at a smaller bit width.
2790	ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2791	if (Flags.hasNoSignedWrap() \|\| Flags.hasNoUnsignedWrap()) {
2792	// Disable the nsw and nuw flags. We can no longer guarantee that we
2793	// won't wrap after simplification.
2794	Flags.setNoSignedWrap(false);
2795	Flags.setNoUnsignedWrap(false);
2796	Op ->setFlags(Flags);
2797	}
2798	return true;
2799	}
2800
2801	// neg x with only low bit demanded is simply x.
2802	if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2803	isNullConstant(V: Op0))
2804	return TLO.CombineTo(O: Op, N: Op1);
2805
2806	// Attempt to avoid multi-use ops if we don't need anything from them.
2807	if (!LoMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2808	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2809	Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2810	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2811	Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2812	if (DemandedOp0 \|\| DemandedOp1) {
2813	Flags.setNoSignedWrap(false);
2814	Flags.setNoUnsignedWrap(false);
2815	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2816	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2817	SDValue NewOp =
2818	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1, Flags);
2819	return TLO.CombineTo(O: Op, N: NewOp);
2820	}
2821	}
2822
2823	// If we have a constant operand, we may be able to turn it into -1 if we
2824	// do not demand the high bits. This can make the constant smaller to
2825	// encode, allow more general folding, or match specialized instruction
2826	// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2827	// is probably not useful (and could be detrimental).
2828	ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2829	APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2830	if (C && !C->isAllOnes() && !C->isOne() &&
2831	(C->getAPIntValue() \| HighMask).isAllOnes()) {
2832	SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2833	// Disable the nsw and nuw flags. We can no longer guarantee that we
2834	// won't wrap after simplification.
2835	Flags.setNoSignedWrap(false);
2836	Flags.setNoUnsignedWrap(false);
2837	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1, Flags);
2838	return TLO.CombineTo(O: Op, N: NewOp);
2839	}
2840
2841	// Match a multiply with a disguised negated-power-of-2 and convert to
2842	// an equivalent shift-left amount.
2843	// Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2844	auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2845	if (Mul.getOpcode() != ISD::MUL \|\| !Mul.hasOneUse())
2846	return `0`;
2847
2848	// Don't touch opaque constants. Also, ignore zero and power-of-2
2849	// multiplies. Those will get folded later.
2850	ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: `1`));
2851	if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2852	!MulC->getAPIntValue().isPowerOf2()) {
2853	APInt UnmaskedC = MulC->getAPIntValue() \| HighMask;
2854	if (UnmaskedC.isNegatedPowerOf2())
2855	return (-UnmaskedC).logBase2();
2856	}
2857	return `0`;
2858	};
2859
2860	auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2861	unsigned ShlAmt) {
2862	SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2863	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2864	SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2865	return TLO.CombineTo(O: Op, N: Res);
2866	};
2867
2868	if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2869	if (Op.getOpcode() == ISD::ADD) {
2870	// (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2871	if (unsigned ShAmt = getShiftLeftAmt (Op0))
2872	return foldMul (ISD::SUB, Op0.getOperand(i: `0`), Op1, ShAmt);
2873	// Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2874	if (unsigned ShAmt = getShiftLeftAmt (Op1))
2875	return foldMul (ISD::SUB, Op1.getOperand(i: `0`), Op0, ShAmt);
2876	}
2877	if (Op.getOpcode() == ISD::SUB) {
2878	// Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2879	if (unsigned ShAmt = getShiftLeftAmt (Op1))
2880	return foldMul (ISD::ADD, Op1.getOperand(i: `0`), Op0, ShAmt);
2881	}
2882	}
2883
2884	if (Op.getOpcode() == ISD::MUL) {
2885	Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2886	} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2887	Known = KnownBits::computeForAddSub(
2888	Add: Op.getOpcode() == ISD::ADD, NSW: Flags.hasNoSignedWrap(),
2889	NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
2890	}
2891	break;
2892	}
2893	default:
2894	// We also ask the target about intrinsics (which could be specific to it).
2895	if (Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
2896	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2897	// TODO: Probably okay to remove after audit; here to reduce change size
2898	// in initial enablement patch for scalable vectors
2899	if (Op.getValueType().isScalableVector())
2900	break;
2901	if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2902	Known, TLO, Depth))
2903	return true;
2904	break;
2905	}
2906
2907	// Just use computeKnownBits to compute output bits.
2908	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2909	break;
2910	}
2911
2912	// If we know the value of all of the demanded bits, return this as a
2913	// constant.
2914	if (!isTargetCanonicalConstantNode(Op) &&
2915	DemandedBits.isSubsetOf(RHS: Known.Zero \| Known.One)) {
2916	// Avoid folding to a constant if any OpaqueConstant is involved.
2917	const SDNode *N = Op.getNode();
2918	for (SDNode *Op :
2919	llvm::make_range(x: SDNodeIterator::begin(N), y: SDNodeIterator::end(N))) {
2920	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op))
2921	if (C->isOpaque())
2922	return false;
2923	}
2924	if (VT.isInteger())
2925	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
2926	if (VT.isFloatingPoint())
2927	return TLO.CombineTo(
2928	O: Op,
2929	N: TLO.DAG.getConstantFP(
2930	Val: APFloat (TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), DL: dl, VT));
2931	}
2932
2933	// A multi use 'all demanded elts' simplify failed to find any knownbits.
2934	// Try again just for the original demanded elts.
2935	// Ensure we do this AFTER constant folding above.
2936	if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2937	Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
2938
2939	return false;
2940	}
2941
2942	bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2943	const APInt &DemandedElts,
2944	DAGCombinerInfo &DCI) const {
2945	SelectionDAG &DAG = DCI.DAG;
2946	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2947	!DCI.isBeforeLegalizeOps());
2948
2949	APInt KnownUndef, KnownZero;
2950	bool Simplified =
2951	SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
2952	if (Simplified) {
2953	DCI.AddToWorklist(N: Op.getNode());
2954	DCI.CommitTargetLoweringOpt(TLO);
2955	}
2956
2957	return Simplified;
2958	}
2959
2960	/// Given a vector binary operation and known undefined elements for each input
2961	/// operand, compute whether each element of the output is undefined.
2962	static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2963	const APInt &UndefOp0,
2964	const APInt &UndefOp1) {
2965	EVT VT = BO.getValueType();
2966	assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2967	"Vector binop only");
2968
2969	EVT EltVT = VT.getVectorElementType();
2970	unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : `1`;
2971	assert(UndefOp0.getBitWidth() == NumElts &&
2972	UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2973
2974	auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2975	const APInt &UndefVals) {
2976	if (UndefVals [Index])
2977	return DAG.getUNDEF(VT: EltVT);
2978
2979	if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
2980	// Try hard to make sure that the getNode() call is not creating temporary
2981	// nodes. Ignore opaque integers because they do not constant fold.
2982	SDValue Elt = BV->getOperand(Num: Index);
2983	auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
2984	if (isa<ConstantFPSDNode>(Val: Elt) \|\| Elt.isUndef() \|\| (C && !C->isOpaque()))
2985	return Elt;
2986	}
2987
2988	return SDValue ();
2989	};
2990
2991	APInt KnownUndef = APInt::getZero(numBits: NumElts);
2992	for (unsigned i = `0`; i != NumElts; ++i) {
2993	// If both inputs for this element are either constant or undef and match
2994	// the element type, compute the constant/undef result for this element of
2995	// the vector.
2996	// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2997	// not handle FP constants. The code within getNode() should be refactored
2998	// to avoid the danger of creating a bogus temporary node here.
2999	SDValue C0 = getUndefOrConstantElt (BO.getOperand(i: `0`), i, UndefOp0);
3000	SDValue C1 = getUndefOrConstantElt (BO.getOperand(i: `1`), i, UndefOp1);
3001	if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3002	if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc (BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3003	KnownUndef.setBit(i);
3004	}
3005	return KnownUndef;
3006	}
3007
3008	bool TargetLowering::SimplifyDemandedVectorElts(
3009	SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3010	APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3011	bool AssumeSingleUse) const {
3012	EVT VT = Op.getValueType();
3013	unsigned Opcode = Op.getOpcode();
3014	APInt DemandedElts = OriginalDemandedElts;
3015	unsigned NumElts = DemandedElts.getBitWidth();
3016	assert(VT.isVector() && "Expected vector op");
3017
3018	KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3019
3020	const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3021	if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3022	return false;
3023
3024	// TODO: For now we assume we know nothing about scalable vectors.
3025	if (VT.isScalableVector())
3026	return false;
3027
3028	assert(VT.getVectorNumElements() == NumElts &&
3029	"Mask size mismatches value type element count!");
3030
3031	// Undef operand.
3032	if (Op.isUndef()) {
3033	KnownUndef.setAllBits();
3034	return false;
3035	}
3036
3037	// If Op has other users, assume that all elements are needed.
3038	if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3039	DemandedElts.setAllBits();
3040
3041	// Not demanding any elements from Op.
3042	if (DemandedElts == `0`) {
3043	KnownUndef.setAllBits();
3044	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3045	}
3046
3047	// Limit search depth.
3048	if (Depth >= SelectionDAG::MaxRecursionDepth)
3049	return false;
3050
3051	SDLoc DL(Op);
3052	unsigned EltSizeInBits = VT.getScalarSizeInBits();
3053	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3054
3055	// Helper for demanding the specified elements and all the bits of both binary
3056	// operands.
3057	auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3058	SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3059	DAG&: TLO.DAG, Depth: Depth + `1`);
3060	SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3061	DAG&: TLO.DAG, Depth: Depth + `1`);
3062	if (NewOp0 \|\| NewOp1) {
3063	SDValue NewOp =
3064	TLO.DAG.getNode(Opcode, DL: SDLoc (Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3065	N2: NewOp1 ? NewOp1 : Op1, Flags: Op ->getFlags());
3066	return TLO.CombineTo(O: Op, N: NewOp);
3067	}
3068	return false;
3069	};
3070
3071	switch (Opcode) {
3072	case ISD::SCALAR_TO_VECTOR: {
3073	if (!DemandedElts [`0`]) {
3074	KnownUndef.setAllBits();
3075	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3076	}
3077	SDValue ScalarSrc = Op.getOperand(i: `0`);
3078	if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3079	SDValue Src = ScalarSrc.getOperand(i: `0`);
3080	SDValue Idx = ScalarSrc.getOperand(i: `1`);
3081	EVT SrcVT = Src.getValueType();
3082
3083	ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3084
3085	if (SrcEltCnt.isScalable())
3086	return false;
3087
3088	unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3089	if (isNullConstant(V: Idx)) {
3090	APInt SrcDemandedElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: `0`);
3091	APInt SrcUndef = KnownUndef.zextOrTrunc(width: NumSrcElts);
3092	APInt SrcZero = KnownZero.zextOrTrunc(width: NumSrcElts);
3093	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3094	TLO, Depth: Depth + `1`))
3095	return true;
3096	}
3097	}
3098	KnownUndef.setHighBits(NumElts - `1`);
3099	break;
3100	}
3101	case ISD::BITCAST: {
3102	SDValue Src = Op.getOperand(i: `0`);
3103	EVT SrcVT = Src.getValueType();
3104
3105	// We only handle vectors here.
3106	// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3107	if (!SrcVT.isVector())
3108	break;
3109
3110	// Fast handling of 'identity' bitcasts.
3111	unsigned NumSrcElts = SrcVT.getVectorNumElements();
3112	if (NumSrcElts == NumElts)
3113	return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3114	KnownZero, TLO, Depth: Depth + `1`);
3115
3116	APInt SrcDemandedElts, SrcZero, SrcUndef;
3117
3118	// Bitcast from 'large element' src vector to 'small element' vector, we
3119	// must demand a source element if any DemandedElt maps to it.
3120	if ((NumElts % NumSrcElts) == `0`) {
3121	unsigned Scale = NumElts / NumSrcElts;
3122	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3123	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3124	TLO, Depth: Depth + `1`))
3125	return true;
3126
3127	// Try calling SimplifyDemandedBits, converting demanded elts to the bits
3128	// of the large element.
3129	// TODO - bigendian once we have test coverage.
3130	if (IsLE) {
3131	unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3132	APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3133	for (unsigned i = `0`; i != NumElts; ++i)
3134	if (DemandedElts [i]) {
3135	unsigned Ofs = (i % Scale) * EltSizeInBits;
3136	SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3137	}
3138
3139	KnownBits Known;
3140	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3141	TLO, Depth: Depth + `1`))
3142	return true;
3143
3144	// The bitcast has split each wide element into a number of
3145	// narrow subelements. We have just computed the Known bits
3146	// for wide elements. See if element splitting results in
3147	// some subelements being zero. Only for demanded elements!
3148	for (unsigned SubElt = `0`; SubElt != Scale; ++SubElt) {
3149	if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3150	.isAllOnes())
3151	continue;
3152	for (unsigned SrcElt = `0`; SrcElt != NumSrcElts; ++SrcElt) {
3153	unsigned Elt = Scale * SrcElt + SubElt;
3154	if (DemandedElts [Elt])
3155	KnownZero.setBit(Elt);
3156	}
3157	}
3158	}
3159
3160	// If the src element is zero/undef then all the output elements will be -
3161	// only demanded elements are guaranteed to be correct.
3162	for (unsigned i = `0`; i != NumSrcElts; ++i) {
3163	if (SrcDemandedElts [i]) {
3164	if (SrcZero [i])
3165	KnownZero.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3166	if (SrcUndef [i])
3167	KnownUndef.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3168	}
3169	}
3170	}
3171
3172	// Bitcast from 'small element' src vector to 'large element' vector, we
3173	// demand all smaller source elements covered by the larger demanded element
3174	// of this vector.
3175	if ((NumSrcElts % NumElts) == `0`) {
3176	unsigned Scale = NumSrcElts / NumElts;
3177	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3178	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3179	TLO, Depth: Depth + `1`))
3180	return true;
3181
3182	// If all the src elements covering an output element are zero/undef, then
3183	// the output element will be as well, assuming it was demanded.
3184	for (unsigned i = `0`; i != NumElts; ++i) {
3185	if (DemandedElts [i]) {
3186	if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3187	KnownZero.setBit(i);
3188	if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3189	KnownUndef.setBit(i);
3190	}
3191	}
3192	}
3193	break;
3194	}
3195	case ISD::FREEZE: {
3196	SDValue N0 = Op.getOperand(i: `0`);
3197	if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3198	/PoisonOnly=/false))
3199	return TLO.CombineTo(O: Op, N: N0);
3200
3201	// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3202	// freeze(op(x, ...)) -> op(freeze(x), ...).
3203	if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == `1`)
3204	return TLO.CombineTo(
3205	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3206	Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: `0`))));
3207	break;
3208	}
3209	case ISD::BUILD_VECTOR: {
3210	// Check all elements and simplify any unused elements with UNDEF.
3211	if (!DemandedElts.isAllOnes()) {
3212	// Don't simplify BROADCASTS.
3213	if (llvm::any_of(Range: Op ->op_values(),
3214	P: [&](SDValue Elt) { return Op.getOperand(i: `0`) != Elt; })) {
3215	SmallVector<SDValue, `32`> Ops(Op ->op_begin(), Op ->op_end());
3216	bool Updated = false;
3217	for (unsigned i = `0`; i != NumElts; ++i) {
3218	if (!DemandedElts [i] && !Ops [i].isUndef()) {
3219	Ops [i] = TLO.DAG.getUNDEF(VT: Ops [`0`].getValueType());
3220	KnownUndef.setBit(i);
3221	Updated = true;
3222	}
3223	}
3224	if (Updated)
3225	return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3226	}
3227	}
3228	for (unsigned i = `0`; i != NumElts; ++i) {
3229	SDValue SrcOp = Op.getOperand(i);
3230	if (SrcOp.isUndef()) {
3231	KnownUndef.setBit(i);
3232	} else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3233	(isNullConstant(V: SrcOp) \|\| isNullFPConstant(V: SrcOp))) {
3234	KnownZero.setBit(i);
3235	}
3236	}
3237	break;
3238	}
3239	case ISD::CONCAT_VECTORS: {
3240	EVT SubVT = Op.getOperand(i: `0`).getValueType();
3241	unsigned NumSubVecs = Op.getNumOperands();
3242	unsigned NumSubElts = SubVT.getVectorNumElements();
3243	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3244	SDValue SubOp = Op.getOperand(i);
3245	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3246	APInt SubUndef, SubZero;
3247	if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3248	Depth: Depth + `1`))
3249	return true;
3250	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3251	KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3252	}
3253
3254	// Attempt to avoid multi-use ops if we don't need anything from them.
3255	if (!DemandedElts.isAllOnes()) {
3256	bool FoundNewSub = false;
3257	SmallVector<SDValue, `2`> DemandedSubOps;
3258	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3259	SDValue SubOp = Op.getOperand(i);
3260	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3261	SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3262	Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3263	DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3264	FoundNewSub = NewSubOp ? true : FoundNewSub;
3265	}
3266	if (FoundNewSub) {
3267	SDValue NewOp =
3268	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, Ops: DemandedSubOps);
3269	return TLO.CombineTo(O: Op, N: NewOp);
3270	}
3271	}
3272	break;
3273	}
3274	case ISD::INSERT_SUBVECTOR: {
3275	// Demand any elements from the subvector and the remainder from the src its
3276	// inserted into.
3277	SDValue Src = Op.getOperand(i: `0`);
3278	SDValue Sub = Op.getOperand(i: `1`);
3279	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
3280	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3281	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3282	APInt DemandedSrcElts = DemandedElts;
3283	DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);
3284
3285	APInt SubUndef, SubZero;
3286	if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3287	Depth: Depth + `1`))
3288	return true;
3289
3290	// If none of the src operand elements are demanded, replace it with undef.
3291	if (!DemandedSrcElts && !Src.isUndef())
3292	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3293	N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3294	N3: Op.getOperand(i: `2`)));
3295
3296	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3297	TLO, Depth: Depth + `1`))
3298	return true;
3299	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3300	KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3301
3302	// Attempt to avoid multi-use ops if we don't need anything from them.
3303	if (!DemandedSrcElts.isAllOnes() \|\| !DemandedSubElts.isAllOnes()) {
3304	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3305	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3306	SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3307	Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3308	if (NewSrc \|\| NewSub) {
3309	NewSrc = NewSrc ? NewSrc : Src;
3310	NewSub = NewSub ? NewSub : Sub;
3311	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3312	N2: NewSub, N3: Op.getOperand(i: `2`));
3313	return TLO.CombineTo(O: Op, N: NewOp);
3314	}
3315	}
3316	break;
3317	}
3318	case ISD::EXTRACT_SUBVECTOR: {
3319	// Offset the demanded elts by the subvector index.
3320	SDValue Src = Op.getOperand(i: `0`);
3321	if (Src.getValueType().isScalableVector())
3322	break;
3323	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
3324	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3325	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3326
3327	APInt SrcUndef, SrcZero;
3328	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3329	Depth: Depth + `1`))
3330	return true;
3331	KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3332	KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3333
3334	// Attempt to avoid multi-use ops if we don't need anything from them.
3335	if (!DemandedElts.isAllOnes()) {
3336	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3337	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3338	if (NewSrc) {
3339	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3340	N2: Op.getOperand(i: `1`));
3341	return TLO.CombineTo(O: Op, N: NewOp);
3342	}
3343	}
3344	break;
3345	}
3346	case ISD::INSERT_VECTOR_ELT: {
3347	SDValue Vec = Op.getOperand(i: `0`);
3348	SDValue Scl = Op.getOperand(i: `1`);
3349	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
3350
3351	// For a legal, constant insertion index, if we don't need this insertion
3352	// then strip it, else remove it from the demanded elts.
3353	if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3354	unsigned Idx = CIdx->getZExtValue();
3355	if (!DemandedElts [Idx])
3356	return TLO.CombineTo(O: Op, N: Vec);
3357
3358	APInt DemandedVecElts(DemandedElts);
3359	DemandedVecElts.clearBit(BitPosition: Idx);
3360	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3361	KnownZero, TLO, Depth: Depth + `1`))
3362	return true;
3363
3364	KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3365
3366	KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) \|\| isNullFPConstant(V: Scl));
3367	break;
3368	}
3369
3370	APInt VecUndef, VecZero;
3371	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3372	Depth: Depth + `1`))
3373	return true;
3374	// Without knowing the insertion index we can't set KnownUndef/KnownZero.
3375	break;
3376	}
3377	case ISD::VSELECT: {
3378	SDValue Sel = Op.getOperand(i: `0`);
3379	SDValue LHS = Op.getOperand(i: `1`);
3380	SDValue RHS = Op.getOperand(i: `2`);
3381
3382	// Try to transform the select condition based on the current demanded
3383	// elements.
3384	APInt UndefSel, ZeroSel;
3385	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3386	Depth: Depth + `1`))
3387	return true;
3388
3389	// See if we can simplify either vselect operand.
3390	APInt DemandedLHS(DemandedElts);
3391	APInt DemandedRHS(DemandedElts);
3392	APInt UndefLHS, ZeroLHS;
3393	APInt UndefRHS, ZeroRHS;
3394	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3395	Depth: Depth + `1`))
3396	return true;
3397	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3398	Depth: Depth + `1`))
3399	return true;
3400
3401	KnownUndef = UndefLHS & UndefRHS;
3402	KnownZero = ZeroLHS & ZeroRHS;
3403
3404	// If we know that the selected element is always zero, we don't need the
3405	// select value element.
3406	APInt DemandedSel = DemandedElts & ~KnownZero;
3407	if (DemandedSel != DemandedElts)
3408	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3409	Depth: Depth + `1`))
3410	return true;
3411
3412	break;
3413	}
3414	case ISD::VECTOR_SHUFFLE: {
3415	SDValue LHS = Op.getOperand(i: `0`);
3416	SDValue RHS = Op.getOperand(i: `1`);
3417	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3418
3419	// Collect demanded elements from shuffle operands..
3420	APInt DemandedLHS(NumElts, `0`);
3421	APInt DemandedRHS(NumElts, `0`);
3422	for (unsigned i = `0`; i != NumElts; ++i) {
3423	int M = ShuffleMask [i];
3424	if (M < `0` \|\| !DemandedElts [i])
3425	continue;
3426	assert(`0` <= M && M < (int)(`2` * NumElts) && "Shuffle index out of range");
3427	if (M < (int)NumElts)
3428	DemandedLHS.setBit(M);
3429	else
3430	DemandedRHS.setBit(M - NumElts);
3431	}
3432
3433	// See if we can simplify either shuffle operand.
3434	APInt UndefLHS, ZeroLHS;
3435	APInt UndefRHS, ZeroRHS;
3436	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3437	Depth: Depth + `1`))
3438	return true;
3439	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3440	Depth: Depth + `1`))
3441	return true;
3442
3443	// Simplify mask using undef elements from LHS/RHS.
3444	bool Updated = false;
3445	bool IdentityLHS = true, IdentityRHS = true;
3446	SmallVector<int, `32`> NewMask(ShuffleMask);
3447	for (unsigned i = `0`; i != NumElts; ++i) {
3448	int &M = NewMask [i];
3449	if (M < `0`)
3450	continue;
3451	if (!DemandedElts [i] \|\| (M < (int)NumElts && UndefLHS [M]) \|\|
3452	(M >= (int)NumElts && UndefRHS [M - NumElts])) {
3453	Updated = true;
3454	M = -`1`;
3455	}
3456	IdentityLHS &= (M < `0`) \|\| (M == (int)i);
3457	IdentityRHS &= (M < `0`) \|\| ((M - NumElts) == i);
3458	}
3459
3460	// Update legal shuffle masks based on demanded elements if it won't reduce
3461	// to Identity which can cause premature removal of the shuffle mask.
3462	if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3463	SDValue LegalShuffle =
3464	buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3465	if (LegalShuffle)
3466	return TLO.CombineTo(O: Op, N: LegalShuffle);
3467	}
3468
3469	// Propagate undef/zero elements from LHS/RHS.
3470	for (unsigned i = `0`; i != NumElts; ++i) {
3471	int M = ShuffleMask [i];
3472	if (M < `0`) {
3473	KnownUndef.setBit(i);
3474	} else if (M < (int)NumElts) {
3475	if (UndefLHS [M])
3476	KnownUndef.setBit(i);
3477	if (ZeroLHS [M])
3478	KnownZero.setBit(i);
3479	} else {
3480	if (UndefRHS [M - NumElts])
3481	KnownUndef.setBit(i);
3482	if (ZeroRHS [M - NumElts])
3483	KnownZero.setBit(i);
3484	}
3485	}
3486	break;
3487	}
3488	case ISD::ANY_EXTEND_VECTOR_INREG:
3489	case ISD::SIGN_EXTEND_VECTOR_INREG:
3490	case ISD::ZERO_EXTEND_VECTOR_INREG: {
3491	APInt SrcUndef, SrcZero;
3492	SDValue Src = Op.getOperand(i: `0`);
3493	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3494	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3495	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3496	Depth: Depth + `1`))
3497	return true;
3498	KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3499	KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3500
3501	if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3502	Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3503	DemandedSrcElts == `1`) {
3504	// aext - if we just need the bottom element then we can bitcast.
3505	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3506	}
3507
3508	if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3509	// zext(undef) upper bits are guaranteed to be zero.
3510	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3511	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3512	KnownUndef.clearAllBits();
3513
3514	// zext - if we just need the bottom element then we can mask:
3515	// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3516	if (IsLE && DemandedSrcElts == `1` && Src.getOpcode() == ISD::AND &&
3517	Op ->isOnlyUserOf(N: Src.getNode()) &&
3518	Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3519	SDLoc DL(Op);
3520	EVT SrcVT = Src.getValueType();
3521	EVT SrcSVT = SrcVT.getScalarType();
3522	SmallVector<SDValue> MaskElts;
3523	MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3524	MaskElts.append(NumInputs: NumSrcElts - `1`, Elt: TLO.DAG.getConstant(Val: `0`, DL, VT: SrcSVT));
3525	SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3526	if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3527	Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: `1`), Mask})) {
3528	Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: `0`), N2: Fold);
3529	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3530	}
3531	}
3532	}
3533	break;
3534	}
3535
3536	// TODO: There are more binop opcodes that could be handled here - MIN,
3537	// MAX, saturated math, etc.
3538	case ISD::ADD: {
3539	SDValue Op0 = Op.getOperand(i: `0`);
3540	SDValue Op1 = Op.getOperand(i: `1`);
3541	if (Op0 == Op1 && Op ->isOnlyUserOf(N: Op0.getNode())) {
3542	APInt UndefLHS, ZeroLHS;
3543	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3544	Depth: Depth + `1`, /AssumeSingleUse/ true))
3545	return true;
3546	}
3547	[[fallthrough]];
3548	}
3549	case ISD::AVGCEILS:
3550	case ISD::AVGCEILU:
3551	case ISD::AVGFLOORS:
3552	case ISD::AVGFLOORU:
3553	case ISD::OR:
3554	case ISD::XOR:
3555	case ISD::SUB:
3556	case ISD::FADD:
3557	case ISD::FSUB:
3558	case ISD::FMUL:
3559	case ISD::FDIV:
3560	case ISD::FREM: {
3561	SDValue Op0 = Op.getOperand(i: `0`);
3562	SDValue Op1 = Op.getOperand(i: `1`);
3563
3564	APInt UndefRHS, ZeroRHS;
3565	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3566	Depth: Depth + `1`))
3567	return true;
3568	APInt UndefLHS, ZeroLHS;
3569	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3570	Depth: Depth + `1`))
3571	return true;
3572
3573	KnownZero = ZeroLHS & ZeroRHS;
3574	KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3575
3576	// Attempt to avoid multi-use ops if we don't need anything from them.
3577	// TODO - use KnownUndef to relax the demandedelts?
3578	if (!DemandedElts.isAllOnes())
3579	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3580	return true;
3581	break;
3582	}
3583	case ISD::SHL:
3584	case ISD::SRL:
3585	case ISD::SRA:
3586	case ISD::ROTL:
3587	case ISD::ROTR: {
3588	SDValue Op0 = Op.getOperand(i: `0`);
3589	SDValue Op1 = Op.getOperand(i: `1`);
3590
3591	APInt UndefRHS, ZeroRHS;
3592	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3593	Depth: Depth + `1`))
3594	return true;
3595	APInt UndefLHS, ZeroLHS;
3596	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3597	Depth: Depth + `1`))
3598	return true;
3599
3600	KnownZero = ZeroLHS;
3601	KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3602
3603	// Attempt to avoid multi-use ops if we don't need anything from them.
3604	// TODO - use KnownUndef to relax the demandedelts?
3605	if (!DemandedElts.isAllOnes())
3606	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3607	return true;
3608	break;
3609	}
3610	case ISD::MUL:
3611	case ISD::MULHU:
3612	case ISD::MULHS:
3613	case ISD::AND: {
3614	SDValue Op0 = Op.getOperand(i: `0`);
3615	SDValue Op1 = Op.getOperand(i: `1`);
3616
3617	APInt SrcUndef, SrcZero;
3618	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3619	Depth: Depth + `1`))
3620	return true;
3621	// If we know that a demanded element was zero in Op1 we don't need to
3622	// demand it in Op0 - its guaranteed to be zero.
3623	APInt DemandedElts0 = DemandedElts & ~SrcZero;
3624	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
3625	TLO, Depth: Depth + `1`))
3626	return true;
3627
3628	KnownUndef &= DemandedElts0;
3629	KnownZero &= DemandedElts0;
3630
3631	// If every element pair has a zero/undef then just fold to zero.
3632	// fold (and x, undef) -> 0 / (and x, 0) -> 0
3633	// fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3634	if (DemandedElts.isSubsetOf(RHS: SrcZero \| KnownZero \| SrcUndef \| KnownUndef))
3635	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3636
3637	// If either side has a zero element, then the result element is zero, even
3638	// if the other is an UNDEF.
3639	// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3640	// and then handle 'and' nodes with the rest of the binop opcodes.
3641	KnownZero \|= SrcZero;
3642	KnownUndef &= SrcUndef;
3643	KnownUndef &= ~KnownZero;
3644
3645	// Attempt to avoid multi-use ops if we don't need anything from them.
3646	if (!DemandedElts.isAllOnes())
3647	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3648	return true;
3649	break;
3650	}
3651	case ISD::TRUNCATE:
3652	case ISD::SIGN_EXTEND:
3653	case ISD::ZERO_EXTEND:
3654	if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: `0`), OriginalDemandedElts: DemandedElts, KnownUndef,
3655	KnownZero, TLO, Depth: Depth + `1`))
3656	return true;
3657
3658	if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3659	// zext(undef) upper bits are guaranteed to be zero.
3660	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3661	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3662	KnownUndef.clearAllBits();
3663	}
3664	break;
3665	default: {
3666	if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3667	if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3668	KnownZero, TLO, Depth))
3669	return true;
3670	} else {
3671	KnownBits Known;
3672	APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3673	if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3674	TLO, Depth, AssumeSingleUse))
3675	return true;
3676	}
3677	break;
3678	}
3679	}
3680	assert((KnownUndef & KnownZero) == `0` && "Elements flagged as undef AND zero");
3681
3682	// Constant fold all undef cases.
3683	// TODO: Handle zero cases as well.
3684	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3685	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3686
3687	return false;
3688	}
3689
3690	/// Determine which of the bits specified in Mask are known to be either zero or
3691	/// one and return them in the Known.
3692	void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3693	KnownBits &Known,
3694	const APInt &DemandedElts,
3695	const SelectionDAG &DAG,
3696	unsigned Depth) const {
3697	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3698	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3699	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3700	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3701	"Should use MaskedValueIsZero if you don't know whether Op"
3702	" is a target node!");
3703	Known.resetAll();
3704	}
3705
3706	void TargetLowering::computeKnownBitsForTargetInstr(
3707	GISelKnownBits &Analysis, Register R, KnownBits &Known,
3708	const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3709	unsigned Depth) const {
3710	Known.resetAll();
3711	}
3712
3713	void TargetLowering::computeKnownBitsForFrameIndex(
3714	const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3715	// The low bits are known zero if the pointer is aligned.
3716	Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3717	}
3718
3719	Align TargetLowering::computeKnownAlignForTargetInstr(
3720	GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3721	unsigned Depth) const {
3722	return Align (`1`);
3723	}
3724
3725	/// This method can be implemented by targets that want to expose additional
3726	/// information about sign bits to the DAG Combiner.
3727	unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3728	const APInt &,
3729	const SelectionDAG &,
3730	unsigned Depth) const {
3731	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3732	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3733	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3734	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3735	"Should use ComputeNumSignBits if you don't know whether Op"
3736	" is a target node!");
3737	return `1`;
3738	}
3739
3740	unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3741	GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3742	const MachineRegisterInfo &MRI, unsigned Depth) const {
3743	return `1`;
3744	}
3745
3746	bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3747	SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3748	TargetLoweringOpt &TLO, unsigned Depth) const {
3749	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3750	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3751	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3752	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753	"Should use SimplifyDemandedVectorElts if you don't know whether Op"
3754	" is a target node!");
3755	return false;
3756	}
3757
3758	bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3759	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3760	KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3761	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3762	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3763	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3764	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3765	"Should use SimplifyDemandedBits if you don't know whether Op"
3766	" is a target node!");
3767	computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3768	return false;
3769	}
3770
3771	SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3772	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3773	SelectionDAG &DAG, unsigned Depth) const {
3774	assert(
3775	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3776	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3777	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3778	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3779	"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3780	" is a target node!");
3781	return SDValue ();
3782	}
3783
3784	SDValue
3785	TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3786	SDValue N1, MutableArrayRef<int> Mask,
3787	SelectionDAG &DAG) const {
3788	bool LegalMask = isShuffleMaskLegal(Mask, VT);
3789	if (!LegalMask) {
3790	std::swap(a&: N0, b&: N1);
3791	ShuffleVectorSDNode::commuteMask(Mask);
3792	LegalMask = isShuffleMaskLegal(Mask, VT);
3793	}
3794
3795	if (!LegalMask)
3796	return SDValue ();
3797
3798	return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
3799	}
3800
3801	const Constant TargetLowering::getTargetConstantFromLoad(LoadSDNode) const {
3802	return nullptr;
3803	}
3804
3805	bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3806	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3807	bool PoisonOnly, unsigned Depth) const {
3808	assert(
3809	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3810	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3811	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3812	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3813	"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3814	" is a target node!");
3815
3816	// If Op can't create undef/poison and none of its operands are undef/poison
3817	// then Op is never undef/poison.
3818	return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3819	/ConsiderFlags/ true, Depth) &&
3820	all_of(Range: Op ->ops(), P: [&](SDValue V) {
3821	return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
3822	Depth: Depth + `1`);
3823	});
3824	}
3825
3826	bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3827	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3828	bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3829	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3830	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3831	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3832	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3833	"Should use canCreateUndefOrPoison if you don't know whether Op"
3834	" is a target node!");
3835	// Be conservative and return true.
3836	return true;
3837	}
3838
3839	bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3840	const SelectionDAG &DAG,
3841	bool SNaN,
3842	unsigned Depth) const {
3843	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3844	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3845	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3846	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3847	"Should use isKnownNeverNaN if you don't know whether Op"
3848	" is a target node!");
3849	return false;
3850	}
3851
3852	bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3853	const APInt &DemandedElts,
3854	APInt &UndefElts,
3855	const SelectionDAG &DAG,
3856	unsigned Depth) const {
3857	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3858	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3859	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3860	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3861	"Should use isSplatValue if you don't know whether Op"
3862	" is a target node!");
3863	return false;
3864	}
3865
3866	// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3867	// work with truncating build vectors and vectors with elements of less than
3868	// 8 bits.
3869	bool TargetLowering::isConstTrueVal(SDValue N) const {
3870	if (!N)
3871	return false;
3872
3873	unsigned EltWidth;
3874	APInt CVal;
3875	if (ConstantSDNode CN = isConstOrConstSplat(N, /AllowUndefs=/*false,
3876	/AllowTruncation=/true)) {
3877	CVal = CN->getAPIntValue();
3878	EltWidth = N.getValueType().getScalarSizeInBits();
3879	} else
3880	return false;
3881
3882	// If this is a truncating splat, truncate the splat value.
3883	// Otherwise, we may fail to match the expected values below.
3884	if (EltWidth < CVal.getBitWidth())
3885	CVal = CVal.trunc(width: EltWidth);
3886
3887	switch (getBooleanContents(Type: N.getValueType())) {
3888	case UndefinedBooleanContent:
3889	return CVal [`0`];
3890	case ZeroOrOneBooleanContent:
3891	return CVal.isOne();
3892	case ZeroOrNegativeOneBooleanContent:
3893	return CVal.isAllOnes();
3894	}
3895
3896	llvm_unreachable("Invalid boolean contents");
3897	}
3898
3899	bool TargetLowering::isConstFalseVal(SDValue N) const {
3900	if (!N)
3901	return false;
3902
3903	const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
3904	if (!CN) {
3905	const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
3906	if (!BV)
3907	return false;
3908
3909	// Only interested in constant splats, we don't care about undef
3910	// elements in identifying boolean constants and getConstantSplatNode
3911	// returns NULL if all ops are undef;
3912	CN = BV->getConstantSplatNode();
3913	if (!CN)
3914	return false;
3915	}
3916
3917	if (getBooleanContents(Type: N ->getValueType(ResNo: `0`)) == UndefinedBooleanContent)
3918	return !CN->getAPIntValue()[`0`];
3919
3920	return CN->isZero();
3921	}
3922
3923	bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3924	bool SExt) const {
3925	if (VT == MVT::i1)
3926	return N->isOne();
3927
3928	TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
3929	switch (Cnt) {
3930	case TargetLowering::ZeroOrOneBooleanContent:
3931	// An extended value of 1 is always true, unless its original type is i1,
3932	// in which case it will be sign extended to -1.
3933	return (N->isOne() && !SExt) \|\| (SExt && (N->getValueType(`0`) != MVT::i1));
3934	case TargetLowering::UndefinedBooleanContent:
3935	case TargetLowering::ZeroOrNegativeOneBooleanContent:
3936	return N->isAllOnes() && SExt;
3937	}
3938	llvm_unreachable("Unexpected enumeration.");
3939	}
3940
3941	/// This helper function of SimplifySetCC tries to optimize the comparison when
3942	/// either operand of the SetCC node is a bitwise-and instruction.
3943	SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3944	ISD::CondCode Cond, const SDLoc &DL,
3945	DAGCombinerInfo &DCI) const {
3946	if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3947	std::swap(a&: N0, b&: N1);
3948
3949	SelectionDAG &DAG = DCI.DAG;
3950	EVT OpVT = N0.getValueType();
3951	if (N0.getOpcode() != ISD::AND \|\| !OpVT.isInteger() \|\|
3952	(Cond != ISD::SETEQ && Cond != ISD::SETNE))
3953	return SDValue ();
3954
3955	// (X & Y) != 0 --> zextOrTrunc(X & Y)
3956	// iff everything but LSB is known zero:
3957	if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
3958	(getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent \|\|
3959	getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3960	unsigned NumEltBits = OpVT.getScalarSizeInBits();
3961	APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - `1`);
3962	if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
3963	return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
3964	}
3965
3966	// Try to eliminate a power-of-2 mask constant by converting to a signbit
3967	// test in a narrow type that we can truncate to with no cost. Examples:
3968	// (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3969	// (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3970	// TODO: This conservatively checks for type legality on the source and
3971	// destination types. That may inhibit optimizations, but it also
3972	// allows setcc->shift transforms that may be more beneficial.
3973	auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`));
3974	if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
3975	isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
3976	EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
3977	BitWidth: AndC->getAPIntValue().getActiveBits());
3978	if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
3979	SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: `0`), DL, VT: NarrowVT);
3980	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: NarrowVT);
3981	return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
3982	Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3983	}
3984	}
3985
3986	// Match these patterns in any of their permutations:
3987	// (X & Y) == Y
3988	// (X & Y) != Y
3989	SDValue X, Y;
3990	if (N0.getOperand(i: `0`) == N1) {
3991	X = N0.getOperand(i: `1`);
3992	Y = N0.getOperand(i: `0`);
3993	} else if (N0.getOperand(i: `1`) == N1) {
3994	X = N0.getOperand(i: `0`);
3995	Y = N0.getOperand(i: `1`);
3996	} else {
3997	return SDValue ();
3998	}
3999
4000	// TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4001	// `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4002	// its liable to create and infinite loop.
4003	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: OpVT);
4004	if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4005	DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
4006	// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4007	// Note that where Y is variable and is known to have at most one bit set
4008	// (for example, if it is Z & 1) we cannot do this; the expressions are not
4009	// equivalent when Y == 0.
4010	assert(OpVT.isInteger());
4011	Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
4012	if (DCI.isBeforeLegalizeOps() \|\|
4013	isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
4014	return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
4015	} else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4016	// If the target supports an 'and-not' or 'and-complement' logic operation,
4017	// try to use that to make a comparison operation more efficient.
4018	// But don't do this transform if the mask is a single bit because there are
4019	// more efficient ways to deal with that case (for example, 'bt' on x86 or
4020	// 'rlwinm' on PPC).
4021
4022	// Bail out if the compare operand that we want to turn into a zero is
4023	// already a zero (otherwise, infinite loop).
4024	if (isNullConstant(V: Y))
4025	return SDValue ();
4026
4027	// Transform this into: ~X & Y == 0.
4028	SDValue NotX = DAG.getNOT(DL: SDLoc (X), Val: X, VT: OpVT);
4029	SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc (N0), VT: OpVT, N1: NotX, N2: Y);
4030	return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
4031	}
4032
4033	return SDValue ();
4034	}
4035
4036	/// There are multiple IR patterns that could be checking whether certain
4037	/// truncation of a signed number would be lossy or not. The pattern which is
4038	/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4039	/// We are looking for the following pattern: (KeptBits is a constant)
4040	/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4041	/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4042	/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4043	/// We will unfold it into the natural trunc+sext pattern:
4044	/// ((%x << C) a>> C) dstcond %x
4045	/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4046	SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4047	EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4048	const SDLoc &DL) const {
4049	// We must be comparing with a constant.
4050	ConstantSDNode *C1;
4051	if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4052	return SDValue ();
4053
4054	// N0 should be: add %x, (1 << (KeptBits-1))
4055	if (N0 ->getOpcode() != ISD::ADD)
4056	return SDValue ();
4057
4058	// And we must be 'add'ing a constant.
4059	ConstantSDNode *C01;
4060	if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`))))
4061	return SDValue ();
4062
4063	SDValue X = N0 ->getOperand(Num: `0`);
4064	EVT XVT = X.getValueType();
4065
4066	// Validate constants ...
4067
4068	APInt I1 = C1->getAPIntValue();
4069
4070	ISD::CondCode NewCond;
4071	if (Cond == ISD::CondCode::SETULT) {
4072	NewCond = ISD::CondCode::SETEQ;
4073	} else if (Cond == ISD::CondCode::SETULE) {
4074	NewCond = ISD::CondCode::SETEQ;
4075	// But need to 'canonicalize' the constant.
4076	I1 += `1`;
4077	} else if (Cond == ISD::CondCode::SETUGT) {
4078	NewCond = ISD::CondCode::SETNE;
4079	// But need to 'canonicalize' the constant.
4080	I1 += `1`;
4081	} else if (Cond == ISD::CondCode::SETUGE) {
4082	NewCond = ISD::CondCode::SETNE;
4083	} else
4084	return SDValue ();
4085
4086	APInt I01 = C01->getAPIntValue();
4087
4088	auto checkConstants = [&I1, &I01]() -> bool {
4089	// Both of them must be power-of-two, and the constant from setcc is bigger.
4090	return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4091	};
4092
4093	if (checkConstants ()) {
4094	// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4095	} else {
4096	// What if we invert constants? (and the target predicate)
4097	I1.negate();
4098	I01.negate();
4099	assert(XVT.isInteger());
4100	NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4101	if (!checkConstants ())
4102	return SDValue ();
4103	// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4104	}
4105
4106	// They are power-of-two, so which bit is set?
4107	const unsigned KeptBits = I1.logBase2();
4108	const unsigned KeptBitsMinusOne = I01.logBase2();
4109
4110	// Magic!
4111	if (KeptBits != (KeptBitsMinusOne + `1`))
4112	return SDValue ();
4113	assert(KeptBits > `0` && KeptBits < XVT.getSizeInBits() && "unreachable");
4114
4115	// We don't want to do this in every single case.
4116	SelectionDAG &DAG = DCI.DAG;
4117	if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4118	XVT, KeptBits))
4119	return SDValue ();
4120
4121	// Unfold into: sext_inreg(%x) cond %x
4122	// Where 'cond' will be either 'eq' or 'ne'.
4123	SDValue SExtInReg = DAG.getNode(
4124	Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
4125	N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
4126	return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
4127	}
4128
4129	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4130	SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4131	EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4132	DAGCombinerInfo &DCI, const SDLoc &DL) const {
4133	assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4134	"Should be a comparison with 0.");
4135	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4136	"Valid only for [in]equality comparisons.");
4137
4138	unsigned NewShiftOpcode;
4139	SDValue X, C, Y;
4140
4141	SelectionDAG &DAG = DCI.DAG;
4142	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4143
4144	// Look for '(C l>>/<< Y)'.
4145	auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4146	// The shift should be one-use.
4147	if (!V.hasOneUse())
4148	return false;
4149	unsigned OldShiftOpcode = V.getOpcode();
4150	switch (OldShiftOpcode) {
4151	case ISD::SHL:
4152	NewShiftOpcode = ISD::SRL;
4153	break;
4154	case ISD::SRL:
4155	NewShiftOpcode = ISD::SHL;
4156	break;
4157	default:
4158	return false; // must be a logical shift.
4159	}
4160	// We should be shifting a constant.
4161	// FIXME: best to use isConstantOrConstantVector().
4162	C = V.getOperand(i: `0`);
4163	ConstantSDNode *CC =
4164	isConstOrConstSplat(N: C, /AllowUndefs=/true, /AllowTruncation=/true);
4165	if (!CC)
4166	return false;
4167	Y = V.getOperand(i: `1`);
4168
4169	ConstantSDNode *XC =
4170	isConstOrConstSplat(N: X, /AllowUndefs=/true, /AllowTruncation=/true);
4171	return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4172	X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4173	};
4174
4175	// LHS of comparison should be an one-use 'and'.
4176	if (N0.getOpcode() != ISD::AND \|\| !N0.hasOneUse())
4177	return SDValue ();
4178
4179	X = N0.getOperand(i: `0`);
4180	SDValue Mask = N0.getOperand(i: `1`);
4181
4182	// 'and' is commutative!
4183	if (!Match (Mask)) {
4184	std::swap(a&: X, b&: Mask);
4185	if (!Match (Mask))
4186	return SDValue ();
4187	}
4188
4189	EVT VT = X.getValueType();
4190
4191	// Produce:
4192	// ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4193	SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4194	SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4195	SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4196	return T2;
4197	}
4198
4199	/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4200	/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4201	/// handle the commuted versions of these patterns.
4202	SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4203	ISD::CondCode Cond, const SDLoc &DL,
4204	DAGCombinerInfo &DCI) const {
4205	unsigned BOpcode = N0.getOpcode();
4206	assert((BOpcode == ISD::ADD \|\| BOpcode == ISD::SUB \|\| BOpcode == ISD::XOR) &&
4207	"Unexpected binop");
4208	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && "Unexpected condcode");
4209
4210	// (X + Y) == X --> Y == 0
4211	// (X - Y) == X --> Y == 0
4212	// (X ^ Y) == X --> Y == 0
4213	SelectionDAG &DAG = DCI.DAG;
4214	EVT OpVT = N0.getValueType();
4215	SDValue X = N0.getOperand(i: `0`);
4216	SDValue Y = N0.getOperand(i: `1`);
4217	if (X == N1)
4218	return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4219
4220	if (Y != N1)
4221	return SDValue ();
4222
4223	// (X + Y) == Y --> X == 0
4224	// (X ^ Y) == Y --> X == 0
4225	if (BOpcode == ISD::ADD \|\| BOpcode == ISD::XOR)
4226	return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4227
4228	// The shift would not be valid if the operands are boolean (i1).
4229	if (!N0.hasOneUse() \|\| OpVT.getScalarSizeInBits() == `1`)
4230	return SDValue ();
4231
4232	// (X - Y) == Y --> X == Y << 1
4233	SDValue One =
4234	DAG.getShiftAmountConstant(Val: `1`, VT: OpVT, DL, LegalTypes: !DCI.isBeforeLegalize());
4235	SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4236	if (!DCI.isCalledByLegalizer())
4237	DCI.AddToWorklist(N: YShl1.getNode());
4238	return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4239	}
4240
4241	static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4242	SDValue N0, const APInt &C1,
4243	ISD::CondCode Cond, const SDLoc &dl,
4244	SelectionDAG &DAG) {
4245	// Look through truncs that don't change the value of a ctpop.
4246	// FIXME: Add vector support? Need to be careful with setcc result type below.
4247	SDValue CTPOP = N0;
4248	if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4249	N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: `0`).getScalarValueSizeInBits()))
4250	CTPOP = N0.getOperand(i: `0`);
4251
4252	if (CTPOP.getOpcode() != ISD::CTPOP \|\| !CTPOP.hasOneUse())
4253	return SDValue ();
4254
4255	EVT CTVT = CTPOP.getValueType();
4256	SDValue CTOp = CTPOP.getOperand(i: `0`);
4257
4258	// Expand a power-of-2-or-zero comparison based on ctpop:
4259	// (ctpop x) u< 2 -> (x & x-1) == 0
4260	// (ctpop x) u> 1 -> (x & x-1) != 0
4261	if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGT) {
4262	// Keep the CTPOP if it is a cheap vector op.
4263	if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4264	return SDValue ();
4265
4266	unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4267	if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4268	return SDValue ();
4269	if (C1 == `0` && (Cond == ISD::SETULT))
4270	return SDValue (); // This is handled elsewhere.
4271
4272	unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4273
4274	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4275	SDValue Result = CTOp;
4276	for (unsigned i = `0`; i < Passes; i++) {
4277	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4278	Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4279	}
4280	ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4281	return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: CTVT), Cond: CC);
4282	}
4283
4284	// Expand a power-of-2 comparison based on ctpop
4285	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && C1 == `1`) {
4286	// Keep the CTPOP if it is cheap.
4287	if (TLI.isCtpopFast(VT: CTVT))
4288	return SDValue ();
4289
4290	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: CTVT);
4291	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4292	assert(CTVT.isInteger());
4293	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4294
4295	// Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4296	// check before emitting a potentially unnecessary op.
4297	if (DAG.isKnownNeverZero(Op: CTOp)) {
4298	// (ctpop x) == 1 --> (x & x-1) == 0
4299	// (ctpop x) != 1 --> (x & x-1) != 0
4300	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4301	SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4302	return RHS;
4303	}
4304
4305	// (ctpop x) == 1 --> (x ^ x-1) > x-1
4306	// (ctpop x) != 1 --> (x ^ x-1) <= x-1
4307	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4308	ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4309	return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4310	}
4311
4312	return SDValue ();
4313	}
4314
4315	static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4316	ISD::CondCode Cond, const SDLoc &dl,
4317	SelectionDAG &DAG) {
4318	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4319	return SDValue ();
4320
4321	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4322	if (!C1 \|\| !(C1->isZero() \|\| C1->isAllOnes()))
4323	return SDValue ();
4324
4325	auto getRotateSource = [](SDValue X) {
4326	if (X.getOpcode() == ISD::ROTL \|\| X.getOpcode() == ISD::ROTR)
4327	return X.getOperand(i: `0`);
4328	return SDValue ();
4329	};
4330
4331	// Peek through a rotated value compared against 0 or -1:
4332	// (rot X, Y) == 0/-1 --> X == 0/-1
4333	// (rot X, Y) != 0/-1 --> X != 0/-1
4334	if (SDValue R = getRotateSource (N0))
4335	return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4336
4337	// Peek through an 'or' of a rotated value compared against 0:
4338	// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4339	// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4340	//
4341	// TODO: Add the 'and' with -1 sibling.
4342	// TODO: Recurse through a series of 'or' ops to find the rotate.
4343	EVT OpVT = N0.getValueType();
4344	if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4345	if (SDValue R = getRotateSource (N0.getOperand(i: `0`))) {
4346	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `1`));
4347	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4348	}
4349	if (SDValue R = getRotateSource (N0.getOperand(i: `1`))) {
4350	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `0`));
4351	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4352	}
4353	}
4354
4355	return SDValue ();
4356	}
4357
4358	static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4359	ISD::CondCode Cond, const SDLoc &dl,
4360	SelectionDAG &DAG) {
4361	// If we are testing for all-bits-clear, we might be able to do that with
4362	// less shifting since bit-order does not matter.
4363	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4364	return SDValue ();
4365
4366	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4367	if (!C1 \|\| !C1->isZero())
4368	return SDValue ();
4369
4370	if (!N0.hasOneUse() \|\|
4371	(N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4372	return SDValue ();
4373
4374	unsigned BitWidth = N0.getScalarValueSizeInBits();
4375	auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: `2`));
4376	if (!ShAmtC \|\| ShAmtC->getAPIntValue().uge(RHS: BitWidth))
4377	return SDValue ();
4378
4379	// Canonicalize fshr as fshl to reduce pattern-matching.
4380	unsigned ShAmt = ShAmtC->getZExtValue();
4381	if (N0.getOpcode() == ISD::FSHR)
4382	ShAmt = BitWidth - ShAmt;
4383
4384	// Match an 'or' with a specific operand 'Other' in either commuted variant.
4385	SDValue X, Y;
4386	auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4387	if (Or.getOpcode() != ISD::OR \|\| !Or.hasOneUse())
4388	return false;
4389	if (Or.getOperand(i: `0`) == Other) {
4390	X = Or.getOperand(i: `0`);
4391	Y = Or.getOperand(i: `1`);
4392	return true;
4393	}
4394	if (Or.getOperand(i: `1`) == Other) {
4395	X = Or.getOperand(i: `1`);
4396	Y = Or.getOperand(i: `0`);
4397	return true;
4398	}
4399	return false;
4400	};
4401
4402	EVT OpVT = N0.getValueType();
4403	EVT ShAmtVT = N0.getOperand(i: `2`).getValueType();
4404	SDValue F0 = N0.getOperand(i: `0`);
4405	SDValue F1 = N0.getOperand(i: `1`);
4406	if (matchOr (F0, F1)) {
4407	// fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4408	SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4409	SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4410	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4411	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4412	}
4413	if (matchOr (F1, F0)) {
4414	// fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4415	SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4416	SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4417	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4418	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4419	}
4420
4421	return SDValue ();
4422	}
4423
4424	/// Try to simplify a setcc built with the specified operands and cc. If it is
4425	/// unable to simplify it, return a null SDValue.
4426	SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4427	ISD::CondCode Cond, bool foldBooleans,
4428	DAGCombinerInfo &DCI,
4429	const SDLoc &dl) const {
4430	SelectionDAG &DAG = DCI.DAG;
4431	const DataLayout &Layout = DAG.getDataLayout();
4432	EVT OpVT = N0.getValueType();
4433	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4434
4435	// Constant fold or commute setcc.
4436	if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4437	return Fold;
4438
4439	bool N0ConstOrSplat =
4440	isConstOrConstSplat(N: N0, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4441	bool N1ConstOrSplat =
4442	isConstOrConstSplat(N: N1, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4443
4444	// Canonicalize toward having the constant on the RHS.
4445	// TODO: Handle non-splat vector constants. All undef causes trouble.
4446	// FIXME: We can't yet fold constant scalable vector splats, so avoid an
4447	// infinite loop here when we encounter one.
4448	ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4449	if (N0ConstOrSplat && !N1ConstOrSplat &&
4450	(DCI.isBeforeLegalizeOps() \|\|
4451	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4452	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4453
4454	// If we have a subtract with the same 2 non-constant operands as this setcc
4455	// -- but in reverse order -- then try to commute the operands of this setcc
4456	// to match. A matching pair of setcc (cmp) and sub may be combined into 1
4457	// instruction on some targets.
4458	if (!N0ConstOrSplat && !N1ConstOrSplat &&
4459	(DCI.isBeforeLegalizeOps() \|\|
4460	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4461	DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4462	!DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4463	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4464
4465	if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4466	return V;
4467
4468	if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4469	return V;
4470
4471	if (auto *N1C = isConstOrConstSplat(N: N1)) {
4472	const APInt &C1 = N1C->getAPIntValue();
4473
4474	// Optimize some CTPOP cases.
4475	if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4476	return V;
4477
4478	// For equality to 0 of a no-wrap multiply, decompose and test each op:
4479	// X Y == 0 --> (X == 0) \|\| (Y == 0)*
4480	// X Y != 0 --> (X != 0) && (Y != 0)*
4481	// TODO: This bails out if minsize is set, but if the target doesn't have a
4482	// single instruction multiply for this type, it would likely be
4483	// smaller to decompose.
4484	if (C1.isZero() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4485	N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4486	(N0->getFlags().hasNoUnsignedWrap() \|\|
4487	N0->getFlags().hasNoSignedWrap()) &&
4488	!Attr.hasFnAttr(Attribute::MinSize)) {
4489	SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
4490	SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1, Cond);
4491	unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4492	return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4493	}
4494
4495	// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4496	// equality comparison, then we're just comparing whether X itself is
4497	// zero.
4498	if (N0.getOpcode() == ISD::SRL && (C1.isZero() \|\| C1.isOne()) &&
4499	N0.getOperand(i: `0`).getOpcode() == ISD::CTLZ &&
4500	llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4501	if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: `1`))) {
4502	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4503	ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4504	if ((C1 == `0`) == (Cond == ISD::SETEQ)) {
4505	// (srl (ctlz x), 5) == 0 -> X != 0
4506	// (srl (ctlz x), 5) != 1 -> X != 0
4507	Cond = ISD::SETNE;
4508	} else {
4509	// (srl (ctlz x), 5) != 0 -> X == 0
4510	// (srl (ctlz x), 5) == 1 -> X == 0
4511	Cond = ISD::SETEQ;
4512	}
4513	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: N0.getValueType());
4514	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`).getOperand(i: `0`), RHS: Zero,
4515	Cond);
4516	}
4517	}
4518	}
4519	}
4520
4521	// FIXME: Support vectors.
4522	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4523	const APInt &C1 = N1C->getAPIntValue();
4524
4525	// (zext x) == C --> x == (trunc C)
4526	// (sext x) == C --> x == (trunc C)
4527	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4528	DCI.isBeforeLegalize() && N0 ->hasOneUse()) {
4529	unsigned MinBits = N0.getValueSizeInBits();
4530	SDValue PreExt;
4531	bool Signed = false;
4532	if (N0 ->getOpcode() == ISD::ZERO_EXTEND) {
4533	// ZExt
4534	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4535	PreExt = N0 ->getOperand(Num: `0`);
4536	} else if (N0 ->getOpcode() == ISD::AND) {
4537	// DAGCombine turns costly ZExts into ANDs
4538	if (auto *C = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`)))
4539	if ((C->getAPIntValue()+`1`).isPowerOf2()) {
4540	MinBits = C->getAPIntValue().countr_one();
4541	PreExt = N0 ->getOperand(Num: `0`);
4542	}
4543	} else if (N0 ->getOpcode() == ISD::SIGN_EXTEND) {
4544	// SExt
4545	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4546	PreExt = N0 ->getOperand(Num: `0`);
4547	Signed = true;
4548	} else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4549	// ZEXTLOAD / SEXTLOAD
4550	if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4551	MinBits = LN0->getMemoryVT().getSizeInBits();
4552	PreExt = N0;
4553	} else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4554	Signed = true;
4555	MinBits = LN0->getMemoryVT().getSizeInBits();
4556	PreExt = N0;
4557	}
4558	}
4559
4560	// Figure out how many bits we need to preserve this constant.
4561	unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4562
4563	// Make sure we're not losing bits from the constant.
4564	if (MinBits > `0` &&
4565	MinBits < C1.getBitWidth() &&
4566	MinBits >= ReqdBits) {
4567	EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4568	if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4569	// Will get folded away.
4570	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4571	if (MinBits == `1` && C1 == `1`)
4572	// Invert the condition.
4573	return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(`0`, dl, MVT::i1),
4574	Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4575	SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4576	return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4577	}
4578
4579	// If truncating the setcc operands is not desirable, we can still
4580	// simplify the expression in some cases:
4581	// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4582	// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4583	// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4584	// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4585	// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4586	// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4587	SDValue TopSetCC = N0 ->getOperand(Num: `0`);
4588	unsigned N0Opc = N0 ->getOpcode();
4589	bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4590	if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4591	TopSetCC.getOpcode() == ISD::SETCC &&
4592	(N0Opc == ISD::ZERO_EXTEND \|\| N0Opc == ISD::SIGN_EXTEND) &&
4593	(isConstFalseVal(N1) \|\|
4594	isExtendedTrueVal(N1C, N0->getValueType(`0`), SExt))) {
4595
4596	bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) \|\|
4597	(!N1C->isZero() && Cond == ISD::SETNE);
4598
4599	if (!Inverse)
4600	return TopSetCC;
4601
4602	ISD::CondCode InvCond = ISD::getSetCCInverse(
4603	Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: `2`))->get(),
4604	Type: TopSetCC.getOperand(i: `0`).getValueType());
4605	return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: `0`),
4606	RHS: TopSetCC.getOperand(i: `1`),
4607	Cond: InvCond);
4608	}
4609	}
4610	}
4611
4612	// If the LHS is '(and load, const)', the RHS is 0, the test is for
4613	// equality or unsigned, and all 1 bits of the const are in the same
4614	// partial word, see if we can shorten the load.
4615	if (DCI.isBeforeLegalize() &&
4616	!ISD::isSignedIntSetCC(Code: Cond) &&
4617	N0.getOpcode() == ISD::AND && C1 == `0` &&
4618	N0.getNode()->hasOneUse() &&
4619	isa<LoadSDNode>(Val: N0.getOperand(i: `0`)) &&
4620	N0.getOperand(i: `0`).getNode()->hasOneUse() &&
4621	isa<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
4622	LoadSDNode *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: `0`));
4623	APInt bestMask;
4624	unsigned bestWidth = `0`, bestOffset = `0`;
4625	if (Lod->isSimple() && Lod->isUnindexed() &&
4626	(Lod->getMemoryVT().isByteSized() \|\|
4627	isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4628	unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4629	unsigned origWidth = N0.getValueSizeInBits();
4630	unsigned maskWidth = origWidth;
4631	// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4632	// 8 bits, but have to be careful...
4633	if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4634	origWidth = Lod->getMemoryVT().getSizeInBits();
4635	const APInt &Mask = N0.getConstantOperandAPInt(i: `1`);
4636	// Only consider power-of-2 widths (and at least one byte) as candidates
4637	// for the narrowed load.
4638	for (unsigned width = `8`; width < origWidth; width *= `2`) {
4639	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4640	if (!shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT))
4641	continue;
4642	APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4643	// Avoid accessing any padding here for now (we could use memWidth
4644	// instead of origWidth here otherwise).
4645	unsigned maxOffset = origWidth - width;
4646	for (unsigned offset = `0`; offset <= maxOffset; offset += `8`) {
4647	if (Mask.isSubsetOf(RHS: newMask)) {
4648	unsigned ptrOffset =
4649	Layout.isLittleEndian() ? offset : memWidth - width - offset;
4650	unsigned IsFast = `0`;
4651	Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / `8`);
4652	if (allowsMemoryAccess(
4653	Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4654	Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4655	IsFast) {
4656	bestOffset = ptrOffset / `8`;
4657	bestMask = Mask.lshr(shiftAmt: offset);
4658	bestWidth = width;
4659	break;
4660	}
4661	}
4662	newMask <<= `8`;
4663	}
4664	if (bestWidth)
4665	break;
4666	}
4667	}
4668	if (bestWidth) {
4669	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4670	SDValue Ptr = Lod->getBasePtr();
4671	if (bestOffset != `0`)
4672	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4673	SDValue NewLoad =
4674	DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4675	PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4676	Alignment: Lod->getOriginalAlign());
4677	SDValue And =
4678	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4679	N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4680	return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: `0LL`, DL: dl, VT: newVT), Cond);
4681	}
4682	}
4683
4684	// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4685	if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4686	unsigned InSize = N0.getOperand(i: `0`).getValueSizeInBits();
4687
4688	// If the comparison constant has bits in the upper part, the
4689	// zero-extended value could never match.
4690	if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4691	hiBitsSet: C1.getBitWidth() - InSize))) {
4692	switch (Cond) {
4693	case ISD::SETUGT:
4694	case ISD::SETUGE:
4695	case ISD::SETEQ:
4696	return DAG.getConstant(Val: `0`, DL: dl, VT);
4697	case ISD::SETULT:
4698	case ISD::SETULE:
4699	case ISD::SETNE:
4700	return DAG.getConstant(Val: `1`, DL: dl, VT);
4701	case ISD::SETGT:
4702	case ISD::SETGE:
4703	// True if the sign bit of C1 is set.
4704	return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4705	case ISD::SETLT:
4706	case ISD::SETLE:
4707	// True if the sign bit of C1 isn't set.
4708	return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4709	default:
4710	break;
4711	}
4712	}
4713
4714	// Otherwise, we can perform the comparison with the low bits.
4715	switch (Cond) {
4716	case ISD::SETEQ:
4717	case ISD::SETNE:
4718	case ISD::SETUGT:
4719	case ISD::SETUGE:
4720	case ISD::SETULT:
4721	case ISD::SETULE: {
4722	EVT newVT = N0.getOperand(i: `0`).getValueType();
4723	if (DCI.isBeforeLegalizeOps() \|\|
4724	(isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
4725	isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()))) {
4726	EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
4727	SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
4728
4729	SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: `0`),
4730	RHS: NewConst, Cond);
4731	return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
4732	}
4733	break;
4734	}
4735	default:
4736	break; // todo, be more careful with signed comparisons
4737	}
4738	} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4739	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4740	!isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT(),
4741	ToTy: OpVT)) {
4742	EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT();
4743	unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4744	EVT ExtDstTy = N0.getValueType();
4745	unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4746
4747	// If the constant doesn't fit into the number of bits for the source of
4748	// the sign extension, it is impossible for both sides to be equal.
4749	if (C1.getSignificantBits() > ExtSrcTyBits)
4750	return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
4751
4752	assert(ExtDstTy == N0.getOperand(`0`).getValueType() &&
4753	ExtDstTy != ExtSrcTy && "Unexpected types!");
4754	APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
4755	SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: `0`),
4756	N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
4757	if (!DCI.isCalledByLegalizer())
4758	DCI.AddToWorklist(N: ZextOp.getNode());
4759	// Otherwise, make this a use of a zext.
4760	return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
4761	RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
4762	} else if ((N1C->isZero() \|\| N1C->isOne()) &&
4763	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
4764	// SETCC (X), [0\|1], [EQ\|NE] -> X if X is known 0/1. i1 types are
4765	// excluded as they are handled below whilst checking for foldBooleans.
4766	if ((N0.getOpcode() == ISD::SETCC \|\| VT.getScalarType() != MVT::i1) &&
4767	isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4768	(N0.getValueType() == MVT::i1 \|\|
4769	getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4770	DAG.MaskedValueIsZero(
4771	N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), `1`))) {
4772	bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4773	if (TrueWhenTrue)
4774	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
4775	// Invert the condition.
4776	if (N0.getOpcode() == ISD::SETCC) {
4777	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: `2`))->get();
4778	CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: `0`).getValueType());
4779	if (DCI.isBeforeLegalizeOps() \|\|
4780	isCondCodeLegal(CC, VT: N0.getOperand(i: `0`).getSimpleValueType()))
4781	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N0.getOperand(i: `1`), Cond: CC);
4782	}
4783	}
4784
4785	if ((N0.getOpcode() == ISD::XOR \|\|
4786	(N0.getOpcode() == ISD::AND &&
4787	N0.getOperand(i: `0`).getOpcode() == ISD::XOR &&
4788	N0.getOperand(i: `1`) == N0.getOperand(i: `0`).getOperand(i: `1`))) &&
4789	isOneConstant(V: N0.getOperand(i: `1`))) {
4790	// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4791	// can only do this if the top bits are known zero.
4792	unsigned BitWidth = N0.getValueSizeInBits();
4793	if (DAG.MaskedValueIsZero(Op: N0,
4794	Mask: APInt::getHighBitsSet(numBits: BitWidth,
4795	hiBitsSet: BitWidth-`1`))) {
4796	// Okay, get the un-inverted input value.
4797	SDValue Val;
4798	if (N0.getOpcode() == ISD::XOR) {
4799	Val = N0.getOperand(i: `0`);
4800	} else {
4801	assert(N0.getOpcode() == ISD::AND &&
4802	N0.getOperand(`0`).getOpcode() == ISD::XOR);
4803	// ((X^1)&1)^1 -> X & 1
4804	Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
4805	N1: N0.getOperand(i: `0`).getOperand(i: `0`),
4806	N2: N0.getOperand(i: `1`));
4807	}
4808
4809	return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
4810	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4811	}
4812	} else if (N1C->isOne()) {
4813	SDValue Op0 = N0;
4814	if (Op0.getOpcode() == ISD::TRUNCATE)
4815	Op0 = Op0.getOperand(i: `0`);
4816
4817	if ((Op0.getOpcode() == ISD::XOR) &&
4818	Op0.getOperand(i: `0`).getOpcode() == ISD::SETCC &&
4819	Op0.getOperand(i: `1`).getOpcode() == ISD::SETCC) {
4820	SDValue XorLHS = Op0.getOperand(i: `0`);
4821	SDValue XorRHS = Op0.getOperand(i: `1`);
4822	// Ensure that the input setccs return an i1 type or 0/1 value.
4823	if (Op0.getValueType() == MVT::i1 \|\|
4824	(getBooleanContents(XorLHS.getOperand(`0`).getValueType()) ==
4825	ZeroOrOneBooleanContent &&
4826	getBooleanContents(XorRHS.getOperand(`0`).getValueType()) ==
4827	ZeroOrOneBooleanContent)) {
4828	// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4829	Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4830	return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
4831	}
4832	}
4833	if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: `1`))) {
4834	// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4835	if (Op0.getValueType().bitsGT(VT))
4836	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4837	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
4838	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
4839	else if (Op0.getValueType().bitsLT(VT))
4840	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4841	N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
4842	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
4843
4844	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4845	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
4846	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4847	}
4848	if (Op0.getOpcode() == ISD::AssertZext &&
4849	cast<VTSDNode>(Op0.getOperand(`1`))->getVT() == MVT::i1)
4850	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4851	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
4852	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4853	}
4854	}
4855
4856	// Given:
4857	// icmp eq/ne (urem %x, %y), 0
4858	// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4859	// icmp eq/ne %x, 0
4860	if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4861	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
4862	KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `0`));
4863	KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `1`));
4864	if (XKnown.countMaxPopulation() == `1` && YKnown.countMinPopulation() >= `2`)
4865	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
4866	}
4867
4868	// Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4869	// and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4870	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4871	N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: `1`)) &&
4872	N0.getConstantOperandAPInt(i: `1`) == OpVT.getScalarSizeInBits() - `1` &&
4873	N1C && N1C->isAllOnes()) {
4874	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`),
4875	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: OpVT),
4876	Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4877	}
4878
4879	if (SDValue V =
4880	optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
4881	return V;
4882	}
4883
4884	// These simplifications apply to splat vectors as well.
4885	// TODO: Handle more splat vector cases.
4886	if (auto *N1C = isConstOrConstSplat(N: N1)) {
4887	const APInt &C1 = N1C->getAPIntValue();
4888
4889	APInt MinVal, MaxVal;
4890	unsigned OperandBitSize = N1C->getValueType(ResNo: `0`).getScalarSizeInBits();
4891	if (ISD::isSignedIntSetCC(Code: Cond)) {
4892	MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
4893	MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
4894	} else {
4895	MinVal = APInt::getMinValue(numBits: OperandBitSize);
4896	MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
4897	}
4898
4899	// Canonicalize GE/LE comparisons to use GT/LT comparisons.
4900	if (Cond == ISD::SETGE \|\| Cond == ISD::SETUGE) {
4901	// X >= MIN --> true
4902	if (C1 == MinVal)
4903	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4904
4905	if (!VT.isVector()) { // TODO: Support this for vectors.
4906	// X >= C0 --> X > (C0 - 1)
4907	APInt C = C1 - `1`;
4908	ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4909	if ((DCI.isBeforeLegalizeOps() \|\|
4910	isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4911	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
4912	isLegalICmpImmediate(C.getSExtValue())))) {
4913	return DAG.getSetCC(DL: dl, VT, LHS: N0,
4914	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4915	Cond: NewCC);
4916	}
4917	}
4918	}
4919
4920	if (Cond == ISD::SETLE \|\| Cond == ISD::SETULE) {
4921	// X <= MAX --> true
4922	if (C1 == MaxVal)
4923	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4924
4925	// X <= C0 --> X < (C0 + 1)
4926	if (!VT.isVector()) { // TODO: Support this for vectors.
4927	APInt C = C1 + `1`;
4928	ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4929	if ((DCI.isBeforeLegalizeOps() \|\|
4930	isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4931	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
4932	isLegalICmpImmediate(C.getSExtValue())))) {
4933	return DAG.getSetCC(DL: dl, VT, LHS: N0,
4934	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4935	Cond: NewCC);
4936	}
4937	}
4938	}
4939
4940	if (Cond == ISD::SETLT \|\| Cond == ISD::SETULT) {
4941	if (C1 == MinVal)
4942	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
4943
4944	// TODO: Support this for vectors after legalize ops.
4945	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
4946	// Canonicalize setlt X, Max --> setne X, Max
4947	if (C1 == MaxVal)
4948	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4949
4950	// If we have setult X, 1, turn it into seteq X, 0
4951	if (C1 == MinVal +`1`)
4952	return DAG.getSetCC(DL: dl, VT, LHS: N0,
4953	RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
4954	Cond: ISD::SETEQ);
4955	}
4956	}
4957
4958	if (Cond == ISD::SETGT \|\| Cond == ISD::SETUGT) {
4959	if (C1 == MaxVal)
4960	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
4961
4962	// TODO: Support this for vectors after legalize ops.
4963	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
4964	// Canonicalize setgt X, Min --> setne X, Min
4965	if (C1 == MinVal)
4966	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4967
4968	// If we have setugt X, Max-1, turn it into seteq X, Max
4969	if (C1 == MaxVal -`1`)
4970	return DAG.getSetCC(DL: dl, VT, LHS: N0,
4971	RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
4972	Cond: ISD::SETEQ);
4973	}
4974	}
4975
4976	if (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) {
4977	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4978	if (C1.isZero())
4979	if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4980	SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
4981	return CC;
4982
4983	// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4984	// For example, when high 32-bits of i64 X are known clear:
4985	// all bits clear: (X \| (Y<<32)) == 0 --> (X \| Y) == 0
4986	// all bits set: (X \| (Y<<32)) == -1 --> (X & Y) == -1
4987	bool CmpZero = N1C->isZero();
4988	bool CmpNegOne = N1C->isAllOnes();
4989	if ((CmpZero \|\| CmpNegOne) && N0.hasOneUse()) {
4990	// Match or(lo,shl(hi,bw/2)) pattern.
4991	auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4992	unsigned EltBits = V.getScalarValueSizeInBits();
4993	if (V.getOpcode() != ISD::OR \|\| (EltBits % `2`) != `0`)
4994	return false;
4995	SDValue LHS = V.getOperand(i: `0`);
4996	SDValue RHS = V.getOperand(i: `1`);
4997	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / `2`);
4998	// Unshifted element must have zero upper bits.
4999	if (RHS.getOpcode() == ISD::SHL &&
5000	isa<ConstantSDNode>(Val: RHS.getOperand(i: `1`)) &&
5001	RHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5002	DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5003	Lo = LHS;
5004	Hi = RHS.getOperand(i: `0`);
5005	return true;
5006	}
5007	if (LHS.getOpcode() == ISD::SHL &&
5008	isa<ConstantSDNode>(Val: LHS.getOperand(i: `1`)) &&
5009	LHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5010	DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5011	Lo = RHS;
5012	Hi = LHS.getOperand(i: `0`);
5013	return true;
5014	}
5015	return false;
5016	};
5017
5018	auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5019	unsigned EltBits = N0.getScalarValueSizeInBits();
5020	unsigned HalfBits = EltBits / `2`;
5021	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5022	SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5023	SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5024	SDValue NewN0 =
5025	DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5026	SDValue NewN1 = CmpZero ? DAG.getConstant(Val: `0`, DL: dl, VT: OpVT) : LoBits;
5027	return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5028	};
5029
5030	SDValue Lo, Hi;
5031	if (IsConcat (N0, Lo, Hi))
5032	return MergeConcat (Lo, Hi);
5033
5034	if (N0.getOpcode() == ISD::AND \|\| N0.getOpcode() == ISD::OR) {
5035	SDValue Lo0, Lo1, Hi0, Hi1;
5036	if (IsConcat (N0.getOperand(i: `0`), Lo0, Hi0) &&
5037	IsConcat (N0.getOperand(i: `1`), Lo1, Hi1)) {
5038	return MergeConcat (DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5039	DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5040	}
5041	}
5042	}
5043	}
5044
5045	// If we have "setcc X, C0", check to see if we can shrink the immediate
5046	// by changing cc.
5047	// TODO: Support this for vectors after legalize ops.
5048	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5049	// SETUGT X, SINTMAX -> SETLT X, 0
5050	// SETUGE X, SINTMIN -> SETLT X, 0
5051	if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) \|\|
5052	(Cond == ISD::SETUGE && C1.isMinSignedValue()))
5053	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5054	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: N1.getValueType()),
5055	Cond: ISD::SETLT);
5056
5057	// SETULT X, SINTMIN -> SETGT X, -1
5058	// SETULE X, SINTMAX -> SETGT X, -1
5059	if ((Cond == ISD::SETULT && C1.isMinSignedValue()) \|\|
5060	(Cond == ISD::SETULE && C1.isMaxSignedValue()))
5061	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5062	RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5063	Cond: ISD::SETGT);
5064	}
5065	}
5066
5067	// Back to non-vector simplifications.
5068	// TODO: Can we do these for vector splats?
5069	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5070	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5071	const APInt &C1 = N1C->getAPIntValue();
5072	EVT ShValTy = N0.getValueType();
5073
5074	// Fold bit comparisons when we can. This will result in an
5075	// incorrect value when boolean false is negative one, unless
5076	// the bitsize is 1 in which case the false value is the same
5077	// in practice regardless of the representation.
5078	if ((VT.getSizeInBits() == `1` \|\|
5079	getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5080	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5081	(VT == ShValTy \|\| (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5082	N0.getOpcode() == ISD::AND) {
5083	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5084	if (Cond == ISD::SETNE && C1 == `0`) {// (X & 8) != 0 --> (X & 8) >> 3
5085	// Perform the xform if the AND RHS is a single bit.
5086	unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5087	if (AndRHS->getAPIntValue().isPowerOf2() &&
5088	!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5089	return DAG.getNode(
5090	Opcode: ISD::TRUNCATE, DL: dl, VT,
5091	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5092	N2: DAG.getShiftAmountConstant(
5093	Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5094	}
5095	} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5096	// (X & 8) == 8 --> (X & 8) >> 3
5097	// Perform the xform if C1 is a single bit.
5098	unsigned ShCt = C1.logBase2();
5099	if (C1.isPowerOf2() &&
5100	!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5101	return DAG.getNode(
5102	Opcode: ISD::TRUNCATE, DL: dl, VT,
5103	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5104	N2: DAG.getShiftAmountConstant(
5105	Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5106	}
5107	}
5108	}
5109	}
5110
5111	if (C1.getSignificantBits() <= `64` &&
5112	!isLegalICmpImmediate(C1.getSExtValue())) {
5113	// (X & -256) == 256 -> (X >> 8) == 1
5114	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5115	N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5116	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5117	const APInt &AndRHSC = AndRHS->getAPIntValue();
5118	if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5119	unsigned ShiftBits = AndRHSC.countr_zero();
5120	if (!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5121	SDValue Shift = DAG.getNode(
5122	Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: `0`),
5123	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5124	LegalTypes: !DCI.isBeforeLegalize()));
5125	SDValue CmpRHS = DAG.getConstant(Val: C1.lshr(shiftAmt: ShiftBits), DL: dl, VT: ShValTy);
5126	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5127	}
5128	}
5129	}
5130	} else if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGE \|\|
5131	Cond == ISD::SETULE \|\| Cond == ISD::SETUGT) {
5132	bool AdjOne = (Cond == ISD::SETULE \|\| Cond == ISD::SETUGT);
5133	// X < 0x100000000 -> (X >> 32) < 1
5134	// X >= 0x100000000 -> (X >> 32) >= 1
5135	// X <= 0x0ffffffff -> (X >> 32) < 1
5136	// X > 0x0ffffffff -> (X >> 32) >= 1
5137	unsigned ShiftBits;
5138	APInt NewC = C1;
5139	ISD::CondCode NewCond = Cond;
5140	if (AdjOne) {
5141	ShiftBits = C1.countr_one();
5142	NewC = NewC + `1`;
5143	NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5144	} else {
5145	ShiftBits = C1.countr_zero();
5146	}
5147	NewC.lshrInPlace(ShiftAmt: ShiftBits);
5148	if (ShiftBits && NewC.getSignificantBits() <= `64` &&
5149	isLegalICmpImmediate(NewC.getSExtValue()) &&
5150	!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5151	SDValue Shift =
5152	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5153	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5154	LegalTypes: !DCI.isBeforeLegalize()));
5155	SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5156	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5157	}
5158	}
5159	}
5160	}
5161
5162	if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5163	auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5164	assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5165
5166	// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5167	// constant if knowing that the operand is non-nan is enough. We prefer to
5168	// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5169	// materialize 0.0.
5170	if (Cond == ISD::SETO \|\| Cond == ISD::SETUO)
5171	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5172
5173	// setcc (fneg x), C -> setcc swap(pred) x, -C
5174	if (N0.getOpcode() == ISD::FNEG) {
5175	ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5176	if (DCI.isBeforeLegalizeOps() \|\|
5177	isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5178	SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5179	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: NegN1, Cond: SwapCond);
5180	}
5181	}
5182
5183	// setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5184	if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5185	!isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: `0`))) {
5186	bool IsFabs = N0.getOpcode() == ISD::FABS;
5187	SDValue Op = IsFabs ? N0.getOperand(i: `0`) : N0;
5188	if ((Cond == ISD::SETOEQ \|\| Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5189	FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5190	: (IsFabs ? fcInf : fcPosInf);
5191	if (Cond == ISD::SETUEQ)
5192	Flag \|= fcNan;
5193	return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5194	DAG.getTargetConstant(Flag, dl, MVT::i32));
5195	}
5196	}
5197
5198	// If the condition is not legal, see if we can find an equivalent one
5199	// which is legal.
5200	if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5201	// If the comparison was an awkward floating-point == or != and one of
5202	// the comparison operands is infinity or negative infinity, convert the
5203	// condition to a less-awkward <= or >=.
5204	if (CFP->getValueAPF().isInfinity()) {
5205	bool IsNegInf = CFP->getValueAPF().isNegative();
5206	ISD::CondCode NewCond = ISD::SETCC_INVALID;
5207	switch (Cond) {
5208	case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5209	case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5210	case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5211	case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5212	default: break;
5213	}
5214	if (NewCond != ISD::SETCC_INVALID &&
5215	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5216	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5217	}
5218	}
5219	}
5220
5221	if (N0 == N1) {
5222	// The sext(setcc()) => setcc() optimization relies on the appropriate
5223	// constant being emitted.
5224	assert(!N0.getValueType().isInteger() &&
5225	"Integer types should be handled by FoldSetCC");
5226
5227	bool EqTrue = ISD::isTrueWhenEqual(Cond);
5228	unsigned UOF = ISD::getUnorderedFlavor(Cond);
5229	if (UOF == `2`) // FP operators that are undefined on NaNs.
5230	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5231	if (UOF == unsigned(EqTrue))
5232	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5233	// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5234	// if it is not already.
5235	ISD::CondCode NewCond = UOF == `0` ? ISD::SETO : ISD::SETUO;
5236	if (NewCond != Cond &&
5237	(DCI.isBeforeLegalizeOps() \|\|
5238	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5239	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5240	}
5241
5242	// ~X > ~Y --> Y > X
5243	// ~X < ~Y --> Y < X
5244	// ~X < C --> X > ~C
5245	// ~X > C --> X < ~C
5246	if ((isSignedIntSetCC(Code: Cond) \|\| isUnsignedIntSetCC(Code: Cond)) &&
5247	N0.getValueType().isInteger()) {
5248	if (isBitwiseNot(V: N0)) {
5249	if (isBitwiseNot(V: N1))
5250	return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: `0`), RHS: N0.getOperand(i: `0`), Cond);
5251
5252	if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5253	!DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: `0`))) {
5254	SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5255	return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: `0`), Cond);
5256	}
5257	}
5258	}
5259
5260	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5261	N0.getValueType().isInteger()) {
5262	if (N0.getOpcode() == ISD::ADD \|\| N0.getOpcode() == ISD::SUB \|\|
5263	N0.getOpcode() == ISD::XOR) {
5264	// Simplify (X+Y) == (X+Z) --> Y == Z
5265	if (N0.getOpcode() == N1.getOpcode()) {
5266	if (N0.getOperand(i: `0`) == N1.getOperand(i: `0`))
5267	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `1`), Cond);
5268	if (N0.getOperand(i: `1`) == N1.getOperand(i: `1`))
5269	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `0`), Cond);
5270	if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5271	// If X op Y == Y op X, try other combinations.
5272	if (N0.getOperand(i: `0`) == N1.getOperand(i: `1`))
5273	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `0`),
5274	Cond);
5275	if (N0.getOperand(i: `1`) == N1.getOperand(i: `0`))
5276	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `1`),
5277	Cond);
5278	}
5279	}
5280
5281	// If RHS is a legal immediate value for a compare instruction, we need
5282	// to be careful about increasing register pressure needlessly.
5283	bool LegalRHSImm = false;
5284
5285	if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5286	if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5287	// Turn (X+C1) == C2 --> X == C2-C1
5288	if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5289	return DAG.getSetCC(
5290	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5291	RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5292	DL: dl, VT: N0.getValueType()),
5293	Cond);
5294
5295	// Turn (X^C1) == C2 --> X == C1^C2
5296	if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5297	return DAG.getSetCC(
5298	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5299	RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5300	DL: dl, VT: N0.getValueType()),
5301	Cond);
5302	}
5303
5304	// Turn (C1-X) == C2 --> X == C1-C2
5305	if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `0`)))
5306	if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5307	return DAG.getSetCC(
5308	DL: dl, VT, LHS: N0.getOperand(i: `1`),
5309	RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5310	DL: dl, VT: N0.getValueType()),
5311	Cond);
5312
5313	// Could RHSC fold directly into a compare?
5314	if (RHSC->getValueType(ResNo: `0`).getSizeInBits() <= `64`)
5315	LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5316	}
5317
5318	// (X+Y) == X --> Y == 0 and similar folds.
5319	// Don't do this if X is an immediate that can fold into a cmp
5320	// instruction and X+Y has other uses. It could be an induction variable
5321	// chain, and the transform would increase register pressure.
5322	if (!LegalRHSImm \|\| N0.hasOneUse())
5323	if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5324	return V;
5325	}
5326
5327	if (N1.getOpcode() == ISD::ADD \|\| N1.getOpcode() == ISD::SUB \|\|
5328	N1.getOpcode() == ISD::XOR)
5329	if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5330	return V;
5331
5332	if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5333	return V;
5334	}
5335
5336	// Fold remainder of division by a constant.
5337	if ((N0.getOpcode() == ISD::UREM \|\| N0.getOpcode() == ISD::SREM) &&
5338	N0.hasOneUse() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
5339	// When division is cheap or optimizing for minimum size,
5340	// fall through to DIVREM creation by skipping this fold.
5341	if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5342	if (N0.getOpcode() == ISD::UREM) {
5343	if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5344	return Folded;
5345	} else if (N0.getOpcode() == ISD::SREM) {
5346	if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5347	return Folded;
5348	}
5349	}
5350	}
5351
5352	// Fold away ALL boolean setcc's.
5353	if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5354	SDValue Temp;
5355	switch (Cond) {
5356	default: llvm_unreachable("Unknown integer setcc!");
5357	case ISD::SETEQ: // X == Y -> ~(X^Y)
5358	Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5359	N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5360	if (!DCI.isCalledByLegalizer())
5361	DCI.AddToWorklist(N: Temp.getNode());
5362	break;
5363	case ISD::SETNE: // X != Y --> (X^Y)
5364	N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5365	break;
5366	case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5367	case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5368	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5369	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5370	if (!DCI.isCalledByLegalizer())
5371	DCI.AddToWorklist(N: Temp.getNode());
5372	break;
5373	case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5374	case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5375	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5376	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5377	if (!DCI.isCalledByLegalizer())
5378	DCI.AddToWorklist(N: Temp.getNode());
5379	break;
5380	case ISD::SETULE: // X <=u Y --> X == 0 \| Y == 1 --> ~X \| Y
5381	case ISD::SETGE: // X >=s Y --> X == 0 \| Y == 1 --> ~X \| Y
5382	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5383	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5384	if (!DCI.isCalledByLegalizer())
5385	DCI.AddToWorklist(N: Temp.getNode());
5386	break;
5387	case ISD::SETUGE: // X >=u Y --> X == 1 \| Y == 0 --> ~Y \| X
5388	case ISD::SETLE: // X <=s Y --> X == 1 \| Y == 0 --> ~Y \| X
5389	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5390	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5391	break;
5392	}
5393	if (VT.getScalarType() != MVT::i1) {
5394	if (!DCI.isCalledByLegalizer())
5395	DCI.AddToWorklist(N: N0.getNode());
5396	// FIXME: If running after legalize, we probably can't do this.
5397	ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5398	N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5399	}
5400	return N0;
5401	}
5402
5403	// Could not fold it.
5404	return SDValue ();
5405	}
5406
5407	/// Returns true (and the GlobalValue and the offset) if the node is a
5408	/// GlobalAddress + offset.
5409	bool TargetLowering::isGAPlusOffset(SDNode WN, const* GlobalValue *&GA,
5410	int64_t &Offset) const {
5411
5412	SDNode *N = unwrapAddress(N: SDValue (WN, `0`)).getNode();
5413
5414	if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5415	GA = GASD->getGlobal();
5416	Offset += GASD->getOffset();
5417	return true;
5418	}
5419
5420	if (N->getOpcode() == ISD::ADD) {
5421	SDValue N1 = N->getOperand(Num: `0`);
5422	SDValue N2 = N->getOperand(Num: `1`);
5423	if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5424	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5425	Offset += V->getSExtValue();
5426	return true;
5427	}
5428	} else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5429	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5430	Offset += V->getSExtValue();
5431	return true;
5432	}
5433	}
5434	}
5435
5436	return false;
5437	}
5438
5439	SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5440	DAGCombinerInfo &DCI) const {
5441	// Default implementation: no optimization.
5442	return SDValue ();
5443	}
5444
5445	//===----------------------------------------------------------------------===//
5446	// Inline Assembler Implementation Methods
5447	//===----------------------------------------------------------------------===//
5448
5449	TargetLowering::ConstraintType
5450	TargetLowering::getConstraintType(StringRef Constraint) const {
5451	unsigned S = Constraint.size();
5452
5453	if (S == `1`) {
5454	switch (Constraint [`0`]) {
5455	default: break;
5456	case `'r'`:
5457	return C_RegisterClass;
5458	case `'m'`: // memory
5459	case `'o'`: // offsetable
5460	case `'V'`: // not offsetable
5461	return C_Memory;
5462	case `'p'`: // Address.
5463	return C_Address;
5464	case `'n'`: // Simple Integer
5465	case `'E'`: // Floating Point Constant
5466	case `'F'`: // Floating Point Constant
5467	return C_Immediate;
5468	case `'i'`: // Simple Integer or Relocatable Constant
5469	case `'s'`: // Relocatable Constant
5470	case `'X'`: // Allow ANY value.
5471	case `'I'`: // Target registers.
5472	case `'J'`:
5473	case `'K'`:
5474	case `'L'`:
5475	case `'M'`:
5476	case `'N'`:
5477	case `'O'`:
5478	case `'P'`:
5479	case `'<'`:
5480	case `'>'`:
5481	return C_Other;
5482	}
5483	}
5484
5485	if (S > `1` && Constraint [`0`] == `'{'` && Constraint [S - `1`] == `'}'`) {
5486	if (S == `8` && Constraint.substr(Start: `1`, N: `6`) == "memory") // "{memory}"
5487	return C_Memory;
5488	return C_Register;
5489	}
5490	return C_Unknown;
5491	}
5492
5493	/// Try to replace an X constraint, which matches anything, with another that
5494	/// has more specific requirements based on the type of the corresponding
5495	/// operand.
5496	const char TargetLowering::LowerXConstraint(EVT ConstraintVT) const* {
5497	if (ConstraintVT.isInteger())
5498	return "r";
5499	if (ConstraintVT.isFloatingPoint())
5500	return "f"; // works for many targets
5501	return nullptr;
5502	}
5503
5504	SDValue TargetLowering::LowerAsmOutputForConstraint(
5505	SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5506	const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5507	return SDValue ();
5508	}
5509
5510	/// Lower the specified operand into the Ops vector.
5511	/// If it is invalid, don't add anything to Ops.
5512	void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5513	StringRef Constraint,
5514	std::vector<SDValue> &Ops,
5515	SelectionDAG &DAG) const {
5516
5517	if (Constraint.size() > `1`)
5518	return;
5519
5520	char ConstraintLetter = Constraint [`0`];
5521	switch (ConstraintLetter) {
5522	default: break;
5523	case `'X'`: // Allows any operand
5524	case `'i'`: // Simple Integer or Relocatable Constant
5525	case `'n'`: // Simple Integer
5526	case `'s'`: { // Relocatable Constant
5527
5528	ConstantSDNode *C;
5529	uint64_t Offset = `0`;
5530
5531	// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5532	// etc., since getelementpointer is variadic. We can't use
5533	// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5534	// while in this case the GA may be furthest from the root node which is
5535	// likely an ISD::ADD.
5536	while (true) {
5537	if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != `'s'`) {
5538	// gcc prints these as sign extended. Sign extend value to 64 bits
5539	// now; without this it would get ZExt'd later in
5540	// ScheduleDAGSDNodes::EmitNode, which is very generic.
5541	bool IsBool = C->getConstantIntValue()->getBitWidth() == `1`;
5542	BooleanContent BCont = getBooleanContents(MVT::i64);
5543	ISD::NodeType ExtOpc =
5544	IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5545	int64_t ExtVal =
5546	ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5547	Ops.push_back(
5548	DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5549	return;
5550	}
5551	if (ConstraintLetter != `'n'`) {
5552	if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5553	Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc (Op),
5554	VT: GA->getValueType(ResNo: `0`),
5555	offset: Offset + GA->getOffset()));
5556	return;
5557	}
5558	if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5559	Ops.push_back(x: DAG.getTargetBlockAddress(
5560	BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: `0`),
5561	Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5562	return;
5563	}
5564	if (isa<BasicBlockSDNode>(Val: Op)) {
5565	Ops.push_back(x: Op);
5566	return;
5567	}
5568	}
5569	const unsigned OpCode = Op.getOpcode();
5570	if (OpCode == ISD::ADD \|\| OpCode == ISD::SUB) {
5571	if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `0`))))
5572	Op = Op.getOperand(i: `1`);
5573	// Subtraction is not commutative.
5574	else if (OpCode == ISD::ADD &&
5575	(C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`))))
5576	Op = Op.getOperand(i: `0`);
5577	else
5578	return;
5579	Offset += (OpCode == ISD::ADD ? `1` : -`1`) * C->getSExtValue();
5580	continue;
5581	}
5582	return;
5583	}
5584	break;
5585	}
5586	}
5587	}
5588
5589	void TargetLowering::CollectTargetIntrinsicOperands(
5590	const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5591	}
5592
5593	std::pair<unsigned, const TargetRegisterClass *>
5594	TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5595	StringRef Constraint,
5596	MVT VT) const {
5597	if (!Constraint.starts_with(Prefix: "{"))
5598	return std::make_pair(x: `0u`, y: static_cast<TargetRegisterClass >(nullptr*));
5599	assert(*(Constraint.end() - `1`) == `'}'` && "Not a brace enclosed constraint?");
5600
5601	// Remove the braces from around the name.
5602	StringRef RegName(Constraint.data() + `1`, Constraint.size() - `2`);
5603
5604	std::pair<unsigned, const TargetRegisterClass *> R =
5605	std::make_pair(x: `0u`, y: static_cast<const TargetRegisterClass >(nullptr*));
5606
5607	// Figure out which register class contains this reg.
5608	for (const TargetRegisterClass *RC : RI->regclasses()) {
5609	// If none of the value types for this register class are valid, we
5610	// can't use it. For example, 64-bit reg classes on 32-bit targets.
5611	if (!isLegalRC(TRI: RI, RC: RC))
5612	continue;
5613
5614	for (const MCPhysReg &PR : *RC) {
5615	if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5616	std::pair<unsigned, const TargetRegisterClass *> S =
5617	std::make_pair(x: PR, y&: RC);
5618
5619	// If this register class has the requested value type, return it,
5620	// otherwise keep searching and return the first class found
5621	// if no other is found which explicitly has the requested type.
5622	if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5623	return S;
5624	if (!R.second)
5625	R = S;
5626	}
5627	}
5628	}
5629
5630	return R;
5631	}
5632
5633	//===----------------------------------------------------------------------===//
5634	// Constraint Selection.
5635
5636	/// Return true of this is an input operand that is a matching constraint like
5637	/// "4".
5638	bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5639	assert(!ConstraintCode.empty() && "No known constraint!");
5640	return isdigit(static_cast<unsigned char>(ConstraintCode [`0`]));
5641	}
5642
5643	/// If this is an input matching constraint, this method returns the output
5644	/// operand it matches.
5645	unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5646	assert(!ConstraintCode.empty() && "No known constraint!");
5647	return atoi(nptr: ConstraintCode.c_str());
5648	}
5649
5650	/// Split up the constraint string from the inline assembly value into the
5651	/// specific constraints and their prefixes, and also tie in the associated
5652	/// operand values.
5653	/// If this returns an empty vector, and if the constraint string itself
5654	/// isn't empty, there was an error parsing.
5655	TargetLowering::AsmOperandInfoVector
5656	TargetLowering::ParseConstraints(const DataLayout &DL,
5657	const TargetRegisterInfo *TRI,
5658	const CallBase &Call) const {
5659	/// Information about all of the constraints.
5660	AsmOperandInfoVector ConstraintOperands;
5661	const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5662	unsigned maCount = `0`; // Largest number of multiple alternative constraints.
5663
5664	// Do a prepass over the constraints, canonicalizing them, and building up the
5665	// ConstraintOperands list.
5666	unsigned ArgNo = `0`; // ArgNo - The argument of the CallInst.
5667	unsigned ResNo = `0`; // ResNo - The result number of the next output.
5668	unsigned LabelNo = `0`; // LabelNo - CallBr indirect dest number.
5669
5670	for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5671	ConstraintOperands.emplace_back(args: std::move(CI));
5672	AsmOperandInfo &OpInfo = ConstraintOperands.back();
5673
5674	// Update multiple alternative constraint count.
5675	if (OpInfo.multipleAlternatives.size() > maCount)
5676	maCount = OpInfo.multipleAlternatives.size();
5677
5678	OpInfo.ConstraintVT = MVT::Other;
5679
5680	// Compute the value type for each operand.
5681	switch (OpInfo.Type) {
5682	case InlineAsm::isOutput:
5683	// Indirect outputs just consume an argument.
5684	if (OpInfo.isIndirect) {
5685	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5686	break;
5687	}
5688
5689	// The return value of the call is this value. As such, there is no
5690	// corresponding argument.
5691	assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5692	if (StructType *STy = dyn_cast<StructType>(Val: Call.getType())) {
5693	OpInfo.ConstraintVT =
5694	getSimpleValueType(DL, Ty: STy->getElementType(N: ResNo));
5695	} else {
5696	assert(ResNo == `0` && "Asm only has one result!");
5697	OpInfo.ConstraintVT =
5698	getAsmOperandValueType(DL, Ty: Call.getType()).getSimpleVT();
5699	}
5700	++ResNo;
5701	break;
5702	case InlineAsm::isInput:
5703	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5704	break;
5705	case InlineAsm::isLabel:
5706	OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
5707	++LabelNo;
5708	continue;
5709	case InlineAsm::isClobber:
5710	// Nothing to do.
5711	break;
5712	}
5713
5714	if (OpInfo.CallOperandVal) {
5715	llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5716	if (OpInfo.isIndirect) {
5717	OpTy = Call.getParamElementType(ArgNo);
5718	assert(OpTy && "Indirect operand must have elementtype attribute");
5719	}
5720
5721	// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5722	if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
5723	if (STy->getNumElements() == `1`)
5724	OpTy = STy->getElementType(N: `0`);
5725
5726	// If OpTy is not a single value, it may be a struct/union that we
5727	// can tile with integers.
5728	if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5729	unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
5730	switch (BitSize) {
5731	default: break;
5732	case `1`:
5733	case `8`:
5734	case `16`:
5735	case `32`:
5736	case `64`:
5737	case `128`:
5738	OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
5739	break;
5740	}
5741	}
5742
5743	EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
5744	OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5745	ArgNo++;
5746	}
5747	}
5748
5749	// If we have multiple alternative constraints, select the best alternative.
5750	if (!ConstraintOperands.empty()) {
5751	if (maCount) {
5752	unsigned bestMAIndex = `0`;
5753	int bestWeight = -`1`;
5754	// weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5755	int weight = -`1`;
5756	unsigned maIndex;
5757	// Compute the sums of the weights for each alternative, keeping track
5758	// of the best (highest weight) one so far.
5759	for (maIndex = `0`; maIndex < maCount; ++maIndex) {
5760	int weightSum = `0`;
5761	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
5762	cIndex != eIndex; ++cIndex) {
5763	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
5764	if (OpInfo.Type == InlineAsm::isClobber)
5765	continue;
5766
5767	// If this is an output operand with a matching input operand,
5768	// look up the matching input. If their types mismatch, e.g. one
5769	// is an integer, the other is floating point, or their sizes are
5770	// different, flag it as an maCantMatch.
5771	if (OpInfo.hasMatchingInput()) {
5772	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
5773	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5774	if ((OpInfo.ConstraintVT.isInteger() !=
5775	Input.ConstraintVT.isInteger()) \|\|
5776	(OpInfo.ConstraintVT.getSizeInBits() !=
5777	Input.ConstraintVT.getSizeInBits())) {
5778	weightSum = -`1`; // Can't match.
5779	break;
5780	}
5781	}
5782	}
5783	weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
5784	if (weight == -`1`) {
5785	weightSum = -`1`;
5786	break;
5787	}
5788	weightSum += weight;
5789	}
5790	// Update best.
5791	if (weightSum > bestWeight) {
5792	bestWeight = weightSum;
5793	bestMAIndex = maIndex;
5794	}
5795	}
5796
5797	// Now select chosen alternative in each constraint.
5798	for (AsmOperandInfo &cInfo : ConstraintOperands)
5799	if (cInfo.Type != InlineAsm::isClobber)
5800	cInfo.selectAlternative(index: bestMAIndex);
5801	}
5802	}
5803
5804	// Check and hook up tied operands, choose constraint code to use.
5805	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
5806	cIndex != eIndex; ++cIndex) {
5807	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
5808
5809	// If this is an output operand with a matching input operand, look up the
5810	// matching input. If their types mismatch, e.g. one is an integer, the
5811	// other is floating point, or their sizes are different, flag it as an
5812	// error.
5813	if (OpInfo.hasMatchingInput()) {
5814	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
5815
5816	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5817	std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5818	getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
5819	VT: OpInfo.ConstraintVT);
5820	std::pair<unsigned, const TargetRegisterClass *> InputRC =
5821	getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
5822	VT: Input.ConstraintVT);
5823	if ((OpInfo.ConstraintVT.isInteger() !=
5824	Input.ConstraintVT.isInteger()) \|\|
5825	(MatchRC.second != InputRC.second)) {
5826	report_fatal_error(reason: "Unsupported asm: input constraint"
5827	" with a matching output constraint of"
5828	" incompatible type!");
5829	}
5830	}
5831	}
5832	}
5833
5834	return ConstraintOperands;
5835	}
5836
5837	/// Return a number indicating our preference for chosing a type of constraint
5838	/// over another, for the purpose of sorting them. Immediates are almost always
5839	/// preferrable (when they can be emitted). A higher return value means a
5840	/// stronger preference for one constraint type relative to another.
5841	/// FIXME: We should prefer registers over memory but doing so may lead to
5842	/// unrecoverable register exhaustion later.
5843	/// https://github.com/llvm/llvm-project/issues/20571
5844	static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5845	switch (CT) {
5846	case TargetLowering::C_Immediate:
5847	case TargetLowering::C_Other:
5848	return `4`;
5849	case TargetLowering::C_Memory:
5850	case TargetLowering::C_Address:
5851	return `3`;
5852	case TargetLowering::C_RegisterClass:
5853	return `2`;
5854	case TargetLowering::C_Register:
5855	return `1`;
5856	case TargetLowering::C_Unknown:
5857	return `0`;
5858	}
5859	llvm_unreachable("Invalid constraint type");
5860	}
5861
5862	/// Examine constraint type and operand type and determine a weight value.
5863	/// This object must already have been set up with the operand type
5864	/// and the current alternative constraint selected.
5865	TargetLowering::ConstraintWeight
5866	TargetLowering::getMultipleConstraintMatchWeight(
5867	AsmOperandInfo &info, int maIndex) const {
5868	InlineAsm::ConstraintCodeVector *rCodes;
5869	if (maIndex >= (int)info.multipleAlternatives.size())
5870	rCodes = &info.Codes;
5871	else
5872	rCodes = &info.multipleAlternatives [maIndex].Codes;
5873	ConstraintWeight BestWeight = CW_Invalid;
5874
5875	// Loop over the options, keeping track of the most general one.
5876	for (const std::string &rCode : *rCodes) {
5877	ConstraintWeight weight =
5878	getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
5879	if (weight > BestWeight)
5880	BestWeight = weight;
5881	}
5882
5883	return BestWeight;
5884	}
5885
5886	/// Examine constraint type and operand type and determine a weight value.
5887	/// This object must already have been set up with the operand type
5888	/// and the current alternative constraint selected.
5889	TargetLowering::ConstraintWeight
5890	TargetLowering::getSingleConstraintMatchWeight(
5891	AsmOperandInfo &info, const char constraint) const* {
5892	ConstraintWeight weight = CW_Invalid;
5893	Value *CallOperandVal = info.CallOperandVal;
5894	// If we don't have a value, we can't do a match,
5895	// but allow it at the lowest weight.
5896	if (!CallOperandVal)
5897	return CW_Default;
5898	// Look at the constraint type.
5899	switch (*constraint) {
5900	case `'i'`: // immediate integer.
5901	case `'n'`: // immediate integer with a known value.
5902	if (isa<ConstantInt>(Val: CallOperandVal))
5903	weight = CW_Constant;
5904	break;
5905	case `'s'`: // non-explicit intregal immediate.
5906	if (isa<GlobalValue>(Val: CallOperandVal))
5907	weight = CW_Constant;
5908	break;
5909	case `'E'`: // immediate float if host format.
5910	case `'F'`: // immediate float.
5911	if (isa<ConstantFP>(Val: CallOperandVal))
5912	weight = CW_Constant;
5913	break;
5914	case `'<'`: // memory operand with autodecrement.
5915	case `'>'`: // memory operand with autoincrement.
5916	case `'m'`: // memory operand.
5917	case `'o'`: // offsettable memory operand
5918	case `'V'`: // non-offsettable memory operand
5919	weight = CW_Memory;
5920	break;
5921	case `'r'`: // general register.
5922	case `'g'`: // general register, memory operand or immediate integer.
5923	// note: Clang converts "g" to "imr".
5924	if (CallOperandVal->getType()->isIntegerTy())
5925	weight = CW_Register;
5926	break;
5927	case `'X'`: // any operand.
5928	default:
5929	weight = CW_Default;
5930	break;
5931	}
5932	return weight;
5933	}
5934
5935	/// If there are multiple different constraints that we could pick for this
5936	/// operand (e.g. "imr") try to pick the 'best' one.
5937	/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5938	/// into seven classes:
5939	/// Register -> one specific register
5940	/// RegisterClass -> a group of regs
5941	/// Memory -> memory
5942	/// Address -> a symbolic memory reference
5943	/// Immediate -> immediate values
5944	/// Other -> magic values (such as "Flag Output Operands")
5945	/// Unknown -> something we don't recognize yet and can't handle
5946	/// Ideally, we would pick the most specific constraint possible: if we have
5947	/// something that fits into a register, we would pick it. The problem here
5948	/// is that if we have something that could either be in a register or in
5949	/// memory that use of the register could cause selection of other
5950	/// operands to fail: they might only succeed if we pick memory. Because of
5951	/// this the heuristic we use is:
5952	///
5953	/// 1) If there is an 'other' constraint, and if the operand is valid for
5954	/// that constraint, use it. This makes us take advantage of 'i'
5955	/// constraints when available.
5956	/// 2) Otherwise, pick the most general constraint present. This prefers
5957	/// 'm' over 'r', for example.
5958	///
5959	TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5960	TargetLowering::AsmOperandInfo &OpInfo) const {
5961	ConstraintGroup Ret;
5962
5963	Ret.reserve(N: OpInfo.Codes.size());
5964	for (StringRef Code : OpInfo.Codes) {
5965	TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
5966
5967	// Indirect 'other' or 'immediate' constraints are not allowed.
5968	if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory \|\|
5969	CType == TargetLowering::C_Register \|\|
5970	CType == TargetLowering::C_RegisterClass))
5971	continue;
5972
5973	// Things with matching constraints can only be registers, per gcc
5974	// documentation. This mainly affects "g" constraints.
5975	if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5976	continue;
5977
5978	Ret.emplace_back(Args&: Code, Args&: CType);
5979	}
5980
5981	std::stable_sort(
5982	first: Ret.begin(), last: Ret.end(), comp: [](ConstraintPair a, ConstraintPair b) {
5983	return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
5984	});
5985
5986	return Ret;
5987	}
5988
5989	/// If we have an immediate, see if we can lower it. Return true if we can,
5990	/// false otherwise.
5991	static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
5992	SDValue Op, SelectionDAG *DAG,
5993	const TargetLowering &TLI) {
5994
5995	assert((P.second == TargetLowering::C_Other \|\|
5996	P.second == TargetLowering::C_Immediate) &&
5997	"need immediate or other");
5998
5999	if (!Op.getNode())
6000	return false;
6001
6002	std::vector<SDValue> ResultOps;
6003	TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6004	return !ResultOps.empty();
6005	}
6006
6007	/// Determines the constraint code and constraint type to use for the specific
6008	/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6009	void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6010	SDValue Op,
6011	SelectionDAG DAG) const* {
6012	assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6013
6014	// Single-letter constraints ('r') are very common.
6015	if (OpInfo.Codes.size() == `1`) {
6016	OpInfo.ConstraintCode = OpInfo.Codes [`0`];
6017	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6018	} else {
6019	ConstraintGroup G = getConstraintPreferences(OpInfo);
6020	if (G.empty())
6021	return;
6022
6023	unsigned BestIdx = `0`;
6024	for (const unsigned E = G.size();
6025	BestIdx < E && (G [BestIdx].second == TargetLowering::C_Other \|\|
6026	G [BestIdx].second == TargetLowering::C_Immediate);
6027	++BestIdx) {
6028	if (lowerImmediateIfPossible(P&: G [BestIdx], Op, DAG, TLI: *this))
6029	break;
6030	// If we're out of constraints, just pick the first one.
6031	if (BestIdx + `1` == E) {
6032	BestIdx = `0`;
6033	break;
6034	}
6035	}
6036
6037	OpInfo.ConstraintCode = G [BestIdx].first;
6038	OpInfo.ConstraintType = G [BestIdx].second;
6039	}
6040
6041	// 'X' matches anything.
6042	if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6043	// Constants are handled elsewhere. For Functions, the type here is the
6044	// type of the result, which is not what we want to look at; leave them
6045	// alone.
6046	Value *v = OpInfo.CallOperandVal;
6047	if (isa<ConstantInt>(Val: v) \|\| isa<Function>(Val: v)) {
6048	return;
6049	}
6050
6051	if (isa<BasicBlock>(Val: v) \|\| isa<BlockAddress>(Val: v)) {
6052	OpInfo.ConstraintCode = "i";
6053	return;
6054	}
6055
6056	// Otherwise, try to resolve it to something we know about by looking at
6057	// the actual operand type.
6058	if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6059	OpInfo.ConstraintCode = Repl;
6060	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6061	}
6062	}
6063	}
6064
6065	/// Given an exact SDIV by a constant, create a multiplication
6066	/// with the multiplicative inverse of the constant.
6067	static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6068	const SDLoc &dl, SelectionDAG &DAG,
6069	SmallVectorImpl<SDNode *> &Created) {
6070	SDValue Op0 = N->getOperand(Num: `0`);
6071	SDValue Op1 = N->getOperand(Num: `1`);
6072	EVT VT = N->getValueType(ResNo: `0`);
6073	EVT SVT = VT.getScalarType();
6074	EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6075	EVT ShSVT = ShVT.getScalarType();
6076
6077	bool UseSRA = false;
6078	SmallVector<SDValue, `16`> Shifts, Factors;
6079
6080	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6081	if (C->isZero())
6082	return false;
6083	APInt Divisor = C->getAPIntValue();
6084	unsigned Shift = Divisor.countr_zero();
6085	if (Shift) {
6086	Divisor.ashrInPlace(ShiftAmt: Shift);
6087	UseSRA = true;
6088	}
6089	APInt Factor = Divisor.multiplicativeInverse();
6090	Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6091	Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6092	return true;
6093	};
6094
6095	// Collect all magic values from the build vector.
6096	if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6097	return SDValue ();
6098
6099	SDValue Shift, Factor;
6100	if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6101	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6102	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6103	} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6104	assert(Shifts.size() == `1` && Factors.size() == `1` &&
6105	"Expected matchUnaryPredicate to return one element for scalable "
6106	"vectors");
6107	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6108	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6109	} else {
6110	assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6111	Shift = Shifts [`0`];
6112	Factor = Factors [`0`];
6113	}
6114
6115	SDValue Res = Op0;
6116
6117	// Shift the value upfront if it is even, so the LSB is one.
6118	if (UseSRA) {
6119	// TODO: For UDIV use SRL instead of SRA.
6120	SDNodeFlags Flags;
6121	Flags.setExact(true);
6122	Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags);
6123	Created.push_back(Elt: Res.getNode());
6124	}
6125
6126	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6127	}
6128
6129	SDValue TargetLowering::BuildSDIVPow2(SDNode N, const* APInt &Divisor,
6130	SelectionDAG &DAG,
6131	SmallVectorImpl<SDNode > &Created) const* {
6132	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6133	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6134	if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6135	return SDValue (N, `0`); // Lower SDIV as SDIV
6136	return SDValue ();
6137	}
6138
6139	SDValue
6140	TargetLowering::BuildSREMPow2(SDNode N, const* APInt &Divisor,
6141	SelectionDAG &DAG,
6142	SmallVectorImpl<SDNode > &Created) const* {
6143	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6144	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6145	if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6146	return SDValue (N, `0`); // Lower SREM as SREM
6147	return SDValue ();
6148	}
6149
6150	/// Build sdiv by power-of-2 with conditional move instructions
6151	/// Ref: "Hacker's Delight" by Henry Warren 10-1
6152	/// If conditional move/branch is preferred, we lower sdiv x, +/-2k into:
6153	/// bgez x, label
6154	/// add x, x, 2k-1
6155	/// label:
6156	/// sra res, x, k
6157	/// neg res, res (when the divisor is negative)
6158	SDValue TargetLowering::buildSDIVPow2WithCMov(
6159	SDNode N, const* APInt &Divisor, SelectionDAG &DAG,
6160	SmallVectorImpl<SDNode > &Created) const* {
6161	unsigned Lg2 = Divisor.countr_zero();
6162	EVT VT = N->getValueType(ResNo: `0`);
6163
6164	SDLoc DL(N);
6165	SDValue N0 = N->getOperand(Num: `0`);
6166	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
6167	APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6168	SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6169
6170	// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6171	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6172	SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6173	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6174	SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6175
6176	Created.push_back(Elt: Cmp.getNode());
6177	Created.push_back(Elt: Add.getNode());
6178	Created.push_back(Elt: CMov.getNode());
6179
6180	// Divide by pow2.
6181	SDValue SRA =
6182	DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov, N2: DAG.getConstant(Val: Lg2, DL, VT));
6183
6184	// If we're dividing by a positive value, we're done. Otherwise, we must
6185	// negate the result.
6186	if (Divisor.isNonNegative())
6187	return SRA;
6188
6189	Created.push_back(Elt: SRA.getNode());
6190	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6191	}
6192
6193	/// Given an ISD::SDIV node expressing a divide by constant,
6194	/// return a DAG expression to select that will generate the same value by
6195	/// multiplying by a magic number.
6196	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6197	SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6198	bool IsAfterLegalization,
6199	SmallVectorImpl<SDNode > &Created) const* {
6200	SDLoc dl(N);
6201	EVT VT = N->getValueType(ResNo: `0`);
6202	EVT SVT = VT.getScalarType();
6203	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6204	EVT ShSVT = ShVT.getScalarType();
6205	unsigned EltBits = VT.getScalarSizeInBits();
6206	EVT MulVT;
6207
6208	// Check to see if we can do this.
6209	// FIXME: We should be more aggressive here.
6210	if (!isTypeLegal(VT)) {
6211	// Limit this to simple scalars for now.
6212	if (VT.isVector() \|\| !VT.isSimple())
6213	return SDValue ();
6214
6215	// If this type will be promoted to a large enough type with a legal
6216	// multiply operation, we can go ahead and do this transform.
6217	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6218	return SDValue ();
6219
6220	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6221	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6222	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6223	return SDValue ();
6224	}
6225
6226	// If the sdiv has an 'exact' bit we can use a simpler lowering.
6227	if (N->getFlags().hasExact())
6228	return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6229
6230	SmallVector<SDValue, `16`> MagicFactors, Factors, Shifts, ShiftMasks;
6231
6232	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6233	if (C->isZero())
6234	return false;
6235
6236	const APInt &Divisor = C->getAPIntValue();
6237	SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6238	int NumeratorFactor = `0`;
6239	int ShiftMask = -`1`;
6240
6241	if (Divisor.isOne() \|\| Divisor.isAllOnes()) {
6242	// If d is +1/-1, we just multiply the numerator by +1/-1.
6243	NumeratorFactor = Divisor.getSExtValue();
6244	magics.Magic = `0`;
6245	magics.ShiftAmount = `0`;
6246	ShiftMask = `0`;
6247	} else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6248	// If d > 0 and m < 0, add the numerator.
6249	NumeratorFactor = `1`;
6250	} else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6251	// If d < 0 and m > 0, subtract the numerator.
6252	NumeratorFactor = -`1`;
6253	}
6254
6255	MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6256	Factors.push_back(Elt: DAG.getConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6257	Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6258	ShiftMasks.push_back(Elt: DAG.getConstant(Val: ShiftMask, DL: dl, VT: SVT));
6259	return true;
6260	};
6261
6262	SDValue N0 = N->getOperand(Num: `0`);
6263	SDValue N1 = N->getOperand(Num: `1`);
6264
6265	// Collect the shifts / magic values from each element.
6266	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern))
6267	return SDValue ();
6268
6269	SDValue MagicFactor, Factor, Shift, ShiftMask;
6270	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6271	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6272	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6273	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6274	ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6275	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6276	assert(MagicFactors.size() == `1` && Factors.size() == `1` &&
6277	Shifts.size() == `1` && ShiftMasks.size() == `1` &&
6278	"Expected matchUnaryPredicate to return one element for scalable "
6279	"vectors");
6280	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6281	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6282	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6283	ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks [`0`]);
6284	} else {
6285	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6286	MagicFactor = MagicFactors [`0`];
6287	Factor = Factors [`0`];
6288	Shift = Shifts [`0`];
6289	ShiftMask = ShiftMasks [`0`];
6290	}
6291
6292	// Multiply the numerator (operand 0) by the magic value.
6293	// FIXME: We should support doing a MUL in a wider type.
6294	auto GetMULHS = [&](SDValue X, SDValue Y) {
6295	// If the type isn't legal, use a wider mul of the type calculated
6296	// earlier.
6297	if (!isTypeLegal(VT)) {
6298	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6299	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6300	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6301	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6302	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6303	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6304	}
6305
6306	if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6307	return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6308	if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6309	SDValue LoHi =
6310	DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6311	return SDValue (LoHi.getNode(), `1`);
6312	}
6313	// If type twice as wide legal, widen and use a mul plus a shift.
6314	unsigned Size = VT.getScalarSizeInBits();
6315	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size `2`);
6316	if (VT.isVector())
6317	WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6318	EC: VT.getVectorElementCount());
6319	if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6320	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6321	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6322	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6323	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6324	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6325	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6326	}
6327	return SDValue ();
6328	};
6329
6330	SDValue Q = GetMULHS (N0, MagicFactor);
6331	if (!Q)
6332	return SDValue ();
6333
6334	Created.push_back(Elt: Q.getNode());
6335
6336	// (Optionally) Add/subtract the numerator using Factor.
6337	Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6338	Created.push_back(Elt: Factor.getNode());
6339	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6340	Created.push_back(Elt: Q.getNode());
6341
6342	// Shift right algebraic by shift value.
6343	Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6344	Created.push_back(Elt: Q.getNode());
6345
6346	// Extract the sign bit, mask it and add it to the quotient.
6347	SDValue SignShift = DAG.getConstant(Val: EltBits - `1`, DL: dl, VT: ShVT);
6348	SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6349	Created.push_back(Elt: T.getNode());
6350	T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6351	Created.push_back(Elt: T.getNode());
6352	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6353	}
6354
6355	/// Given an ISD::UDIV node expressing a divide by constant,
6356	/// return a DAG expression to select that will generate the same value by
6357	/// multiplying by a magic number.
6358	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6359	SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6360	bool IsAfterLegalization,
6361	SmallVectorImpl<SDNode > &Created) const* {
6362	SDLoc dl(N);
6363	EVT VT = N->getValueType(ResNo: `0`);
6364	EVT SVT = VT.getScalarType();
6365	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6366	EVT ShSVT = ShVT.getScalarType();
6367	unsigned EltBits = VT.getScalarSizeInBits();
6368	EVT MulVT;
6369
6370	// Check to see if we can do this.
6371	// FIXME: We should be more aggressive here.
6372	if (!isTypeLegal(VT)) {
6373	// Limit this to simple scalars for now.
6374	if (VT.isVector() \|\| !VT.isSimple())
6375	return SDValue ();
6376
6377	// If this type will be promoted to a large enough type with a legal
6378	// multiply operation, we can go ahead and do this transform.
6379	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6380	return SDValue ();
6381
6382	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6383	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6384	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6385	return SDValue ();
6386	}
6387
6388	SDValue N0 = N->getOperand(Num: `0`);
6389	SDValue N1 = N->getOperand(Num: `1`);
6390
6391	// Try to use leading zeros of the dividend to reduce the multiplier and
6392	// avoid expensive fixups.
6393	// TODO: Support vectors.
6394	unsigned LeadingZeros = `0`;
6395	if (!VT.isVector() && isa<ConstantSDNode>(Val: N1)) {
6396	assert(!isOneConstant(N1) && "Unexpected divisor");
6397	LeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6398	// UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
6399	// the dividend exceeds the leading zeros for the divisor.
6400	LeadingZeros = std::min(a: LeadingZeros, b: N1 ->getAsAPIntVal().countl_zero());
6401	}
6402
6403	bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6404	SmallVector<SDValue, `16`> PreShifts, PostShifts, MagicFactors, NPQFactors;
6405
6406	auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6407	if (C->isZero())
6408	return false;
6409	const APInt& Divisor = C->getAPIntValue();
6410
6411	SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6412
6413	// Magic algorithm doesn't work for division by 1. We need to emit a select
6414	// at the end.
6415	if (Divisor.isOne()) {
6416	PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6417	MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6418	} else {
6419	UnsignedDivisionByConstantInfo magics =
6420	UnsignedDivisionByConstantInfo::get(D: Divisor, LeadingZeros);
6421
6422	MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT);
6423
6424	assert(magics.PreShift < Divisor.getBitWidth() &&
6425	"We shouldn't generate an undefined shift!");
6426	assert(magics.PostShift < Divisor.getBitWidth() &&
6427	"We shouldn't generate an undefined shift!");
6428	assert((!magics.IsAdd \|\| magics.PreShift == `0`) &&
6429	"Unexpected pre-shift");
6430	PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6431	PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6432	NPQFactor = DAG.getConstant(
6433	Val: magics.IsAdd ? APInt::getOneBitSet(numBits: EltBits, BitNo: EltBits - `1`)
6434	: APInt::getZero(numBits: EltBits),
6435	DL: dl, VT: SVT);
6436	UseNPQ \|= magics.IsAdd;
6437	UsePreShift \|= magics.PreShift != `0`;
6438	UsePostShift \|= magics.PostShift != `0`;
6439	}
6440
6441	PreShifts.push_back(Elt: PreShift);
6442	MagicFactors.push_back(Elt: MagicFactor);
6443	NPQFactors.push_back(Elt: NPQFactor);
6444	PostShifts.push_back(Elt: PostShift);
6445	return true;
6446	};
6447
6448	// Collect the shifts/magic values from each element.
6449	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern))
6450	return SDValue ();
6451
6452	SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6453	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6454	PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6455	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6456	NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6457	PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6458	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6459	assert(PreShifts.size() == `1` && MagicFactors.size() == `1` &&
6460	NPQFactors.size() == `1` && PostShifts.size() == `1` &&
6461	"Expected matchUnaryPredicate to return one for scalable vectors");
6462	PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts [`0`]);
6463	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6464	NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors [`0`]);
6465	PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts [`0`]);
6466	} else {
6467	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6468	PreShift = PreShifts [`0`];
6469	MagicFactor = MagicFactors [`0`];
6470	PostShift = PostShifts [`0`];
6471	}
6472
6473	SDValue Q = N0;
6474	if (UsePreShift) {
6475	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6476	Created.push_back(Elt: Q.getNode());
6477	}
6478
6479	// FIXME: We should support doing a MUL in a wider type.
6480	auto GetMULHU = [&](SDValue X, SDValue Y) {
6481	// If the type isn't legal, use a wider mul of the type calculated
6482	// earlier.
6483	if (!isTypeLegal(VT)) {
6484	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6485	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6486	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6487	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6488	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6489	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6490	}
6491
6492	if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6493	return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6494	if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6495	SDValue LoHi =
6496	DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6497	return SDValue (LoHi.getNode(), `1`);
6498	}
6499	// If type twice as wide legal, widen and use a mul plus a shift.
6500	unsigned Size = VT.getScalarSizeInBits();
6501	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size `2`);
6502	if (VT.isVector())
6503	WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6504	EC: VT.getVectorElementCount());
6505	if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6506	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6507	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6508	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6509	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6510	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6511	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6512	}
6513	return SDValue (); // No mulhu or equivalent
6514	};
6515
6516	// Multiply the numerator (operand 0) by the magic value.
6517	Q = GetMULHU (Q, MagicFactor);
6518	if (!Q)
6519	return SDValue ();
6520
6521	Created.push_back(Elt: Q.getNode());
6522
6523	if (UseNPQ) {
6524	SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6525	Created.push_back(Elt: NPQ.getNode());
6526
6527	// For vectors we might have a mix of non-NPQ/NPQ paths, so use
6528	// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6529	if (VT.isVector())
6530	NPQ = GetMULHU (NPQ, NPQFactor);
6531	else
6532	NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT));
6533
6534	Created.push_back(Elt: NPQ.getNode());
6535
6536	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6537	Created.push_back(Elt: Q.getNode());
6538	}
6539
6540	if (UsePostShift) {
6541	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6542	Created.push_back(Elt: Q.getNode());
6543	}
6544
6545	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6546
6547	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT);
6548	SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
6549	return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
6550	}
6551
6552	/// If all values in Values that don't* match the predicate are same 'splat'*
6553	/// value, then replace all values with that splat value.
6554	/// Else, if AlternativeReplacement was provided, then replace all values that
6555	/// do match predicate with AlternativeReplacement value.
6556	static void
6557	turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6558	std::function<bool(SDValue)> Predicate,
6559	SDValue AlternativeReplacement = SDValue ()) {
6560	SDValue Replacement;
6561	// Is there a value for which the Predicate does NOT* match? What is it?*
6562	auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6563	if (SplatValue != Values.end()) {
6564	// Does Values consist only of SplatValue's and values matching Predicate?
6565	if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6566	return Value == *SplatValue \|\| Predicate (Value);
6567	})) // Then we shall replace values matching predicate with SplatValue.
6568	Replacement = *SplatValue;
6569	}
6570	if (!Replacement) {
6571	// Oops, we did not find the "baseline" splat value.
6572	if (!AlternativeReplacement)
6573	return; // Nothing to do.
6574	// Let's replace with provided value then.
6575	Replacement = AlternativeReplacement;
6576	}
6577	std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6578	}
6579
6580	/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6581	/// where the divisor is constant and the comparison target is zero,
6582	/// return a DAG expression that will generate the same comparison result
6583	/// using only multiplications, additions and shifts/rotations.
6584	/// Ref: "Hacker's Delight" 10-17.
6585	SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6586	SDValue CompTargetNode,
6587	ISD::CondCode Cond,
6588	DAGCombinerInfo &DCI,
6589	const SDLoc &DL) const {
6590	SmallVector<SDNode *, `5`> Built;
6591	if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6592	DCI, DL, Created&: Built)) {
6593	for (SDNode *N : Built)
6594	DCI.AddToWorklist(N);
6595	return Folded;
6596	}
6597
6598	return SDValue ();
6599	}
6600
6601	SDValue
6602	TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6603	SDValue CompTargetNode, ISD::CondCode Cond,
6604	DAGCombinerInfo &DCI, const SDLoc &DL,
6605	SmallVectorImpl<SDNode > &Created) const* {
6606	// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6607	// - D must be constant, with D = D0 2^K where D0 is odd*
6608	// - P is the multiplicative inverse of D0 modulo 2^W
6609	// - Q = floor(((2^W) - 1) / D)
6610	// where W is the width of the common type of N and D.
6611	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
6612	"Only applicable for (in)equality comparisons.");
6613
6614	SelectionDAG &DAG = DCI.DAG;
6615
6616	EVT VT = REMNode.getValueType();
6617	EVT SVT = VT.getScalarType();
6618	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
6619	EVT ShSVT = ShVT.getScalarType();
6620
6621	// If MUL is unavailable, we cannot proceed in any case.
6622	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6623	return SDValue ();
6624
6625	bool ComparingWithAllZeros = true;
6626	bool AllComparisonsWithNonZerosAreTautological = true;
6627	bool HadTautologicalLanes = false;
6628	bool AllLanesAreTautological = true;
6629	bool HadEvenDivisor = false;
6630	bool AllDivisorsArePowerOfTwo = true;
6631	bool HadTautologicalInvertedLanes = false;
6632	SmallVector<SDValue, `16`> PAmts, KAmts, QAmts, IAmts;
6633
6634	auto BuildUREMPattern = [&](ConstantSDNode CDiv, ConstantSDNode CCmp) {
6635	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
6636	if (CDiv->isZero())
6637	return false;
6638
6639	const APInt &D = CDiv->getAPIntValue();
6640	const APInt &Cmp = CCmp->getAPIntValue();
6641
6642	ComparingWithAllZeros &= Cmp.isZero();
6643
6644	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
6645	// if C2 is not less than C1, the comparison is always false.
6646	// But we will only be able to produce the comparison that will give the
6647	// opposive tautological answer. So this lane would need to be fixed up.
6648	bool TautologicalInvertedLane = D.ule(RHS: Cmp);
6649	HadTautologicalInvertedLanes \|= TautologicalInvertedLane;
6650
6651	// If all lanes are tautological (either all divisors are ones, or divisor
6652	// is not greater than the constant we are comparing with),
6653	// we will prefer to avoid the fold.
6654	bool TautologicalLane = D.isOne() \|\| TautologicalInvertedLane;
6655	HadTautologicalLanes \|= TautologicalLane;
6656	AllLanesAreTautological &= TautologicalLane;
6657
6658	// If we are comparing with non-zero, we need'll need to subtract said
6659	// comparison value from the LHS. But there is no point in doing that if
6660	// every lane where we are comparing with non-zero is tautological..
6661	if (!Cmp.isZero())
6662	AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6663
6664	// Decompose D into D0 2^K*
6665	unsigned K = D.countr_zero();
6666	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
6667	APInt D0 = D.lshr(shiftAmt: K);
6668
6669	// D is even if it has trailing zeros.
6670	HadEvenDivisor \|= (K != `0`);
6671	// D is a power-of-two if D0 is one.
6672	// If all divisors are power-of-two, we will prefer to avoid the fold.
6673	AllDivisorsArePowerOfTwo &= D0.isOne();
6674
6675	// P = inv(D0, 2^W)
6676	// 2^W requires W + 1 bits, so we have to extend and then truncate.
6677	unsigned W = D.getBitWidth();
6678	APInt P = D0.multiplicativeInverse();
6679	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6680
6681	// Q = floor((2^W - 1) u/ D)
6682	// R = ((2^W - 1) u% D)
6683	APInt Q, R;
6684	APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
6685
6686	// If we are comparing with zero, then that comparison constant is okay,
6687	// else it may need to be one less than that.
6688	if (Cmp.ugt(RHS: R))
6689	Q -= `1`;
6690
6691	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6692	"We are expecting that K is always less than all-ones for ShSVT");
6693
6694	// If the lane is tautological the result can be constant-folded.
6695	if (TautologicalLane) {
6696	// Set P and K amount to a bogus values so we can try to splat them.
6697	P = `0`;
6698	K = -`1`;
6699	// And ensure that comparison constant is tautological,
6700	// it will always compare true/false.
6701	Q = -`1`;
6702	}
6703
6704	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6705	KAmts.push_back(
6706	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6707	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6708	return true;
6709	};
6710
6711	SDValue N = REMNode.getOperand(i: `0`);
6712	SDValue D = REMNode.getOperand(i: `1`);
6713
6714	// Collect the values from each element.
6715	if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
6716	return SDValue ();
6717
6718	// If all lanes are tautological, the result can be constant-folded.
6719	if (AllLanesAreTautological)
6720	return SDValue ();
6721
6722	// If this is a urem by a powers-of-two, avoid the fold since it can be
6723	// best implemented as a bit test.
6724	if (AllDivisorsArePowerOfTwo)
6725	return SDValue ();
6726
6727	SDValue PVal, KVal, QVal;
6728	if (D.getOpcode() == ISD::BUILD_VECTOR) {
6729	if (HadTautologicalLanes) {
6730	// Try to turn PAmts into a splat, since we don't care about the values
6731	// that are currently '0'. If we can't, just keep '0'`s.
6732	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
6733	// Try to turn KAmts into a splat, since we don't care about the values
6734	// that are currently '-1'. If we can't, change them to '0'`s.
6735	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
6736	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
6737	}
6738
6739	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
6740	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
6741	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
6742	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6743	assert(PAmts.size() == `1` && KAmts.size() == `1` && QAmts.size() == `1` &&
6744	"Expected matchBinaryPredicate to return one element for "
6745	"SPLAT_VECTORs");
6746	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
6747	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
6748	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
6749	} else {
6750	PVal = PAmts [`0`];
6751	KVal = KAmts [`0`];
6752	QVal = QAmts [`0`];
6753	}
6754
6755	if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6756	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
6757	return SDValue (); // FIXME: Could/should use `ISD::ADD`?
6758	assert(CompTargetNode.getValueType() == N.getValueType() &&
6759	"Expecting that the types on LHS and RHS of comparisons match.");
6760	N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
6761	}
6762
6763	// (mul N, P)
6764	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
6765	Created.push_back(Elt: Op0.getNode());
6766
6767	// Rotate right only if any divisor was even. We avoid rotates for all-odd
6768	// divisors as a performance improvement, since rotating by 0 is a no-op.
6769	if (HadEvenDivisor) {
6770	// We need ROTR to do this.
6771	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
6772	return SDValue ();
6773	// UREM: (rotr (mul N, P), K)
6774	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
6775	Created.push_back(Elt: Op0.getNode());
6776	}
6777
6778	// UREM: (setule/setugt (rotr (mul N, P), K), Q)
6779	SDValue NewCC =
6780	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
6781	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6782	if (!HadTautologicalInvertedLanes)
6783	return NewCC;
6784
6785	// If any lanes previously compared always-false, the NewCC will give
6786	// always-true result for them, so we need to fixup those lanes.
6787	// Or the other way around for inequality predicate.
6788	assert(VT.isVector() && "Can/should only get here for vectors.");
6789	Created.push_back(Elt: NewCC.getNode());
6790
6791	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
6792	// if C2 is not less than C1, the comparison is always false.
6793	// But we have produced the comparison that will give the
6794	// opposive tautological answer. So these lanes would need to be fixed up.
6795	SDValue TautologicalInvertedChannels =
6796	DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
6797	Created.push_back(Elt: TautologicalInvertedChannels.getNode());
6798
6799	// NOTE: we avoid letting illegal types through even if we're before legalize
6800	// ops – legalization has a hard time producing good code for this.
6801	if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
6802	// If we have a vector select, let's replace the comparison results in the
6803	// affected lanes with the correct tautological result.
6804	SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
6805	DL, VT: SETCCVT, OpVT: SETCCVT);
6806	return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
6807	N2: Replacement, N3: NewCC);
6808	}
6809
6810	// Else, we can just invert the comparison result in the appropriate lanes.
6811	//
6812	// NOTE: see the note above VSELECT above.
6813	if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
6814	return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
6815	N2: TautologicalInvertedChannels);
6816
6817	return SDValue (); // Don't know how to lower.
6818	}
6819
6820	/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6821	/// where the divisor is constant and the comparison target is zero,
6822	/// return a DAG expression that will generate the same comparison result
6823	/// using only multiplications, additions and shifts/rotations.
6824	/// Ref: "Hacker's Delight" 10-17.
6825	SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6826	SDValue CompTargetNode,
6827	ISD::CondCode Cond,
6828	DAGCombinerInfo &DCI,
6829	const SDLoc &DL) const {
6830	SmallVector<SDNode *, `7`> Built;
6831	if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6832	DCI, DL, Created&: Built)) {
6833	assert(Built.size() <= `7` && "Max size prediction failed.");
6834	for (SDNode *N : Built)
6835	DCI.AddToWorklist(N);
6836	return Folded;
6837	}
6838
6839	return SDValue ();
6840	}
6841
6842	SDValue
6843	TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6844	SDValue CompTargetNode, ISD::CondCode Cond,
6845	DAGCombinerInfo &DCI, const SDLoc &DL,
6846	SmallVectorImpl<SDNode > &Created) const* {
6847	// Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6848	// Fold:
6849	// (seteq/ne (srem N, D), 0)
6850	// To:
6851	// (setule/ugt (rotr (add (mul N, P), A), K), Q)
6852	//
6853	// - D must be constant, with D = D0 2^K where D0 is odd*
6854	// - P is the multiplicative inverse of D0 modulo 2^W
6855	// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6856	// - Q = floor((2 A) / (2^K))*
6857	// where W is the width of the common type of N and D.
6858	//
6859	// When D is a power of two (and thus D0 is 1), the normal
6860	// formula for A and Q don't apply, because the derivation
6861	// depends on D not dividing 2^(W-1), and thus theorem ZRS
6862	// does not apply. This specifically fails when N = INT_MIN.
6863	//
6864	// Instead, for power-of-two D, we use:
6865	// - A = 2^(W-1)
6866	// \|-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6867	// - Q = 2^(W-K) - 1
6868	// \|-> Test that the top K bits are zero after rotation
6869	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
6870	"Only applicable for (in)equality comparisons.");
6871
6872	SelectionDAG &DAG = DCI.DAG;
6873
6874	EVT VT = REMNode.getValueType();
6875	EVT SVT = VT.getScalarType();
6876	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
6877	EVT ShSVT = ShVT.getScalarType();
6878
6879	// If we are after ops legalization, and MUL is unavailable, we can not
6880	// proceed.
6881	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6882	return SDValue ();
6883
6884	// TODO: Could support comparing with non-zero too.
6885	ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
6886	if (!CompTarget \|\| !CompTarget->isZero())
6887	return SDValue ();
6888
6889	bool HadIntMinDivisor = false;
6890	bool HadOneDivisor = false;
6891	bool AllDivisorsAreOnes = true;
6892	bool HadEvenDivisor = false;
6893	bool NeedToApplyOffset = false;
6894	bool AllDivisorsArePowerOfTwo = true;
6895	SmallVector<SDValue, `16`> PAmts, AAmts, KAmts, QAmts;
6896
6897	auto BuildSREMPattern = [&](ConstantSDNode *C) {
6898	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
6899	if (C->isZero())
6900	return false;
6901
6902	// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6903
6904	// WARNING: this fold is only valid for positive divisors!
6905	APInt D = C->getAPIntValue();
6906	if (D.isNegative())
6907	D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6908
6909	HadIntMinDivisor \|= D.isMinSignedValue();
6910
6911	// If all divisors are ones, we will prefer to avoid the fold.
6912	HadOneDivisor \|= D.isOne();
6913	AllDivisorsAreOnes &= D.isOne();
6914
6915	// Decompose D into D0 2^K*
6916	unsigned K = D.countr_zero();
6917	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
6918	APInt D0 = D.lshr(shiftAmt: K);
6919
6920	if (!D.isMinSignedValue()) {
6921	// D is even if it has trailing zeros; unless it's INT_MIN, in which case
6922	// we don't care about this lane in this fold, we'll special-handle it.
6923	HadEvenDivisor \|= (K != `0`);
6924	}
6925
6926	// D is a power-of-two if D0 is one. This includes INT_MIN.
6927	// If all divisors are power-of-two, we will prefer to avoid the fold.
6928	AllDivisorsArePowerOfTwo &= D0.isOne();
6929
6930	// P = inv(D0, 2^W)
6931	// 2^W requires W + 1 bits, so we have to extend and then truncate.
6932	unsigned W = D.getBitWidth();
6933	APInt P = D0.multiplicativeInverse();
6934	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6935
6936	// A = floor((2^(W - 1) - 1) / D0) & -2^K
6937	APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
6938	A.clearLowBits(loBits: K);
6939
6940	if (!D.isMinSignedValue()) {
6941	// If divisor INT_MIN, then we don't care about this lane in this fold,
6942	// we'll special-handle it.
6943	NeedToApplyOffset \|= A != `0`;
6944	}
6945
6946	// Q = floor((2 A) / (2^K))*
6947	APInt Q = (`2` * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
6948
6949	assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
6950	"We are expecting that A is always less than all-ones for SVT");
6951	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6952	"We are expecting that K is always less than all-ones for ShSVT");
6953
6954	// If D was a power of two, apply the alternate constant derivation.
6955	if (D0.isOne()) {
6956	// A = 2^(W-1)
6957	A = APInt::getSignedMinValue(numBits: W);
6958	// - Q = 2^(W-K) - 1
6959	Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
6960	}
6961
6962	// If the divisor is 1 the result can be constant-folded. Likewise, we
6963	// don't care about INT_MIN lanes, those can be set to undef if appropriate.
6964	if (D.isOne()) {
6965	// Set P, A and K to a bogus values so we can try to splat them.
6966	P = `0`;
6967	A = -`1`;
6968	K = -`1`;
6969
6970	// x ?% 1 == 0 <--> true <--> x u<= -1
6971	Q = -`1`;
6972	}
6973
6974	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6975	AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
6976	KAmts.push_back(
6977	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6978	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6979	return true;
6980	};
6981
6982	SDValue N = REMNode.getOperand(i: `0`);
6983	SDValue D = REMNode.getOperand(i: `1`);
6984
6985	// Collect the values from each element.
6986	if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
6987	return SDValue ();
6988
6989	// If this is a srem by a one, avoid the fold since it can be constant-folded.
6990	if (AllDivisorsAreOnes)
6991	return SDValue ();
6992
6993	// If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
6994	// since it can be best implemented as a bit test.
6995	if (AllDivisorsArePowerOfTwo)
6996	return SDValue ();
6997
6998	SDValue PVal, AVal, KVal, QVal;
6999	if (D.getOpcode() == ISD::BUILD_VECTOR) {
7000	if (HadOneDivisor) {
7001	// Try to turn PAmts into a splat, since we don't care about the values
7002	// that are currently '0'. If we can't, just keep '0'`s.
7003	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7004	// Try to turn AAmts into a splat, since we don't care about the
7005	// values that are currently '-1'. If we can't, change them to '0'`s.
7006	turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7007	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: SVT));
7008	// Try to turn KAmts into a splat, since we don't care about the values
7009	// that are currently '-1'. If we can't, change them to '0'`s.
7010	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7011	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
7012	}
7013
7014	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7015	AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7016	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7017	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7018	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7019	assert(PAmts.size() == `1` && AAmts.size() == `1` && KAmts.size() == `1` &&
7020	QAmts.size() == `1` &&
7021	"Expected matchUnaryPredicate to return one element for scalable "
7022	"vectors");
7023	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
7024	AVal = DAG.getSplatVector(VT, DL, Op: AAmts [`0`]);
7025	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
7026	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
7027	} else {
7028	assert(isa<ConstantSDNode>(D) && "Expected a constant");
7029	PVal = PAmts [`0`];
7030	AVal = AAmts [`0`];
7031	KVal = KAmts [`0`];
7032	QVal = QAmts [`0`];
7033	}
7034
7035	// (mul N, P)
7036	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7037	Created.push_back(Elt: Op0.getNode());
7038
7039	if (NeedToApplyOffset) {
7040	// We need ADD to do this.
7041	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7042	return SDValue ();
7043
7044	// (add (mul N, P), A)
7045	Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7046	Created.push_back(Elt: Op0.getNode());
7047	}
7048
7049	// Rotate right only if any divisor was even. We avoid rotates for all-odd
7050	// divisors as a performance improvement, since rotating by 0 is a no-op.
7051	if (HadEvenDivisor) {
7052	// We need ROTR to do this.
7053	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7054	return SDValue ();
7055	// SREM: (rotr (add (mul N, P), A), K)
7056	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7057	Created.push_back(Elt: Op0.getNode());
7058	}
7059
7060	// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7061	SDValue Fold =
7062	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7063	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7064
7065	// If we didn't have lanes with INT_MIN divisor, then we're done.
7066	if (!HadIntMinDivisor)
7067	return Fold;
7068
7069	// That fold is only valid for positive divisors. Which effectively means,
7070	// it is invalid for INT_MIN divisors. So if we have such a lane,
7071	// we must fix-up results for said lanes.
7072	assert(VT.isVector() && "Can/should only get here for vectors.");
7073
7074	// NOTE: we avoid letting illegal types through even if we're before legalize
7075	// ops – legalization has a hard time producing good code for the code that
7076	// follows.
7077	if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) \|\|
7078	!isOperationLegalOrCustom(Op: ISD::AND, VT) \|\|
7079	!isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) \|\|
7080	!isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7081	return SDValue ();
7082
7083	Created.push_back(Elt: Fold.getNode());
7084
7085	SDValue IntMin = DAG.getConstant(
7086	Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7087	SDValue IntMax = DAG.getConstant(
7088	Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7089	SDValue Zero =
7090	DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7091
7092	// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7093	SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7094	Created.push_back(Elt: DivisorIsIntMin.getNode());
7095
7096	// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7097	SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7098	Created.push_back(Elt: Masked.getNode());
7099	SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7100	Created.push_back(Elt: MaskedIsZero.getNode());
7101
7102	// To produce final result we need to blend 2 vectors: 'SetCC' and
7103	// 'MaskedIsZero'. If the divisor for channel was NOT* INT_MIN, we pick*
7104	// from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7105	// constant-folded, select can get lowered to a shuffle with constant mask.
7106	SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7107	N2: MaskedIsZero, N3: Fold);
7108
7109	return Blended;
7110	}
7111
7112	bool TargetLowering::
7113	verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7114	if (!isa<ConstantSDNode>(Val: Op.getOperand(i: `0`))) {
7115	DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_return_address' must "
7116	"be a constant integer");
7117	return true;
7118	}
7119
7120	return false;
7121	}
7122
7123	SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7124	const DenormalMode &Mode) const {
7125	SDLoc DL(Op);
7126	EVT VT = Op.getValueType();
7127	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7128	SDValue FPZero = DAG.getConstantFP(Val: `0.0`, DL, VT);
7129
7130	// This is specifically a check for the handling of denormal inputs, not the
7131	// result.
7132	if (Mode.Input == DenormalMode::PreserveSign \|\|
7133	Mode.Input == DenormalMode::PositiveZero) {
7134	// Test = X == 0.0
7135	return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7136	}
7137
7138	// Testing it with denormal inputs to avoid wrong estimate.
7139	//
7140	// Test = fabs(X) < SmallestNormal
7141	const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7142	APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7143	SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7144	SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7145	return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7146	}
7147
7148	SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7149	bool LegalOps, bool OptForSize,
7150	NegatibleCost &Cost,
7151	unsigned Depth) const {
7152	// fneg is removable even if it has multiple uses.
7153	if (Op.getOpcode() == ISD::FNEG \|\| Op.getOpcode() == ISD::VP_FNEG) {
7154	Cost = NegatibleCost::Cheaper;
7155	return Op.getOperand(i: `0`);
7156	}
7157
7158	// Don't recurse exponentially.
7159	if (Depth > SelectionDAG::MaxRecursionDepth)
7160	return SDValue ();
7161
7162	// Pre-increment recursion depth for use in recursive calls.
7163	++Depth;
7164	const SDNodeFlags Flags = Op ->getFlags();
7165	const TargetOptions &Options = DAG.getTarget().Options;
7166	EVT VT = Op.getValueType();
7167	unsigned Opcode = Op.getOpcode();
7168
7169	// Don't allow anything with multiple uses unless we know it is free.
7170	if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7171	bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7172	isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: `0`).getValueType());
7173	if (!IsFreeExtend)
7174	return SDValue ();
7175	}
7176
7177	auto RemoveDeadNode = [&](SDValue N) {
7178	if (N && N.getNode()->use_empty())
7179	DAG.RemoveDeadNode(N: N.getNode());
7180	};
7181
7182	SDLoc DL(Op);
7183
7184	// Because getNegatedExpression can delete nodes we need a handle to keep
7185	// temporary nodes alive in case the recursion manages to create an identical
7186	// node.
7187	std::list<HandleSDNode> Handles;
7188
7189	switch (Opcode) {
7190	case ISD::ConstantFP: {
7191	// Don't invert constant FP values after legalization unless the target says
7192	// the negated constant is legal.
7193	bool IsOpLegal =
7194	isOperationLegal(Op: ISD::ConstantFP, VT) \|\|
7195	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7196	ForCodeSize: OptForSize);
7197
7198	if (LegalOps && !IsOpLegal)
7199	break;
7200
7201	APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7202	V.changeSign();
7203	SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7204
7205	// If we already have the use of the negated floating constant, it is free
7206	// to negate it even it has multiple uses.
7207	if (!Op.hasOneUse() && CFP.use_empty())
7208	break;
7209	Cost = NegatibleCost::Neutral;
7210	return CFP;
7211	}
7212	case ISD::BUILD_VECTOR: {
7213	// Only permit BUILD_VECTOR of constants.
7214	if (llvm::any_of(Range: Op ->op_values(), P: [&](SDValue N) {
7215	return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7216	}))
7217	break;
7218
7219	bool IsOpLegal =
7220	(isOperationLegal(Op: ISD::ConstantFP, VT) &&
7221	isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) \|\|
7222	llvm::all_of(Range: Op ->op_values(), P: [&](SDValue N) {
7223	return N.isUndef() \|\|
7224	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7225	ForCodeSize: OptForSize);
7226	});
7227
7228	if (LegalOps && !IsOpLegal)
7229	break;
7230
7231	SmallVector<SDValue, `4`> Ops;
7232	for (SDValue C : Op ->op_values()) {
7233	if (C.isUndef()) {
7234	Ops.push_back(Elt: C);
7235	continue;
7236	}
7237	APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7238	V.changeSign();
7239	Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7240	}
7241	Cost = NegatibleCost::Neutral;
7242	return DAG.getBuildVector(VT, DL, Ops);
7243	}
7244	case ISD::FADD: {
7245	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7246	break;
7247
7248	// After operation legalization, it might not be legal to create new FSUBs.
7249	if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7250	break;
7251	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7252
7253	// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7254	NegatibleCost CostX = NegatibleCost::Expensive;
7255	SDValue NegX =
7256	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7257	// Prevent this node from being deleted by the next call.
7258	if (NegX)
7259	Handles.emplace_back(args&: NegX);
7260
7261	// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7262	NegatibleCost CostY = NegatibleCost::Expensive;
7263	SDValue NegY =
7264	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7265
7266	// We're done with the handles.
7267	Handles.clear();
7268
7269	// Negate the X if its cost is less or equal than Y.
7270	if (NegX && (CostX <= CostY)) {
7271	Cost = CostX;
7272	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7273	if (NegY != N)
7274	RemoveDeadNode (NegY);
7275	return N;
7276	}
7277
7278	// Negate the Y if it is not expensive.
7279	if (NegY) {
7280	Cost = CostY;
7281	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7282	if (NegX != N)
7283	RemoveDeadNode (NegX);
7284	return N;
7285	}
7286	break;
7287	}
7288	case ISD::FSUB: {
7289	// We can't turn -(A-B) into B-A when we honor signed zeros.
7290	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7291	break;
7292
7293	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7294	// fold (fneg (fsub 0, Y)) -> Y
7295	if (ConstantFPSDNode C = isConstOrConstSplatFP(N: X, /AllowUndefs/* true))
7296	if (C->isZero()) {
7297	Cost = NegatibleCost::Cheaper;
7298	return Y;
7299	}
7300
7301	// fold (fneg (fsub X, Y)) -> (fsub Y, X)
7302	Cost = NegatibleCost::Neutral;
7303	return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7304	}
7305	case ISD::FMUL:
7306	case ISD::FDIV: {
7307	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7308
7309	// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7310	NegatibleCost CostX = NegatibleCost::Expensive;
7311	SDValue NegX =
7312	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7313	// Prevent this node from being deleted by the next call.
7314	if (NegX)
7315	Handles.emplace_back(args&: NegX);
7316
7317	// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7318	NegatibleCost CostY = NegatibleCost::Expensive;
7319	SDValue NegY =
7320	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7321
7322	// We're done with the handles.
7323	Handles.clear();
7324
7325	// Negate the X if its cost is less or equal than Y.
7326	if (NegX && (CostX <= CostY)) {
7327	Cost = CostX;
7328	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7329	if (NegY != N)
7330	RemoveDeadNode (NegY);
7331	return N;
7332	}
7333
7334	// Ignore X 2.0 because that is expected to be canonicalized to X + X.*
7335	if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: `1`)))
7336	if (C->isExactlyValue(V: `2.0`) && Op.getOpcode() == ISD::FMUL)
7337	break;
7338
7339	// Negate the Y if it is not expensive.
7340	if (NegY) {
7341	Cost = CostY;
7342	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7343	if (NegX != N)
7344	RemoveDeadNode (NegX);
7345	return N;
7346	}
7347	break;
7348	}
7349	case ISD::FMA:
7350	case ISD::FMAD: {
7351	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7352	break;
7353
7354	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`), Z = Op.getOperand(i: `2`);
7355	NegatibleCost CostZ = NegatibleCost::Expensive;
7356	SDValue NegZ =
7357	getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7358	// Give up if fail to negate the Z.
7359	if (!NegZ)
7360	break;
7361
7362	// Prevent this node from being deleted by the next two calls.
7363	Handles.emplace_back(args&: NegZ);
7364
7365	// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7366	NegatibleCost CostX = NegatibleCost::Expensive;
7367	SDValue NegX =
7368	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7369	// Prevent this node from being deleted by the next call.
7370	if (NegX)
7371	Handles.emplace_back(args&: NegX);
7372
7373	// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7374	NegatibleCost CostY = NegatibleCost::Expensive;
7375	SDValue NegY =
7376	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7377
7378	// We're done with the handles.
7379	Handles.clear();
7380
7381	// Negate the X if its cost is less or equal than Y.
7382	if (NegX && (CostX <= CostY)) {
7383	Cost = std::min(a: CostX, b: CostZ);
7384	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7385	if (NegY != N)
7386	RemoveDeadNode (NegY);
7387	return N;
7388	}
7389
7390	// Negate the Y if it is not expensive.
7391	if (NegY) {
7392	Cost = std::min(a: CostY, b: CostZ);
7393	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7394	if (NegX != N)
7395	RemoveDeadNode (NegX);
7396	return N;
7397	}
7398	break;
7399	}
7400
7401	case ISD::FP_EXTEND:
7402	case ISD::FSIN:
7403	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7404	OptForSize, Cost, Depth))
7405	return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7406	break;
7407	case ISD::FP_ROUND:
7408	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7409	OptForSize, Cost, Depth))
7410	return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: `1`));
7411	break;
7412	case ISD::SELECT:
7413	case ISD::VSELECT: {
7414	// fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7415	// iff at least one cost is cheaper and the other is neutral/cheaper
7416	SDValue LHS = Op.getOperand(i: `1`);
7417	NegatibleCost CostLHS = NegatibleCost::Expensive;
7418	SDValue NegLHS =
7419	getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7420	if (!NegLHS \|\| CostLHS > NegatibleCost::Neutral) {
7421	RemoveDeadNode (NegLHS);
7422	break;
7423	}
7424
7425	// Prevent this node from being deleted by the next call.
7426	Handles.emplace_back(args&: NegLHS);
7427
7428	SDValue RHS = Op.getOperand(i: `2`);
7429	NegatibleCost CostRHS = NegatibleCost::Expensive;
7430	SDValue NegRHS =
7431	getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7432
7433	// We're done with the handles.
7434	Handles.clear();
7435
7436	if (!NegRHS \|\| CostRHS > NegatibleCost::Neutral \|\|
7437	(CostLHS != NegatibleCost::Cheaper &&
7438	CostRHS != NegatibleCost::Cheaper)) {
7439	RemoveDeadNode (NegLHS);
7440	RemoveDeadNode (NegRHS);
7441	break;
7442	}
7443
7444	Cost = std::min(a: CostLHS, b: CostRHS);
7445	return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: `0`), LHS: NegLHS, RHS: NegRHS);
7446	}
7447	}
7448
7449	return SDValue ();
7450	}
7451
7452	//===----------------------------------------------------------------------===//
7453	// Legalization Utilities
7454	//===----------------------------------------------------------------------===//
7455
7456	bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7457	SDValue LHS, SDValue RHS,
7458	SmallVectorImpl<SDValue> &Result,
7459	EVT HiLoVT, SelectionDAG &DAG,
7460	MulExpansionKind Kind, SDValue LL,
7461	SDValue LH, SDValue RL, SDValue RH) const {
7462	assert(Opcode == ISD::MUL \|\| Opcode == ISD::UMUL_LOHI \|\|
7463	Opcode == ISD::SMUL_LOHI);
7464
7465	bool HasMULHS = (Kind == MulExpansionKind::Always) \|\|
7466	isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7467	bool HasMULHU = (Kind == MulExpansionKind::Always) \|\|
7468	isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7469	bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7470	isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7471	bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7472	isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7473
7474	if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7475	return false;
7476
7477	unsigned OuterBitSize = VT.getScalarSizeInBits();
7478	unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7479
7480	// LL, LH, RL, and RH must be either all NULL or all set to a value.
7481	assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) \|\|
7482	(!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7483
7484	SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7485	auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7486	bool Signed) -> bool {
7487	if ((Signed && HasSMUL_LOHI) \|\| (!Signed && HasUMUL_LOHI)) {
7488	Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7489	Hi = SDValue (Lo.getNode(), `1`);
7490	return true;
7491	}
7492	if ((Signed && HasMULHS) \|\| (!Signed && HasMULHU)) {
7493	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7494	Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7495	return true;
7496	}
7497	return false;
7498	};
7499
7500	SDValue Lo, Hi;
7501
7502	if (!LL.getNode() && !RL.getNode() &&
7503	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7504	LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7505	RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7506	}
7507
7508	if (!LL.getNode())
7509	return false;
7510
7511	APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7512	if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7513	DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7514	// The inputs are both zero-extended.
7515	if (MakeMUL_LOHI (LL, RL, Lo, Hi, false)) {
7516	Result.push_back(Elt: Lo);
7517	Result.push_back(Elt: Hi);
7518	if (Opcode != ISD::MUL) {
7519	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7520	Result.push_back(Elt: Zero);
7521	Result.push_back(Elt: Zero);
7522	}
7523	return true;
7524	}
7525	}
7526
7527	if (!VT.isVector() && Opcode == ISD::MUL &&
7528	DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7529	DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7530	// The input values are both sign-extended.
7531	// TODO non-MUL case?
7532	if (MakeMUL_LOHI (LL, RL, Lo, Hi, true)) {
7533	Result.push_back(Elt: Lo);
7534	Result.push_back(Elt: Hi);
7535	return true;
7536	}
7537	}
7538
7539	unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7540	SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
7541
7542	if (!LH.getNode() && !RH.getNode() &&
7543	isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
7544	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7545	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
7546	LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
7547	RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
7548	RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
7549	}
7550
7551	if (!LH.getNode())
7552	return false;
7553
7554	if (!MakeMUL_LOHI (LL, RL, Lo, Hi, false))
7555	return false;
7556
7557	Result.push_back(Elt: Lo);
7558
7559	if (Opcode == ISD::MUL) {
7560	RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
7561	LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
7562	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
7563	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
7564	Result.push_back(Elt: Hi);
7565	return true;
7566	}
7567
7568	// Compute the full width result.
7569	auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7570	Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
7571	Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7572	Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
7573	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
7574	};
7575
7576	SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7577	if (!MakeMUL_LOHI (LL, RH, Lo, Hi, false))
7578	return false;
7579
7580	// This is effectively the add part of a multiply-add of half-sized operands,
7581	// so it cannot overflow.
7582	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
7583
7584	if (!MakeMUL_LOHI (LH, RL, Lo, Hi, false))
7585	return false;
7586
7587	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7588	EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7589
7590	bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
7591	isOperationLegalOrCustom(Op: ISD::ADDE, VT));
7592	if (UseGlue)
7593	Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7594	Merge(Lo, Hi));
7595	else
7596	Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
7597	N2: Merge (Lo, Hi), N3: DAG.getConstant(Val: `0`, DL: dl, VT: BoolType));
7598
7599	SDValue Carry = Next.getValue(R: `1`);
7600	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7601	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7602
7603	if (!MakeMUL_LOHI (LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7604	return false;
7605
7606	if (UseGlue)
7607	Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7608	Carry);
7609	else
7610	Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
7611	N2: Zero, N3: Carry);
7612
7613	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
7614
7615	if (Opcode == ISD::SMUL_LOHI) {
7616	SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7617	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
7618	Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7619
7620	NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7621	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
7622	Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7623	}
7624
7625	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7626	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7627	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7628	return true;
7629	}
7630
7631	bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7632	SelectionDAG &DAG, MulExpansionKind Kind,
7633	SDValue LL, SDValue LH, SDValue RL,
7634	SDValue RH) const {
7635	SmallVector<SDValue, `2`> Result;
7636	bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: `0`), dl: SDLoc (N),
7637	LHS: N->getOperand(Num: `0`), RHS: N->getOperand(Num: `1`), Result, HiLoVT,
7638	DAG, Kind, LL, LH, RL, RH);
7639	if (Ok) {
7640	assert(Result.size() == `2`);
7641	Lo = Result [`0`];
7642	Hi = Result [`1`];
7643	}
7644	return Ok;
7645	}
7646
// Optimize unsigned division or remainder by constants for types twice as large
// as a legal VT.
//
// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
// can be computed
// as:
//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
//   Remainder = Sum % Constant
// This is based on "Remainder by Summing Digits" from Hacker's Delight.
//
// For division, we can compute the remainder using the algorithm described
// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << (BitWidth / 2)) to get the quotient.

// If Constant is even, we can shift right the dividend and the divisor by the
// number of trailing zeros in Constant before applying the remainder algorithm.
// If we're after the quotient, we can subtract this value from the shifted
// dividend and multiply by the multiplicative inverse of the shifted divisor.
// If we want the remainder, we shift the value left by the number of trailing
// zeros and add the bits that were shifted out of the dividend.
7668	bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7669	SmallVectorImpl<SDValue> &Result,
7670	EVT HiLoVT, SelectionDAG &DAG,
7671	SDValue LL, SDValue LH) const {
7672	unsigned Opcode = N->getOpcode();
7673	EVT VT = N->getValueType(ResNo: `0`);
7674
7675	// TODO: Support signed division/remainder.
7676	if (Opcode == ISD::SREM \|\| Opcode == ISD::SDIV \|\| Opcode == ISD::SDIVREM)
7677	return false;
7678	assert(
7679	(Opcode == ISD::UREM \|\| Opcode == ISD::UDIV \|\| Opcode == ISD::UDIVREM) &&
7680	"Unexpected opcode");
7681
7682	auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
7683	if (!CN)
7684	return false;
7685
7686	APInt Divisor = CN->getAPIntValue();
7687	unsigned BitWidth = Divisor.getBitWidth();
7688	unsigned HBitWidth = BitWidth / `2`;
7689	assert(VT.getScalarSizeInBits() == BitWidth &&
7690	HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7691
7692	// Divisor needs to less than (1 << HBitWidth).
7693	APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
7694	if (Divisor.uge(RHS: HalfMaxPlus1))
7695	return false;
7696
7697	// We depend on the UREM by constant optimization in DAGCombiner that requires
7698	// high multiply.
7699	if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
7700	!isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
7701	return false;
7702
7703	// Don't expand if optimizing for size.
7704	if (DAG.shouldOptForSize())
7705	return false;
7706
7707	// Early out for 0 or 1 divisors.
7708	if (Divisor.ule(RHS: `1`))
7709	return false;
7710
7711	// If the divisor is even, shift it until it becomes odd.
7712	unsigned TrailingZeros = `0`;
7713	if (!Divisor [`0`]) {
7714	TrailingZeros = Divisor.countr_zero();
7715	Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
7716	}
7717
7718	SDLoc dl(N);
7719	SDValue Sum;
7720	SDValue PartialRem;
7721
7722	// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7723	// then add in the carry.
7724	// TODO: If we can't split it in half, we might be able to split into 3 or
7725	// more pieces using a smaller bit width.
7726	if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
7727	assert(!LL == !LH && "Expected both input halves or no input halves!");
7728	if (!LL)
7729	std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: `0`), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7730
7731	// Shift the input by the number of TrailingZeros in the divisor. The
7732	// shifted out bits will be added to the remainder later.
7733	if (TrailingZeros) {
7734	// Save the shifted off bits if we need the remainder.
7735	if (Opcode != ISD::UDIV) {
7736	APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7737	PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
7738	N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
7739	}
7740
7741	LL = DAG.getNode(
7742	Opcode: ISD::OR, DL: dl, VT: HiLoVT,
7743	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
7744	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
7745	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
7746	N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
7747	VT: HiLoVT, DL: dl)));
7748	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
7749	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7750	}
7751
7752	// Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7753	EVT SetCCType =
7754	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
7755	if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
7756	SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
7757	Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
7758	Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
7759	N2: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: `1`));
7760	} else {
7761	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
7762	SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
7763	// If the boolean for the target is 0 or 1, we can add the setcc result
7764	// directly.
7765	if (getBooleanContents(Type: HiLoVT) ==
7766	TargetLoweringBase::ZeroOrOneBooleanContent)
7767	Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
7768	else
7769	Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: `1`, DL: dl, VT: HiLoVT),
7770	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
7771	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
7772	}
7773	}
7774
7775	// If we didn't find a sum, we can't do the expansion.
7776	if (!Sum)
7777	return false;
7778
7779	// Perform a HiLoVT urem on the Sum using truncated divisor.
7780	SDValue RemL =
7781	DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
7782	N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
7783	SDValue RemH = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7784
7785	if (Opcode != ISD::UREM) {
7786	// Subtract the remainder from the shifted dividend.
7787	SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
7788	SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
7789
7790	Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
7791
7792	// Multiply by the multiplicative inverse of the divisor modulo
7793	// (1 << BitWidth).
7794	APInt MulFactor = Divisor.multiplicativeInverse();
7795
7796	SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
7797	N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
7798
7799	// Split the quotient into low and high parts.
7800	SDValue QuotL, QuotH;
7801	std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7802	Result.push_back(Elt: QuotL);
7803	Result.push_back(Elt: QuotH);
7804	}
7805
7806	if (Opcode != ISD::UDIV) {
7807	// If we shifted the input, shift the remainder left and add the bits we
7808	// shifted off the input.
7809	if (TrailingZeros) {
7810	APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7811	RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
7812	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7813	RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
7814	}
7815	Result.push_back(Elt: RemL);
7816	Result.push_back(Elt: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
7817	}
7818
7819	return true;
7820	}
7821
7822	// Check that (every element of) Z is undef or not an exact multiple of BW.
7823	static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7824	return ISD::matchUnaryPredicate(
7825	Op: Z,
7826	Match: [=](ConstantSDNode C) { return* !C \|\| C->getAPIntValue().urem(RHS: BW) != `0`; },
7827	AllowUndefs: true);
7828	}
7829
7830	static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
7831	EVT VT = Node->getValueType(ResNo: `0`);
7832	SDValue ShX, ShY;
7833	SDValue ShAmt, InvShAmt;
7834	SDValue X = Node->getOperand(Num: `0`);
7835	SDValue Y = Node->getOperand(Num: `1`);
7836	SDValue Z = Node->getOperand(Num: `2`);
7837	SDValue Mask = Node->getOperand(Num: `3`);
7838	SDValue VL = Node->getOperand(Num: `4`);
7839
7840	unsigned BW = VT.getScalarSizeInBits();
7841	bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7842	SDLoc DL(SDValue (Node, `0`));
7843
7844	EVT ShVT = Z.getValueType();
7845	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7846	// fshl: X << C \| Y >> (BW - C)
7847	// fshr: X << (BW - C) \| Y >> C
7848	// where C = Z % BW is not zero
7849	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7850	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7851	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
7852	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
7853	N4: VL);
7854	ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
7855	N4: VL);
7856	} else {
7857	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
7858	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
7859	SDValue BitMask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
7860	if (isPowerOf2_32(Value: BW)) {
7861	// Z % BW -> Z & (BW - 1)
7862	ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
7863	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7864	SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
7865	N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
7866	InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
7867	} else {
7868	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7869	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7870	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
7871	}
7872
7873	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
7874	if (IsFSHL) {
7875	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
7876	SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
7877	ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
7878	} else {
7879	SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
7880	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
7881	ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
7882	}
7883	}
7884	return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
7885	}
7886
7887	SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7888	SelectionDAG &DAG) const {
7889	if (Node->isVPOpcode())
7890	return expandVPFunnelShift(Node, DAG);
7891
7892	EVT VT = Node->getValueType(ResNo: `0`);
7893
7894	if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
7895	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
7896	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
7897	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
7898	return SDValue ();
7899
7900	SDValue X = Node->getOperand(Num: `0`);
7901	SDValue Y = Node->getOperand(Num: `1`);
7902	SDValue Z = Node->getOperand(Num: `2`);
7903
7904	unsigned BW = VT.getScalarSizeInBits();
7905	bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7906	SDLoc DL(SDValue (Node, `0`));
7907
7908	EVT ShVT = Z.getValueType();
7909
7910	// If a funnel shift in the other direction is more supported, use it.
7911	unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7912	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7913	isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
7914	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7915	// fshl X, Y, Z -> fshr X, Y, -Z
7916	// fshr X, Y, Z -> fshl X, Y, -Z
7917	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: ShVT);
7918	Z = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Z);
7919	} else {
7920	// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7921	// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7922	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
7923	if (IsFSHL) {
7924	Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7925	X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
7926	} else {
7927	X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7928	Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
7929	}
7930	Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
7931	}
7932	return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
7933	}
7934
7935	SDValue ShX, ShY;
7936	SDValue ShAmt, InvShAmt;
7937	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7938	// fshl: X << C \| Y >> (BW - C)
7939	// fshr: X << (BW - C) \| Y >> C
7940	// where C = Z % BW is not zero
7941	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7942	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7943	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
7944	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
7945	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
7946	} else {
7947	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
7948	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
7949	SDValue Mask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
7950	if (isPowerOf2_32(Value: BW)) {
7951	// Z % BW -> Z & (BW - 1)
7952	ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
7953	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7954	InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
7955	} else {
7956	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7957	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7958	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
7959	}
7960
7961	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
7962	if (IsFSHL) {
7963	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
7964	SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
7965	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
7966	} else {
7967	SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
7968	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
7969	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
7970	}
7971	}
7972	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
7973	}
7974
7975	// TODO: Merge with expandFunnelShift.
7976	SDValue TargetLowering::expandROT(SDNode Node, bool* AllowVectorOps,
7977	SelectionDAG &DAG) const {
7978	EVT VT = Node->getValueType(ResNo: `0`);
7979	unsigned EltSizeInBits = VT.getScalarSizeInBits();
7980	bool IsLeft = Node->getOpcode() == ISD::ROTL;
7981	SDValue Op0 = Node->getOperand(Num: `0`);
7982	SDValue Op1 = Node->getOperand(Num: `1`);
7983	SDLoc DL(SDValue (Node, `0`));
7984
7985	EVT ShVT = Op1.getValueType();
7986	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: ShVT);
7987
7988	// If a rotate in the other direction is more supported, use it.
7989	unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7990	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7991	isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
7992	SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
7993	return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
7994	}
7995
7996	if (!AllowVectorOps && VT.isVector() &&
7997	(!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
7998	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
7999	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
8000	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) \|\|
8001	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8002	return SDValue ();
8003
8004	unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8005	unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8006	SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - `1`, DL, VT: ShVT);
8007	SDValue ShVal;
8008	SDValue HsVal;
8009	if (isPowerOf2_32(Value: EltSizeInBits)) {
8010	// (rotl x, c) -> x << (c & (w - 1)) \| x >> (-c & (w - 1))
8011	// (rotr x, c) -> x >> (c & (w - 1)) \| x << (-c & (w - 1))
8012	SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8013	SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8014	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8015	SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8016	HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8017	} else {
8018	// (rotl x, c) -> x << (c % w) \| x >> 1 >> (w - 1 - (c % w))
8019	// (rotr x, c) -> x >> (c % w) \| x << 1 << (w - 1 - (c % w))
8020	SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8021	SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8022	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8023	SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8024	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8025	HsVal =
8026	DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8027	}
8028	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8029	}
8030
8031	void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8032	SelectionDAG &DAG) const {
8033	assert(Node->getNumOperands() == `3` && "Not a double-shift!");
8034	EVT VT = Node->getValueType(ResNo: `0`);
8035	unsigned VTBits = VT.getScalarSizeInBits();
8036	assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8037
8038	bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8039	bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8040	SDValue ShOpLo = Node->getOperand(Num: `0`);
8041	SDValue ShOpHi = Node->getOperand(Num: `1`);
8042	SDValue ShAmt = Node->getOperand(Num: `2`);
8043	EVT ShAmtVT = ShAmt.getValueType();
8044	EVT ShAmtCCVT =
8045	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
8046	SDLoc dl(Node);
8047
8048	// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8049	// ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8050	// away during isel.
8051	SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8052	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT));
8053	SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8054	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT))
8055	: DAG.getConstant(Val: `0`, DL: dl, VT);
8056
8057	SDValue Tmp2, Tmp3;
8058	if (IsSHL) {
8059	Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8060	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8061	} else {
8062	Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8063	Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8064	}
8065
8066	// If the shift amount is larger or equal than the width of a part we don't
8067	// use the result from the FSHL/FSHR. Insert a test and select the appropriate
8068	// values for large shift amounts.
8069	SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8070	N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8071	SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8072	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8073
8074	if (IsSHL) {
8075	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8076	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8077	} else {
8078	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8079	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8080	}
8081	}
8082
8083	bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8084	SelectionDAG &DAG) const {
8085	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8086	SDValue Src = Node->getOperand(Num: OpNo);
8087	EVT SrcVT = Src.getValueType();
8088	EVT DstVT = Node->getValueType(ResNo: `0`);
8089	SDLoc dl(SDValue (Node, `0`));
8090
8091	// FIXME: Only f32 to i64 conversions are supported.
8092	if (SrcVT != MVT::f32 \|\| DstVT != MVT::i64)
8093	return false;
8094
8095	if (Node->isStrictFPOpcode())
8096	// When a NaN is converted to an integer a trap is allowed. We can't
8097	// use this expansion here because it would eliminate that trap. Other
8098	// traps are also allowed and cannot be eliminated. See
8099	// IEEE 754-2008 sec 5.8.
8100	return false;
8101
8102	// Expand f32 -> i64 conversion
8103	// This algorithm comes from compiler-rt's implementation of fixsfdi:
8104	// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8105	unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8106	EVT IntVT = SrcVT.changeTypeToInteger();
8107	EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8108
8109	SDValue ExponentMask = DAG.getConstant(Val: `0x7F800000`, DL: dl, VT: IntVT);
8110	SDValue ExponentLoBit = DAG.getConstant(Val: `23`, DL: dl, VT: IntVT);
8111	SDValue Bias = DAG.getConstant(Val: `127`, DL: dl, VT: IntVT);
8112	SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8113	SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - `1`, DL: dl, VT: IntVT);
8114	SDValue MantissaMask = DAG.getConstant(Val: `0x007FFFFF`, DL: dl, VT: IntVT);
8115
8116	SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8117
8118	SDValue ExponentBits = DAG.getNode(
8119	Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8120	N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8121	SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8122
8123	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8124	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8125	N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8126	Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8127
8128	SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8129	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8130	N2: DAG.getConstant(Val: `0x00800000`, DL: dl, VT: IntVT));
8131
8132	R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8133
8134	R = DAG.getSelectCC(
8135	DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8136	True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8137	N2: DAG.getZExtOrTrunc(
8138	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8139	DL: dl, VT: IntShVT)),
8140	False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8141	N2: DAG.getZExtOrTrunc(
8142	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8143	DL: dl, VT: IntShVT)),
8144	Cond: ISD::SETGT);
8145
8146	SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8147	N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8148
8149	Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: IntVT),
8150	True: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8151	return true;
8152	}
8153
8154	bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8155	SDValue &Chain,
8156	SelectionDAG &DAG) const {
8157	SDLoc dl(SDValue (Node, `0`));
8158	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8159	SDValue Src = Node->getOperand(Num: OpNo);
8160
8161	EVT SrcVT = Src.getValueType();
8162	EVT DstVT = Node->getValueType(ResNo: `0`);
8163	EVT SetCCVT =
8164	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8165	EVT DstSetCCVT =
8166	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8167
8168	// Only expand vector types if we have the appropriate vector bit operations.
8169	unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8170	ISD::FP_TO_SINT;
8171	if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) \|\|
8172	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8173	return false;
8174
8175	// If the maximum float value is smaller then the signed integer range,
8176	// the destination signmask can't be represented by the float, so we can
8177	// just use FP_TO_SINT directly.
8178	const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(VT: SrcVT);
8179	APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8180	APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8181	if (APFloat::opOverflow &
8182	APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8183	if (Node->isStrictFPOpcode()) {
8184	Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8185	{ Node->getOperand(`0`), Src });
8186	Chain = Result.getValue(R: `1`);
8187	} else
8188	Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8189	return true;
8190	}
8191
8192	// Don't expand it if there isn't cheap fsub instruction.
8193	if (!isOperationLegalOrCustom(
8194	Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8195	return false;
8196
8197	SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8198	SDValue Sel;
8199
8200	if (Node->isStrictFPOpcode()) {
8201	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8202	Chain: Node->getOperand(Num: `0`), /IsSignaling/ true);
8203	Chain = Sel.getValue(R: `1`);
8204	} else {
8205	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8206	}
8207
8208	bool Strict = Node->isStrictFPOpcode() \|\|
8209	shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /IsSigned/ false);
8210
8211	if (Strict) {
8212	// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8213	// signmask then offset (the result of which should be fully representable).
8214	// Sel = Src < 0x8000000000000000
8215	// FltOfs = select Sel, 0, 0x8000000000000000
8216	// IntOfs = select Sel, 0, 0x8000000000000000
8217	// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8218
8219	// TODO: Should any fast-math-flags be set for the FSUB?
8220	SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8221	LHS: DAG.getConstantFP(Val: `0.0`, DL: dl, VT: SrcVT), RHS: Cst);
8222	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8223	SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8224	LHS: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT),
8225	RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8226	SDValue SInt;
8227	if (Node->isStrictFPOpcode()) {
8228	SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8229	{ Chain, Src, FltOfs });
8230	SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8231	{ Val.getValue(`1`), Val });
8232	Chain = SInt.getValue(R: `1`);
8233	} else {
8234	SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8235	SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8236	}
8237	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8238	} else {
8239	// Expand based on maximum range of FP_TO_SINT:
8240	// True = fp_to_sint(Src)
8241	// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8242	// Result = select (Src < 0x8000000000000000), True, False
8243
8244	SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8245	// TODO: Should any fast-math-flags be set for the FSUB?
8246	SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8247	Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8248	False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8249	N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8250	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8251	Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8252	}
8253	return true;
8254	}
8255
8256	bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8257	SDValue &Chain,
8258	SelectionDAG &DAG) const {
8259	// This transform is not correct for converting 0 when rounding mode is set
8260	// to round toward negative infinity which will produce -0.0. So disable under
8261	// strictfp.
8262	if (Node->isStrictFPOpcode())
8263	return false;
8264
8265	SDValue Src = Node->getOperand(Num: `0`);
8266	EVT SrcVT = Src.getValueType();
8267	EVT DstVT = Node->getValueType(ResNo: `0`);
8268
8269	if (SrcVT.getScalarType() != MVT::i64 \|\| DstVT.getScalarType() != MVT::f64)
8270	return false;
8271
8272	// Only expand vector types if we have the appropriate vector bit operations.
8273	if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) \|\|
8274	!isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) \|\|
8275	!isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) \|\|
8276	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) \|\|
8277	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8278	return false;
8279
8280	SDLoc dl(SDValue (Node, `0`));
8281	EVT ShiftVT = getShiftAmountTy(LHSTy: SrcVT, DL: DAG.getDataLayout());
8282
8283	// Implementation of unsigned i64 to f64 following the algorithm in
8284	// __floatundidf in compiler_rt. This implementation performs rounding
8285	// correctly in all rounding modes with the exception of converting 0
8286	// when rounding toward negative infinity. In that case the fsub will produce
8287	// -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
8288	SDValue TwoP52 = DAG.getConstant(UINT64_C(`0x4330000000000000`), DL: dl, VT: SrcVT);
8289	SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8290	Val: llvm::bit_cast<double>(UINT64_C(`0x4530000000100000`)), DL: dl, VT: DstVT);
8291	SDValue TwoP84 = DAG.getConstant(UINT64_C(`0x4530000000000000`), DL: dl, VT: SrcVT);
8292	SDValue LoMask = DAG.getConstant(UINT64_C(`0x00000000FFFFFFFF`), DL: dl, VT: SrcVT);
8293	SDValue HiShift = DAG.getConstant(Val: `32`, DL: dl, VT: ShiftVT);
8294
8295	SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8296	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8297	SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8298	SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8299	SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8300	SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8301	SDValue HiSub =
8302	DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8303	Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8304	return true;
8305	}
8306
8307	SDValue
8308	TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8309	SelectionDAG &DAG) const {
8310	unsigned Opcode = Node->getOpcode();
8311	assert((Opcode == ISD::FMINNUM \|\| Opcode == ISD::FMAXNUM \|\|
8312	Opcode == ISD::STRICT_FMINNUM \|\| Opcode == ISD::STRICT_FMAXNUM) &&
8313	"Wrong opcode");
8314
8315	if (Node->getFlags().hasNoNaNs()) {
8316	ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8317	SDValue Op1 = Node->getOperand(Num: `0`);
8318	SDValue Op2 = Node->getOperand(Num: `1`);
8319	SDValue SelCC = DAG.getSelectCC(DL: SDLoc (Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred);
8320	// Copy FMF flags, but always set the no-signed-zeros flag
8321	// as this is implied by the FMINNUM/FMAXNUM semantics.
8322	SDNodeFlags Flags = Node->getFlags();
8323	Flags.setNoSignedZeros(true);
8324	SelCC ->setFlags(Flags);
8325	return SelCC;
8326	}
8327
8328	return SDValue ();
8329	}
8330
8331	SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8332	SelectionDAG &DAG) const {
8333	SDLoc dl(Node);
8334	unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8335	ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8336	EVT VT = Node->getValueType(ResNo: `0`);
8337
8338	if (VT.isScalableVector())
8339	report_fatal_error(
8340	reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8341
8342	if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8343	SDValue Quiet0 = Node->getOperand(Num: `0`);
8344	SDValue Quiet1 = Node->getOperand(Num: `1`);
8345
8346	if (!Node->getFlags().hasNoNaNs()) {
8347	// Insert canonicalizes if it's possible we need to quiet to get correct
8348	// sNaN behavior.
8349	if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
8350	Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
8351	Flags: Node->getFlags());
8352	}
8353	if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
8354	Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
8355	Flags: Node->getFlags());
8356	}
8357	}
8358
8359	return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
8360	}
8361
8362	// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8363	// instead if there are no NaNs and there can't be an incompatible zero
8364	// compare: at least one operand isn't +/-0, or there are no signed-zeros.
8365	if ((Node->getFlags().hasNoNaNs() \|\|
8366	(DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `0`)) &&
8367	DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `1`)))) &&
8368	(Node->getFlags().hasNoSignedZeros() \|\|
8369	DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: `0`)) \|\|
8370	DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: `1`)))) {
8371	unsigned IEEE2018Op =
8372	Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8373	if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
8374	return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: `0`),
8375	N2: Node->getOperand(Num: `1`), Flags: Node->getFlags());
8376	}
8377
8378	if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8379	return SelCC;
8380
8381	return SDValue ();
8382	}
8383
8384	/// Returns a true value if if this FPClassTest can be performed with an ordered
8385	/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8386	/// std::nullopt if it cannot be performed as a compare with 0.
8387	static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8388	const fltSemantics &Semantics,
8389	const MachineFunction &MF) {
8390	FPClassTest OrderedMask = Test & ~fcNan;
8391	FPClassTest NanTest = Test & fcNan;
8392	bool IsOrdered = NanTest == fcNone;
8393	bool IsUnordered = NanTest == fcNan;
8394
8395	// Skip cases that are testing for only a qnan or snan.
8396	if (!IsOrdered && !IsUnordered)
8397	return std::nullopt;
8398
8399	if (OrderedMask == fcZero &&
8400	MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
8401	return IsOrdered;
8402	if (OrderedMask == (fcZero \| fcSubnormal) &&
8403	MF.getDenormalMode(FPType: Semantics).inputsAreZero())
8404	return IsOrdered;
8405	return std::nullopt;
8406	}
8407
8408	SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8409	FPClassTest Test, SDNodeFlags Flags,
8410	const SDLoc &DL,
8411	SelectionDAG &DAG) const {
8412	EVT OperandVT = Op.getValueType();
8413	assert(OperandVT.isFloatingPoint());
8414
8415	// Degenerated cases.
8416	if (Test == fcNone)
8417	return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
8418	if ((Test & fcAllFlags) == fcAllFlags)
8419	return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
8420
8421	// PPC double double is a pair of doubles, of which the higher part determines
8422	// the value class.
8423	if (OperandVT == MVT::ppcf128) {
8424	Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8425	DAG.getConstant(`1`, DL, MVT::i32));
8426	OperandVT = MVT::f64;
8427	}
8428
8429	// Some checks may be represented as inversion of simpler check, for example
8430	// "inf\|normal\|subnormal\|zero" => !"nan".
8431	bool IsInverted = false;
8432	if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8433	IsInverted = true;
8434	Test = InvertedCheck;
8435	}
8436
8437	// Floating-point type properties.
8438	EVT ScalarFloatVT = OperandVT.getScalarType();
8439	const Type FloatTy = ScalarFloatVT.getTypeForEVT(Context&: DAG.getContext());
8440	const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8441	bool IsF80 = (ScalarFloatVT == MVT::f80);
8442
8443	// Some checks can be implemented using float comparisons, if floating point
8444	// exceptions are ignored.
8445	if (Flags.hasNoFPExcept() &&
8446	isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
8447	ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8448	ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8449
8450	if (std::optional<bool> IsCmp0 =
8451	isFCmpEqualZero(Test, Semantics, MF: DAG.getMachineFunction());
8452	IsCmp0 && (isCondCodeLegalOrCustom(
8453	CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8454	VT: OperandVT.getScalarType().getSimpleVT()))) {
8455
8456	// If denormals could be implicitly treated as 0, this is not equivalent
8457	// to a compare with 0 since it will also be true for denormals.
8458	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
8459	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT: OperandVT),
8460	Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8461	}
8462
8463	if (Test == fcNan &&
8464	isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETO : ISD::SETUO,
8465	VT: OperandVT.getScalarType().getSimpleVT())) {
8466	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
8467	Cond: IsInverted ? ISD::SETO : ISD::SETUO);
8468	}
8469
8470	if (Test == fcInf &&
8471	isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8472	VT: OperandVT.getScalarType().getSimpleVT()) &&
8473	isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType())) {
8474	// isinf(x) --> fabs(x) == inf
8475	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8476	SDValue Inf =
8477	DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
8478	return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
8479	Cond: IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8480	}
8481	}
8482
8483	// In the general case use integer operations.
8484	unsigned BitSize = OperandVT.getScalarSizeInBits();
8485	EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize);
8486	if (OperandVT.isVector())
8487	IntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntVT,
8488	EC: OperandVT.getVectorElementCount());
8489	SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
8490
8491	// Various masks.
8492	APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
8493	APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
8494	APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
8495	const unsigned ExplicitIntBitInF80 = `63`;
8496	APInt ExpMask = Inf;
8497	if (IsF80)
8498	ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
8499	APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
8500	APInt QNaNBitMask =
8501	APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - `1`);
8502	APInt InvertionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
8503
8504	SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
8505	SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
8506	SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
8507	SDValue ZeroV = DAG.getConstant(Val: `0`, DL, VT: IntVT);
8508	SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
8509	SDValue ResultInvertionMask = DAG.getConstant(Val: InvertionMask, DL, VT: ResultVT);
8510
8511	SDValue Res;
8512	const auto appendResult = [&](SDValue PartialRes) {
8513	if (PartialRes) {
8514	if (Res)
8515	Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
8516	else
8517	Res = PartialRes;
8518	}
8519	};
8520
8521	SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8522	const auto getIntBitIsSet = [&]() -> SDValue {
8523	if (!IntBitIsSetV) {
8524	APInt IntBitMask(BitSize, `0`);
8525	IntBitMask.setBit(ExplicitIntBitInF80);
8526	SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
8527	SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
8528	IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
8529	}
8530	return IntBitIsSetV;
8531	};
8532
8533	// Split the value into sign bit and absolute value.
8534	SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
8535	SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
8536	RHS: DAG.getConstant(Val: `0.0`, DL, VT: IntVT), Cond: ISD::SETLT);
8537
8538	// Tests that involve more than one class should be processed first.
8539	SDValue PartialRes;
8540
8541	if (IsF80)
8542	; // Detect finite numbers of f80 by checking individual classes because
8543	// they have different settings of the explicit integer bit.
8544	else if ((Test & fcFinite) == fcFinite) {
8545	// finite(V) ==> abs(V) < exp_mask
8546	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8547	Test &= ~fcFinite;
8548	} else if ((Test & fcFinite) == fcPosFinite) {
8549	// finite(V) && V > 0 ==> V < exp_mask
8550	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
8551	Test &= ~fcPosFinite;
8552	} else if ((Test & fcFinite) == fcNegFinite) {
8553	// finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8554	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8555	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8556	Test &= ~fcNegFinite;
8557	}
8558	appendResult (PartialRes);
8559
8560	if (FPClassTest PartialCheck = Test & (fcZero \| fcSubnormal)) {
8561	// fcZero \| fcSubnormal => test all exponent bits are 0
8562	// TODO: Handle sign bit specific cases
8563	if (PartialCheck == (fcZero \| fcSubnormal)) {
8564	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
8565	SDValue ExpIsZero =
8566	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8567	appendResult (ExpIsZero);
8568	Test &= ~PartialCheck & fcAllFlags;
8569	}
8570	}
8571
8572	// Check for individual classes.
8573
8574	if (unsigned PartialCheck = Test & fcZero) {
8575	if (PartialCheck == fcPosZero)
8576	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
8577	else if (PartialCheck == fcZero)
8578	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
8579	else // ISD::fcNegZero
8580	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
8581	appendResult (PartialRes);
8582	}
8583
8584	if (unsigned PartialCheck = Test & fcSubnormal) {
8585	// issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8586	// issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8587	SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8588	SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
8589	SDValue VMinusOneV =
8590	DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: `1`, DL, VT: IntVT));
8591	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
8592	if (PartialCheck == fcNegSubnormal)
8593	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8594	appendResult (PartialRes);
8595	}
8596
8597	if (unsigned PartialCheck = Test & fcInf) {
8598	if (PartialCheck == fcPosInf)
8599	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
8600	else if (PartialCheck == fcInf)
8601	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
8602	else { // ISD::fcNegInf
8603	APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
8604	SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
8605	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
8606	}
8607	appendResult (PartialRes);
8608	}
8609
8610	if (unsigned PartialCheck = Test & fcNan) {
8611	APInt InfWithQnanBit = Inf \| QNaNBitMask;
8612	SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
8613	if (PartialCheck == fcNan) {
8614	// isnan(V) ==> abs(V) > int(inf)
8615	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8616	if (IsF80) {
8617	// Recognize unsupported values as NaNs for compatibility with glibc.
8618	// In them (exp(V)==0) == int_bit.
8619	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
8620	SDValue ExpIsZero =
8621	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8622	SDValue IsPseudo =
8623	DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet (), RHS: ExpIsZero, Cond: ISD::SETEQ);
8624	PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
8625	}
8626	} else if (PartialCheck == fcQNan) {
8627	// isquiet(V) ==> abs(V) >= (unsigned(Inf) \| quiet_bit)
8628	PartialRes =
8629	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
8630	} else { // ISD::fcSNan
8631	// issignaling(V) ==> abs(V) > unsigned(Inf) &&
8632	// abs(V) < (unsigned(Inf) \| quiet_bit)
8633	SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8634	SDValue IsNotQnan =
8635	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
8636	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
8637	}
8638	appendResult (PartialRes);
8639	}
8640
8641	if (unsigned PartialCheck = Test & fcNormal) {
8642	// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8643	APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: `1`));
8644	SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
8645	SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
8646	APInt ExpLimit = ExpMask - ExpLSB;
8647	SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
8648	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
8649	if (PartialCheck == fcNegNormal)
8650	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8651	else if (PartialCheck == fcPosNormal) {
8652	SDValue PosSignV =
8653	DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInvertionMask);
8654	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
8655	}
8656	if (IsF80)
8657	PartialRes =
8658	DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet ());
8659	appendResult (PartialRes);
8660	}
8661
8662	if (!Res)
8663	return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
8664	if (IsInverted)
8665	Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInvertionMask);
8666	return Res;
8667	}
8668
8669	// Only expand vector types if we have the appropriate vector bit operations.
8670	static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8671	assert(VT.isVector() && "Expected vector type");
8672	unsigned Len = VT.getScalarSizeInBits();
8673	return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
8674	TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
8675	TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
8676	(Len == `8` \|\| TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
8677	TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
8678	}
8679
8680	SDValue TargetLowering::expandCTPOP(SDNode Node, SelectionDAG &DAG) const* {
8681	SDLoc dl(Node);
8682	EVT VT = Node->getValueType(ResNo: `0`);
8683	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8684	SDValue Op = Node->getOperand(Num: `0`);
8685	unsigned Len = VT.getScalarSizeInBits();
8686	assert(VT.isInteger() && "CTPOP not implemented for this type.");
8687
8688	// TODO: Add support for irregular type lengths.
8689	if (!(Len <= `128` && Len % `8` == `0`))
8690	return SDValue ();
8691
8692	// Only expand vector types if we have the appropriate vector bit operations.
8693	if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
8694	return SDValue ();
8695
8696	// This is the "best" algorithm from
8697	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8698	SDValue Mask55 =
8699	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
8700	SDValue Mask33 =
8701	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
8702	SDValue Mask0F =
8703	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
8704
8705	// v = v - ((v >> 1) & 0x55555555...)
8706	Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
8707	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8708	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8709	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT)),
8710	N2: Mask55));
8711	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8712	Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
8713	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8714	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8715	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT)),
8716	N2: Mask33));
8717	// v = (v + (v >> 4)) & 0x0F0F0F0F...
8718	Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8719	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8720	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8721	N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT))),
8722	N2: Mask0F);
8723
8724	if (Len <= `8`)
8725	return Op;
8726
8727	// Avoid the multiply if we only have 2 bytes to add.
8728	// TODO: Only doing this for scalars because vectors weren't as obviously
8729	// improved.
8730	if (Len == `16` && !VT.isVector()) {
8731	// v = (v + (v >> 8)) & 0x00FF;
8732	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8733	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8734	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8735	N2: DAG.getConstant(Val: `8`, DL: dl, VT: ShVT))),
8736	N2: DAG.getConstant(Val: `0xFF`, DL: dl, VT));
8737	}
8738
8739	// v = (v 0x01010101...) >> (Len - 8)*
8740	SDValue V;
8741	if (isOperationLegalOrCustomOrPromote(
8742	Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
8743	SDValue Mask01 =
8744	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
8745	V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
8746	} else {
8747	V = Op;
8748	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
8749	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
8750	V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
8751	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
8752	}
8753	}
8754	return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT));
8755	}
8756
8757	SDValue TargetLowering::expandVPCTPOP(SDNode Node, SelectionDAG &DAG) const* {
8758	SDLoc dl(Node);
8759	EVT VT = Node->getValueType(ResNo: `0`);
8760	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8761	SDValue Op = Node->getOperand(Num: `0`);
8762	SDValue Mask = Node->getOperand(Num: `1`);
8763	SDValue VL = Node->getOperand(Num: `2`);
8764	unsigned Len = VT.getScalarSizeInBits();
8765	assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8766
8767	// TODO: Add support for irregular type lengths.
8768	if (!(Len <= `128` && Len % `8` == `0`))
8769	return SDValue ();
8770
8771	// This is same algorithm of expandCTPOP from
8772	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8773	SDValue Mask55 =
8774	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
8775	SDValue Mask33 =
8776	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
8777	SDValue Mask0F =
8778	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
8779
8780	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8781
8782	// v = v - ((v >> 1) & 0x55555555...)
8783	Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8784	N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8785	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8786	N2: Mask55, N3: Mask, N4: VL);
8787	Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
8788
8789	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8790	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
8791	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8792	N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8793	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8794	N2: Mask33, N3: Mask, N4: VL);
8795	Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
8796
8797	// v = (v + (v >> 4)) & 0x0F0F0F0F...
8798	Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT),
8799	N3: Mask, N4: VL),
8800	Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
8801	Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
8802
8803	if (Len <= `8`)
8804	return Op;
8805
8806	// v = (v 0x01010101...) >> (Len - 8)*
8807	SDValue V;
8808	if (isOperationLegalOrCustomOrPromote(
8809	Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
8810	SDValue Mask01 =
8811	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
8812	V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
8813	} else {
8814	V = Op;
8815	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
8816	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
8817	V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
8818	N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
8819	N3: Mask, N4: VL);
8820	}
8821	}
8822	return DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: V,
8823	N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT), N3: Mask, N4: VL);
8824	}
8825
8826	SDValue TargetLowering::expandCTLZ(SDNode Node, SelectionDAG &DAG) const* {
8827	SDLoc dl(Node);
8828	EVT VT = Node->getValueType(ResNo: `0`);
8829	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8830	SDValue Op = Node->getOperand(Num: `0`);
8831	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8832
8833	// If the non-ZERO_UNDEF version is supported we can use that instead.
8834	if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8835	isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
8836	return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
8837
8838	// If the ZERO_UNDEF version is supported use that and handle the zero case.
8839	if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
8840	EVT SetCCVT =
8841	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8842	SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
8843	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
8844	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8845	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
8846	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
8847	}
8848
8849	// Only expand vector types if we have the appropriate vector bit operations.
8850	// This includes the operations needed to expand CTPOP if it isn't supported.
8851	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
8852	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
8853	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
8854	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
8855	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8856	return SDValue ();
8857
8858	// for now, we do this:
8859	// x = x \| (x >> 1);
8860	// x = x \| (x >> 2);
8861	// ...
8862	// x = x \| (x >>16);
8863	// x = x \| (x >>32); // for 64-bit input
8864	// return popcount(~x);
8865	//
8866	// Ref: "Hacker's Delight" by Henry Warren
8867	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
8868	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
8869	Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
8870	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
8871	}
8872	Op = DAG.getNOT(DL: dl, Val: Op, VT);
8873	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
8874	}
8875
8876	SDValue TargetLowering::expandVPCTLZ(SDNode Node, SelectionDAG &DAG) const* {
8877	SDLoc dl(Node);
8878	EVT VT = Node->getValueType(ResNo: `0`);
8879	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8880	SDValue Op = Node->getOperand(Num: `0`);
8881	SDValue Mask = Node->getOperand(Num: `1`);
8882	SDValue VL = Node->getOperand(Num: `2`);
8883	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8884
8885	// do this:
8886	// x = x \| (x >> 1);
8887	// x = x \| (x >> 2);
8888	// ...
8889	// x = x \| (x >>16);
8890	// x = x \| (x >>32); // for 64-bit input
8891	// return popcount(~x);
8892	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
8893	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
8894	Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
8895	N2: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
8896	N4: VL);
8897	}
8898	Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: -`1`, DL: dl, VT), N3: Mask,
8899	N4: VL);
8900	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
8901	}
8902
8903	SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
8904	const SDLoc &DL, EVT VT, SDValue Op,
8905	unsigned BitWidth) const {
8906	if (BitWidth != `32` && BitWidth != `64`)
8907	return SDValue ();
8908	APInt DeBruijn = BitWidth == `32` ? APInt (`32`, `0x077CB531U`)
8909	: APInt (`64`, `0x0218A392CD3D5DBFULL`);
8910	const DataLayout &TD = DAG.getDataLayout();
8911	MachinePointerInfo PtrInfo =
8912	MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
8913	unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
8914	SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: `0`, DL, VT), N2: Op);
8915	SDValue Lookup = DAG.getNode(
8916	Opcode: ISD::SRL, DL, VT,
8917	N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
8918	N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
8919	N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
8920	Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
8921
8922	SmallVector<uint8_t> Table(BitWidth, `0`);
8923	for (unsigned i = `0`; i < BitWidth; i++) {
8924	APInt Shl = DeBruijn.shl(shiftAmt: i);
8925	APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
8926	Table [Lshr.getZExtValue()] = i;
8927	}
8928
8929	// Create a ConstantArray in Constant Pool
8930	auto CA = ConstantDataArray::get(Context&: DAG.getContext(), Elts&: Table);
8931	SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
8932	Align: TD.getPrefTypeAlign(Ty: CA->getType()));
8933	SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
8934	DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
8935	PtrInfo, MVT::i8);
8936	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
8937	return ExtLoad;
8938
8939	EVT SetCCVT =
8940	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8941	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
8942	SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8943	return DAG.getSelect(DL, VT, Cond: SrcIsZero,
8944	LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
8945	}
8946
8947	SDValue TargetLowering::expandCTTZ(SDNode Node, SelectionDAG &DAG) const* {
8948	SDLoc dl(Node);
8949	EVT VT = Node->getValueType(ResNo: `0`);
8950	SDValue Op = Node->getOperand(Num: `0`);
8951	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8952
8953	// If the non-ZERO_UNDEF version is supported we can use that instead.
8954	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
8955	isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
8956	return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
8957
8958	// If the ZERO_UNDEF version is supported use that and handle the zero case.
8959	if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
8960	EVT SetCCVT =
8961	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8962	SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
8963	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
8964	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8965	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
8966	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
8967	}
8968
8969	// Only expand vector types if we have the appropriate vector bit operations.
8970	// This includes the operations needed to expand CTPOP if it isn't supported.
8971	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
8972	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
8973	!isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
8974	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
8975	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
8976	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) \|\|
8977	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
8978	return SDValue ();
8979
8980	// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8981	if (!VT.isVector() && isOperationExpand(Op: ISD::CTPOP, VT) &&
8982	!isOperationLegal(Op: ISD::CTLZ, VT))
8983	if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
8984	return V;
8985
8986	// for now, we use: { return popcount(~x & (x - 1)); }
8987	// unless the target has ctlz but not ctpop, in which case we use:
8988	// { return 32 - nlz(~x & (x-1)); }
8989	// Ref: "Hacker's Delight" by Henry Warren
8990	SDValue Tmp = DAG.getNode(
8991	Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
8992	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `1`, DL: dl, VT)));
8993
8994	// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
8995	if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
8996	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
8997	N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
8998	}
8999
9000	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
9001	}
9002
9003	SDValue TargetLowering::expandVPCTTZ(SDNode Node, SelectionDAG &DAG) const* {
9004	SDValue Op = Node->getOperand(Num: `0`);
9005	SDValue Mask = Node->getOperand(Num: `1`);
9006	SDValue VL = Node->getOperand(Num: `2`);
9007	SDLoc dl(Node);
9008	EVT VT = Node->getValueType(ResNo: `0`);
9009
9010	// Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9011	SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
9012	N2: DAG.getConstant(Val: -`1`, DL: dl, VT), N3: Mask, N4: VL);
9013	SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
9014	N2: DAG.getConstant(Val: `1`, DL: dl, VT), N3: Mask, N4: VL);
9015	SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
9016	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
9017	}
9018
9019	SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9020	bool IsNegative) const {
9021	SDLoc dl(N);
9022	EVT VT = N->getValueType(ResNo: `0`);
9023	SDValue Op = N->getOperand(Num: `0`);
9024
9025	// abs(x) -> smax(x,sub(0,x))
9026	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9027	isOperationLegal(Op: ISD::SMAX, VT)) {
9028	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9029	return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
9030	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9031	}
9032
9033	// abs(x) -> umin(x,sub(0,x))
9034	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9035	isOperationLegal(Op: ISD::UMIN, VT)) {
9036	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9037	Op = DAG.getFreeze(V: Op);
9038	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
9039	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9040	}
9041
9042	// 0 - abs(x) -> smin(x, sub(0,x))
9043	if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9044	isOperationLegal(Op: ISD::SMIN, VT)) {
9045	Op = DAG.getFreeze(V: Op);
9046	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9047	return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
9048	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9049	}
9050
9051	// Only expand vector types if we have the appropriate vector operations.
9052	if (VT.isVector() &&
9053	(!isOperationLegalOrCustom(Op: ISD::SRA, VT) \|\|
9054	(!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) \|\|
9055	(IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) \|\|
9056	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9057	return SDValue ();
9058
9059	Op = DAG.getFreeze(V: Op);
9060	SDValue Shift = DAG.getNode(
9061	Opcode: ISD::SRA, DL: dl, VT, N1: Op,
9062	N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - `1`, VT, DL: dl));
9063	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
9064
9065	// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9066	if (!IsNegative)
9067	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
9068
9069	// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9070	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
9071	}
9072
9073	SDValue TargetLowering::expandABD(SDNode N, SelectionDAG &DAG) const* {
9074	SDLoc dl(N);
9075	EVT VT = N->getValueType(ResNo: `0`);
9076	SDValue LHS = DAG.getFreeze(V: N->getOperand(Num: `0`));
9077	SDValue RHS = DAG.getFreeze(V: N->getOperand(Num: `1`));
9078	bool IsSigned = N->getOpcode() == ISD::ABDS;
9079
9080	// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9081	// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9082	unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9083	unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9084	if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
9085	SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
9086	SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
9087	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
9088	}
9089
9090	// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9091	if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT))
9092	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
9093	N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
9094	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
9095
9096	// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9097	// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9098	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9099	ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9100	SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
9101	return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
9102	RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9103	}
9104
9105	SDValue TargetLowering::expandBSWAP(SDNode N, SelectionDAG &DAG) const* {
9106	SDLoc dl(N);
9107	EVT VT = N->getValueType(ResNo: `0`);
9108	SDValue Op = N->getOperand(Num: `0`);
9109
9110	if (!VT.isSimple())
9111	return SDValue ();
9112
9113	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9114	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9115	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9116	default:
9117	return SDValue ();
9118	case MVT::i16:
9119	// Use a rotate by 8. This can be further expanded if necessary.
9120	return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9121	case MVT::i32:
9122	Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9123	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9124	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
9125	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9126	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9127	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
9128	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9129	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9130	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9131	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9132	case MVT::i64:
9133	Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
9134	Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9135	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
9136	Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
9137	Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9138	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
9139	Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9140	Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9141	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
9142	Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9143	Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9144	Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
9145	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
9146	Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9147	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
9148	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
9149	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
9150	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
9151	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
9152	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
9153	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
9154	Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
9155	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9156	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9157	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
9158	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9159	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
9160	}
9161	}
9162
9163	SDValue TargetLowering::expandVPBSWAP(SDNode N, SelectionDAG &DAG) const* {
9164	SDLoc dl(N);
9165	EVT VT = N->getValueType(ResNo: `0`);
9166	SDValue Op = N->getOperand(Num: `0`);
9167	SDValue Mask = N->getOperand(Num: `1`);
9168	SDValue EVL = N->getOperand(Num: `2`);
9169
9170	if (!VT.isSimple())
9171	return SDValue ();
9172
9173	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9174	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9175	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9176	default:
9177	return SDValue ();
9178	case MVT::i16:
9179	Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9180	N3: Mask, N4: EVL);
9181	Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9182	N3: Mask, N4: EVL);
9183	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
9184	case MVT::i32:
9185	Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9186	N3: Mask, N4: EVL);
9187	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT),
9188	N3: Mask, N4: EVL);
9189	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9190	N3: Mask, N4: EVL);
9191	Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9192	N3: Mask, N4: EVL);
9193	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9194	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT), N3: Mask, N4: EVL);
9195	Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9196	N3: Mask, N4: EVL);
9197	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9198	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9199	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9200	case MVT::i64:
9201	Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
9202	N3: Mask, N4: EVL);
9203	Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9204	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
9205	Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
9206	N3: Mask, N4: EVL);
9207	Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9208	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
9209	Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9210	N3: Mask, N4: EVL);
9211	Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9212	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
9213	Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9214	N3: Mask, N4: EVL);
9215	Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9216	N3: Mask, N4: EVL);
9217	Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
9218	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
9219	Tmp3 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9220	N3: Mask, N4: EVL);
9221	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
9222	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
9223	Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
9224	N3: Mask, N4: EVL);
9225	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9226	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
9227	Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
9228	N3: Mask, N4: EVL);
9229	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
9230	Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
9231	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9232	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9233	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
9234	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9235	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
9236	}
9237	}
9238
9239	SDValue TargetLowering::expandBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
9240	SDLoc dl(N);
9241	EVT VT = N->getValueType(ResNo: `0`);
9242	SDValue Op = N->getOperand(Num: `0`);
9243	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9244	unsigned Sz = VT.getScalarSizeInBits();
9245
9246	SDValue Tmp, Tmp2, Tmp3;
9247
9248	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
9249	// and finally the i1 pairs.
9250	// TODO: We can easily support i4/i2 legal types if any target ever does.
9251	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
9252	// Create the masks - repeating the pattern every byte.
9253	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
9254	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
9255	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
9256
9257	// BSWAP if the type is wider than a single byte.
9258	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
9259
9260	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
9261	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
9262	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9263	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9264	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
9265	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9266
9267	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
9268	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
9269	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9270	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9271	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
9272	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9273
9274	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
9275	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
9276	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9277	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9278	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
9279	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9280	return Tmp;
9281	}
9282
9283	Tmp = DAG.getConstant(Val: `0`, DL: dl, VT);
9284	for (unsigned I = `0`, J = Sz-`1`; I < Sz; ++I, --J) {
9285	if (I < J)
9286	Tmp2 =
9287	DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
9288	else
9289	Tmp2 =
9290	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
9291
9292	APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
9293	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
9294	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
9295	}
9296
9297	return Tmp;
9298	}
9299
9300	SDValue TargetLowering::expandVPBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
9301	assert(N->getOpcode() == ISD::VP_BITREVERSE);
9302
9303	SDLoc dl(N);
9304	EVT VT = N->getValueType(ResNo: `0`);
9305	SDValue Op = N->getOperand(Num: `0`);
9306	SDValue Mask = N->getOperand(Num: `1`);
9307	SDValue EVL = N->getOperand(Num: `2`);
9308	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9309	unsigned Sz = VT.getScalarSizeInBits();
9310
9311	SDValue Tmp, Tmp2, Tmp3;
9312
9313	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
9314	// and finally the i1 pairs.
9315	// TODO: We can easily support i4/i2 legal types if any target ever does.
9316	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
9317	// Create the masks - repeating the pattern every byte.
9318	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
9319	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
9320	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
9321
9322	// BSWAP if the type is wider than a single byte.
9323	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
9324
9325	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
9326	Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
9327	N3: Mask, N4: EVL);
9328	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9329	N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
9330	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
9331	N3: Mask, N4: EVL);
9332	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
9333	N3: Mask, N4: EVL);
9334	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9335
9336	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
9337	Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
9338	N3: Mask, N4: EVL);
9339	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9340	N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
9341	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
9342	N3: Mask, N4: EVL);
9343	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
9344	N3: Mask, N4: EVL);
9345	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9346
9347	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
9348	Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
9349	N3: Mask, N4: EVL);
9350	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9351	N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
9352	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
9353	N3: Mask, N4: EVL);
9354	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
9355	N3: Mask, N4: EVL);
9356	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9357	return Tmp;
9358	}
9359	return SDValue ();
9360	}
9361
9362	std::pair<SDValue, SDValue>
9363	TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
9364	SelectionDAG &DAG) const {
9365	SDLoc SL(LD);
9366	SDValue Chain = LD->getChain();
9367	SDValue BasePTR = LD->getBasePtr();
9368	EVT SrcVT = LD->getMemoryVT();
9369	EVT DstVT = LD->getValueType(ResNo: `0`);
9370	ISD::LoadExtType ExtType = LD->getExtensionType();
9371
9372	if (SrcVT.isScalableVector())
9373	report_fatal_error(reason: "Cannot scalarize scalable vector loads");
9374
9375	unsigned NumElem = SrcVT.getVectorNumElements();
9376
9377	EVT SrcEltVT = SrcVT.getScalarType();
9378	EVT DstEltVT = DstVT.getScalarType();
9379
9380	// A vector must always be stored in memory as-is, i.e. without any padding
9381	// between the elements, since various code depend on it, e.g. in the
9382	// handling of a bitcast of a vector type to int, which may be done with a
9383	// vector store followed by an integer load. A vector that does not have
9384	// elements that are byte-sized must therefore be stored as an integer
9385	// built out of the extracted vector elements.
9386	if (!SrcEltVT.isByteSized()) {
9387	unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9388	EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
9389
9390	unsigned NumSrcBits = SrcVT.getSizeInBits();
9391	EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
9392
9393	unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9394	SDValue SrcEltBitMask = DAG.getConstant(
9395	Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
9396
9397	// Load the whole vector and avoid masking off the top bits as it makes
9398	// the codegen worse.
9399	SDValue Load =
9400	DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
9401	PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getOriginalAlign(),
9402	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9403
9404	SmallVector<SDValue, `8`> Vals;
9405	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9406	unsigned ShiftIntoIdx =
9407	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
9408	SDValue ShiftAmount =
9409	DAG.getShiftAmountConstant(Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(),
9410	VT: LoadVT, DL: SL, /LegalTypes=/false);
9411	SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
9412	SDValue Elt =
9413	DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
9414	SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
9415
9416	if (ExtType != ISD::NON_EXTLOAD) {
9417	unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
9418	Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
9419	}
9420
9421	Vals.push_back(Elt: Scalar);
9422	}
9423
9424	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9425	return std::make_pair(x&: Value, y: Load.getValue(R: `1`));
9426	}
9427
9428	unsigned Stride = SrcEltVT.getSizeInBits() / `8`;
9429	assert(SrcEltVT.isByteSized());
9430
9431	SmallVector<SDValue, `8`> Vals;
9432	SmallVector<SDValue, `8`> LoadChains;
9433
9434	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9435	SDValue ScalarLoad =
9436	DAG.getExtLoad(ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
9437	PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride),
9438	MemVT: SrcEltVT, Alignment: LD->getOriginalAlign(),
9439	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9440
9441	BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
9442
9443	Vals.push_back(Elt: ScalarLoad.getValue(R: `0`));
9444	LoadChains.push_back(Elt: ScalarLoad.getValue(R: `1`));
9445	}
9446
9447	SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9448	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9449
9450	return std::make_pair(x&: Value, y&: NewChain);
9451	}
9452
9453	SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
9454	SelectionDAG &DAG) const {
9455	SDLoc SL(ST);
9456
9457	SDValue Chain = ST->getChain();
9458	SDValue BasePtr = ST->getBasePtr();
9459	SDValue Value = ST->getValue();
9460	EVT StVT = ST->getMemoryVT();
9461
9462	if (StVT.isScalableVector())
9463	report_fatal_error(reason: "Cannot scalarize scalable vector stores");
9464
9465	// The type of the data we want to save
9466	EVT RegVT = Value.getValueType();
9467	EVT RegSclVT = RegVT.getScalarType();
9468
9469	// The type of data as saved in memory.
9470	EVT MemSclVT = StVT.getScalarType();
9471
9472	unsigned NumElem = StVT.getVectorNumElements();
9473
9474	// A vector must always be stored in memory as-is, i.e. without any padding
9475	// between the elements, since various code depend on it, e.g. in the
9476	// handling of a bitcast of a vector type to int, which may be done with a
9477	// vector store followed by an integer load. A vector that does not have
9478	// elements that are byte-sized must therefore be stored as an integer
9479	// built out of the extracted vector elements.
9480	if (!MemSclVT.isByteSized()) {
9481	unsigned NumBits = StVT.getSizeInBits();
9482	EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
9483
9484	SDValue CurrVal = DAG.getConstant(Val: `0`, DL: SL, VT: IntVT);
9485
9486	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9487	SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9488	N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9489	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
9490	SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
9491	unsigned ShiftIntoIdx =
9492	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
9493	SDValue ShiftAmount =
9494	DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
9495	SDValue ShiftedElt =
9496	DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
9497	CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
9498	}
9499
9500	return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
9501	Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9502	AAInfo: ST->getAAInfo());
9503	}
9504
9505	// Store Stride in bytes
9506	unsigned Stride = MemSclVT.getSizeInBits() / `8`;
9507	assert(Stride && "Zero stride!");
9508	// Extract each of the elements from the original vector and save them into
9509	// memory individually.
9510	SmallVector<SDValue, `8`> Stores;
9511	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9512	SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9513	N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9514
9515	SDValue Ptr =
9516	DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
9517
9518	// This scalar TruncStore may be illegal, but we legalize it later.
9519	SDValue Store = DAG.getTruncStore(
9520	Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
9521	SVT: MemSclVT, Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9522	AAInfo: ST->getAAInfo());
9523
9524	Stores.push_back(Elt: Store);
9525	}
9526
9527	return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9528	}
9529
9530	std::pair<SDValue, SDValue>
9531	TargetLowering::expandUnalignedLoad(LoadSDNode LD, SelectionDAG &DAG) const* {
9532	assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9533	"unaligned indexed loads not implemented!");
9534	SDValue Chain = LD->getChain();
9535	SDValue Ptr = LD->getBasePtr();
9536	EVT VT = LD->getValueType(ResNo: `0`);
9537	EVT LoadedVT = LD->getMemoryVT();
9538	SDLoc dl(LD);
9539	auto &MF = DAG.getMachineFunction();
9540
9541	if (VT.isFloatingPoint() \|\| VT.isVector()) {
9542	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
9543	if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
9544	if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
9545	LoadedVT.isVector()) {
9546	// Scalarize the load and let the individual components be handled.
9547	return scalarizeVectorLoad(LD, DAG);
9548	}
9549
9550	// Expand to a (misaligned) integer load of the same size,
9551	// then bitconvert to floating point or vector.
9552	SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
9553	MMO: LD->getMemOperand());
9554	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
9555	if (LoadedVT != VT)
9556	Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
9557	ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
9558
9559	return std::make_pair(x&: Result, y: newLoad.getValue(R: `1`));
9560	}
9561
9562	// Copy the value to a (aligned) stack slot using (unaligned) integer
9563	// loads and stores, then do a (aligned) load from the stack slot.
9564	MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
9565	unsigned LoadedBytes = LoadedVT.getStoreSize();
9566	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
9567	unsigned NumRegs = (LoadedBytes + RegBytes - `1`) / RegBytes;
9568
9569	// Make sure the stack slot is also aligned for the register type.
9570	SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
9571	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
9572	SmallVector<SDValue, `8`> Stores;
9573	SDValue StackPtr = StackBase;
9574	unsigned Offset = `0`;
9575
9576	EVT PtrVT = Ptr.getValueType();
9577	EVT StackPtrVT = StackPtr.getValueType();
9578
9579	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9580	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9581
9582	// Do all but one copies using the full register width.
9583	for (unsigned i = `1`; i < NumRegs; i++) {
9584	// Load one integer register's worth from the original location.
9585	SDValue Load = DAG.getLoad(
9586	VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
9587	Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9588	AAInfo: LD->getAAInfo());
9589	// Follow the load with a store to the stack slot. Remember the store.
9590	Stores.push_back(Elt: DAG.getStore(
9591	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
9592	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
9593	// Increment the pointers.
9594	Offset += RegBytes;
9595
9596	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9597	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9598	}
9599
9600	// The last copy may be partial. Do an extending load.
9601	EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
9602	BitWidth: `8` * (LoadedBytes - Offset));
9603	SDValue Load =
9604	DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
9605	PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT,
9606	Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9607	AAInfo: LD->getAAInfo());
9608	// Follow the load with a store to the stack slot. Remember the store.
9609	// On big-endian machines this requires a truncating store to ensure
9610	// that the bits end up in the right place.
9611	Stores.push_back(Elt: DAG.getTruncStore(
9612	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
9613	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
9614
9615	// The order of the stores doesn't matter - say it with a TokenFactor.
9616	SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9617
9618	// Finally, perform the original load only redirected to the stack slot.
9619	Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
9620	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`),
9621	MemVT: LoadedVT);
9622
9623	// Callers expect a MERGE_VALUES node.
9624	return std::make_pair(x&: Load, y&: TF);
9625	}
9626
9627	assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9628	"Unaligned load of unsupported type.");
9629
9630	// Compute the new VT that is half the size of the old one. This is an
9631	// integer MVT.
9632	unsigned NumBits = LoadedVT.getSizeInBits();
9633	EVT NewLoadedVT;
9634	NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/`2`);
9635	NumBits >>= `1`;
9636
9637	Align Alignment = LD->getOriginalAlign();
9638	unsigned IncrementSize = NumBits / `8`;
9639	ISD::LoadExtType HiExtType = LD->getExtensionType();
9640
9641	// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9642	if (HiExtType == ISD::NON_EXTLOAD)
9643	HiExtType = ISD::ZEXTLOAD;
9644
9645	// Load the value in two parts
9646	SDValue Lo, Hi;
9647	if (DAG.getDataLayout().isLittleEndian()) {
9648	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9649	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9650	AAInfo: LD->getAAInfo());
9651
9652	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9653	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
9654	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9655	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9656	AAInfo: LD->getAAInfo());
9657	} else {
9658	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9659	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9660	AAInfo: LD->getAAInfo());
9661
9662	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9663	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9664	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9665	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9666	AAInfo: LD->getAAInfo());
9667	}
9668
9669	// aggregate the two parts
9670	SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
9671	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
9672	Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
9673
9674	SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(`1`),
9675	Hi.getValue(`1`));
9676
9677	return std::make_pair(x&: Result, y&: TF);
9678	}
9679
9680	SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
9681	SelectionDAG &DAG) const {
9682	assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9683	"unaligned indexed stores not implemented!");
9684	SDValue Chain = ST->getChain();
9685	SDValue Ptr = ST->getBasePtr();
9686	SDValue Val = ST->getValue();
9687	EVT VT = Val.getValueType();
9688	Align Alignment = ST->getOriginalAlign();
9689	auto &MF = DAG.getMachineFunction();
9690	EVT StoreMemVT = ST->getMemoryVT();
9691
9692	SDLoc dl(ST);
9693	if (StoreMemVT.isFloatingPoint() \|\| StoreMemVT.isVector()) {
9694	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
9695	if (isTypeLegal(VT: intVT)) {
9696	if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
9697	StoreMemVT.isVector()) {
9698	// Scalarize the store and let the individual components be handled.
9699	SDValue Result = scalarizeVectorStore(ST, DAG);
9700	return Result;
9701	}
9702	// Expand to a bitconvert of the value to the integer type of the
9703	// same size, then a (misaligned) int store.
9704	// FIXME: Does not handle truncating floating point stores!
9705	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
9706	Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
9707	Alignment, MMOFlags: ST->getMemOperand()->getFlags());
9708	return Result;
9709	}
9710	// Do a (aligned) store to a stack slot, then copy from the stack slot
9711	// to the final destination using (unaligned) integer loads and stores.
9712	MVT RegVT = getRegisterType(
9713	Context&: *DAG.getContext(),
9714	VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
9715	EVT PtrVT = Ptr.getValueType();
9716	unsigned StoredBytes = StoreMemVT.getStoreSize();
9717	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
9718	unsigned NumRegs = (StoredBytes + RegBytes - `1`) / RegBytes;
9719
9720	// Make sure the stack slot is also aligned for the register type.
9721	SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
9722	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
9723
9724	// Perform the original store, only redirected to the stack slot.
9725	SDValue Store = DAG.getTruncStore(
9726	Chain, dl, Val, Ptr: StackPtr,
9727	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`), SVT: StoreMemVT);
9728
9729	EVT StackPtrVT = StackPtr.getValueType();
9730
9731	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9732	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9733	SmallVector<SDValue, `8`> Stores;
9734	unsigned Offset = `0`;
9735
9736	// Do all but one copies using the full register width.
9737	for (unsigned i = `1`; i < NumRegs; i++) {
9738	// Load one integer register's worth from the stack slot.
9739	SDValue Load = DAG.getLoad(
9740	VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
9741	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
9742	// Store it to the final location. Remember the store.
9743	Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
9744	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
9745	Alignment: ST->getOriginalAlign(),
9746	MMOFlags: ST->getMemOperand()->getFlags()));
9747	// Increment the pointers.
9748	Offset += RegBytes;
9749	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9750	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9751	}
9752
9753	// The last store may be partial. Do a truncating store. On big-endian
9754	// machines this requires an extending load from the stack slot to ensure
9755	// that the bits are in the right place.
9756	EVT LoadMemVT =
9757	EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: `8` (StoredBytes - Offset));
9758
9759	// Load from the stack slot.
9760	SDValue Load = DAG.getExtLoad(
9761	ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
9762	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
9763
9764	Stores.push_back(
9765	Elt: DAG.getTruncStore(Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
9766	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
9767	Alignment: ST->getOriginalAlign(),
9768	MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
9769	// The order of the stores doesn't matter - say it with a TokenFactor.
9770	SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9771	return Result;
9772	}
9773
9774	assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9775	"Unaligned store of unknown type.");
9776	// Get the half-size VT
9777	EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
9778	unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9779	unsigned IncrementSize = NumBits / `8`;
9780
9781	// Divide the stored value in two parts.
9782	SDValue ShiftAmount =
9783	DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
9784	SDValue Lo = Val;
9785	// If Val is a constant, replace the upper bits with 0. The SRL will constant
9786	// fold and not use the upper bits. A smaller constant may be easier to
9787	// materialize.
9788	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
9789	Lo = DAG.getNode(
9790	Opcode: ISD::AND, DL: dl, VT, N1: Lo,
9791	N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
9792	VT));
9793	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
9794
9795	// Store the two parts
9796	SDValue Store1, Store2;
9797	Store1 = DAG.getTruncStore(Chain, dl,
9798	Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9799	Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
9800	MMOFlags: ST->getMemOperand()->getFlags());
9801
9802	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9803	Store2 = DAG.getTruncStore(
9804	Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9805	PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
9806	MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
9807
9808	SDValue Result =
9809	DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9810	return Result;
9811	}
9812
9813	SDValue
9814	TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
9815	const SDLoc &DL, EVT DataVT,
9816	SelectionDAG &DAG,
9817	bool IsCompressedMemory) const {
9818	SDValue Increment;
9819	EVT AddrVT = Addr.getValueType();
9820	EVT MaskVT = Mask.getValueType();
9821	assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9822	"Incompatible types of Data and Mask");
9823	if (IsCompressedMemory) {
9824	if (DataVT.isScalableVector())
9825	report_fatal_error(
9826	reason: "Cannot currently handle compressed memory with scalable vectors");
9827	// Incrementing the pointer according to number of '1's in the mask.
9828	EVT MaskIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
9829	SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
9830	if (MaskIntVT.getSizeInBits() < `32`) {
9831	MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9832	MaskIntVT = MVT::i32;
9833	}
9834
9835	// Count '1's with POPCNT.
9836	Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
9837	Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
9838	// Scale is an element size in bytes.
9839	SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / `8`, DL,
9840	VT: AddrVT);
9841	Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
9842	} else if (DataVT.isScalableVector()) {
9843	Increment = DAG.getVScale(DL, VT: AddrVT,
9844	MulImm: APInt (AddrVT.getFixedSizeInBits(),
9845	DataVT.getStoreSize().getKnownMinValue()));
9846	} else
9847	Increment = DAG.getConstant(Val: DataVT.getStoreSize(), DL, VT: AddrVT);
9848
9849	return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
9850	}
9851
9852	static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
9853	EVT VecVT, const SDLoc &dl,
9854	ElementCount SubEC) {
9855	assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9856	"Cannot index a scalable vector within a fixed-width vector");
9857
9858	unsigned NElts = VecVT.getVectorMinNumElements();
9859	unsigned NumSubElts = SubEC.getKnownMinValue();
9860	EVT IdxVT = Idx.getValueType();
9861
9862	if (VecVT.isScalableVector() && !SubEC.isScalable()) {
9863	// If this is a constant index and we know the value plus the number of the
9864	// elements in the subvector minus one is less than the minimum number of
9865	// elements then it's safe to return Idx.
9866	if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
9867	if (IdxCst->getZExtValue() + (NumSubElts - `1`) < NElts)
9868	return Idx;
9869	SDValue VS =
9870	DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getFixedSizeInBits(), NElts));
9871	unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9872	SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
9873	N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
9874	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
9875	}
9876	if (isPowerOf2_32(Value: NElts) && NumSubElts == `1`) {
9877	APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
9878	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
9879	N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
9880	}
9881	unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : `0`;
9882	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
9883	N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
9884	}
9885
9886	SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
9887	SDValue VecPtr, EVT VecVT,
9888	SDValue Index) const {
9889	return getVectorSubVecPointer(
9890	DAG, VecPtr, VecVT,
9891	SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: `1`),
9892	Index);
9893	}
9894
9895	SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
9896	SDValue VecPtr, EVT VecVT,
9897	EVT SubVecVT,
9898	SDValue Index) const {
9899	SDLoc dl(Index);
9900	// Make sure the index type is big enough to compute in.
9901	Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
9902
9903	EVT EltVT = VecVT.getVectorElementType();
9904
9905	// Calculate the element offset and add it to the pointer.
9906	unsigned EltSize = EltVT.getFixedSizeInBits() / `8`; // FIXME: should be ABI size.
9907	assert(EltSize * `8` == EltVT.getFixedSizeInBits() &&
9908	"Converting bits to bytes lost precision");
9909	assert(SubVecVT.getVectorElementType() == EltVT &&
9910	"Sub-vector must be a vector with matching element type");
9911	Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
9912	SubEC: SubVecVT.getVectorElementCount());
9913
9914	EVT IdxVT = Index.getValueType();
9915	if (SubVecVT.isScalableVector())
9916	Index =
9917	DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
9918	N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getSizeInBits(), `1`)));
9919
9920	Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
9921	N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
9922	return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl);
9923	}
9924
9925	//===----------------------------------------------------------------------===//
9926	// Implementation of Emulated TLS Model
9927	//===----------------------------------------------------------------------===//
9928
9929	SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
9930	SelectionDAG &DAG) const {
9931	// Access to address of TLS varialbe xyz is lowered to a function call:
9932	// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
9933	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
9934	PointerType VoidPtrType = PointerType::get(C&: DAG.getContext(), AddressSpace: `0`);
9935	SDLoc dl(GA);
9936
9937	ArgListTy Args;
9938	ArgListEntry Entry;
9939	std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
9940	Module VariableModule = const_cast<Module>(GA->getGlobal()->getParent());
9941	StringRef EmuTlsVarName(NameString);
9942	GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(Name: EmuTlsVarName);
9943	assert(EmuTlsVar && "Cannot find EmuTlsVar ");
9944	Entry.Node = DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT);
9945	Entry.Ty = VoidPtrType;
9946	Args.push_back(x: Entry);
9947
9948	SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
9949
9950	TargetLowering::CallLoweringInfo CLI(DAG);
9951	CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
9952	CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
9953	std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9954
9955	// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
9956	// At last for X86 targets, maybe good for other targets too?
9957	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9958	MFI.setAdjustsStack(true); // Is this only for X86 target?
9959	MFI.setHasCalls(true);
9960
9961	assert((GA->getOffset() == `0`) &&
9962	"Emulated TLS must have zero offset in GlobalAddressSDNode");
9963	return CallResult.first;
9964	}
9965
9966	SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
9967	SelectionDAG &DAG) const {
9968	assert((Op ->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
9969	if (!isCtlzFast())
9970	return SDValue ();
9971	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
9972	SDLoc dl(Op);
9973	if (isNullConstant(V: Op.getOperand(i: `1`)) && CC == ISD::SETEQ) {
9974	EVT VT = Op.getOperand(i: `0`).getValueType();
9975	SDValue Zext = Op.getOperand(i: `0`);
9976	if (VT.bitsLT(MVT::i32)) {
9977	VT = MVT::i32;
9978	Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: `0`));
9979	}
9980	unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
9981	SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
9982	SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
9983	DAG.getConstant(Log2b, dl, MVT::i32));
9984	return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
9985	}
9986	return SDValue ();
9987	}
9988
9989	SDValue TargetLowering::expandIntMINMAX(SDNode Node, SelectionDAG &DAG) const* {
9990	SDValue Op0 = Node->getOperand(Num: `0`);
9991	SDValue Op1 = Node->getOperand(Num: `1`);
9992	EVT VT = Op0.getValueType();
9993	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9994	unsigned Opcode = Node->getOpcode();
9995	SDLoc DL(Node);
9996
9997	// umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
9998	if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
9999	getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10000	Op0 = DAG.getFreeze(V: Op0);
10001	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
10002	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10003	N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
10004	}
10005
10006	// umin(x,y) -> sub(x,usubsat(x,y))
10007	// TODO: Missing freeze(Op0)?
10008	if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
10009	isOperationLegal(Op: ISD::USUBSAT, VT)) {
10010	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10011	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
10012	}
10013
10014	// umax(x,y) -> add(x,usubsat(y,x))
10015	// TODO: Missing freeze(Op0)?
10016	if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
10017	isOperationLegal(Op: ISD::USUBSAT, VT)) {
10018	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
10019	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
10020	}
10021
10022	// FIXME: Should really try to split the vector in case it's legal on a
10023	// subvector.
10024	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10025	return DAG.UnrollVectorOp(N: Node);
10026
10027	// Attempt to find an existing SETCC node that we can reuse.
10028	// TODO: Do we need a generic doesSETCCNodeExist?
10029	// TODO: Missing freeze(Op0)/freeze(Op1)?
10030	auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10031	ISD::CondCode PrefCommuteCC,
10032	ISD::CondCode AltCommuteCC) {
10033	SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
10034	for (ISD::CondCode CC : {PrefCC, AltCC}) {
10035	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10036	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10037	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10038	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10039	}
10040	}
10041	for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10042	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10043	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10044	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10045	return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
10046	}
10047	}
10048	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
10049	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10050	};
10051
10052	// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10053	// -> Y = (A < B) ? B : A
10054	// -> Y = (A >= B) ? A : B
10055	// -> Y = (A <= B) ? B : A
10056	switch (Opcode) {
10057	case ISD::SMAX:
10058	return buildMinMax (ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10059	case ISD::SMIN:
10060	return buildMinMax (ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10061	case ISD::UMAX:
10062	return buildMinMax (ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10063	case ISD::UMIN:
10064	return buildMinMax (ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10065	}
10066
10067	llvm_unreachable("How did we get here?");
10068	}
10069
10070	SDValue TargetLowering::expandAddSubSat(SDNode Node, SelectionDAG &DAG) const* {
10071	unsigned Opcode = Node->getOpcode();
10072	SDValue LHS = Node->getOperand(Num: `0`);
10073	SDValue RHS = Node->getOperand(Num: `1`);
10074	EVT VT = LHS.getValueType();
10075	SDLoc dl(Node);
10076
10077	assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10078	assert(VT.isInteger() && "Expected operands to be integers");
10079
10080	// usub.sat(a, b) -> umax(a, b) - b
10081	if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
10082	SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
10083	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
10084	}
10085
10086	// uadd.sat(a, b) -> umin(a, ~b) + b
10087	if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
10088	SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
10089	SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
10090	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
10091	}
10092
10093	unsigned OverflowOp;
10094	switch (Opcode) {
10095	case ISD::SADDSAT:
10096	OverflowOp = ISD::SADDO;
10097	break;
10098	case ISD::UADDSAT:
10099	OverflowOp = ISD::UADDO;
10100	break;
10101	case ISD::SSUBSAT:
10102	OverflowOp = ISD::SSUBO;
10103	break;
10104	case ISD::USUBSAT:
10105	OverflowOp = ISD::USUBO;
10106	break;
10107	default:
10108	llvm_unreachable("Expected method to receive signed or unsigned saturation "
10109	"addition or subtraction node.");
10110	}
10111
10112	// FIXME: Should really try to split the vector in case it's legal on a
10113	// subvector.
10114	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10115	return DAG.UnrollVectorOp(N: Node);
10116
10117	unsigned BitWidth = LHS.getScalarValueSizeInBits();
10118	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10119	SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10120	SDValue SumDiff = Result.getValue(R: `0`);
10121	SDValue Overflow = Result.getValue(R: `1`);
10122	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10123	SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
10124
10125	if (Opcode == ISD::UADDSAT) {
10126	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10127	// (LHS + RHS) \| OverflowMask
10128	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10129	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
10130	}
10131	// Overflow ? 0xffff.... : (LHS + RHS)
10132	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
10133	}
10134
10135	if (Opcode == ISD::USUBSAT) {
10136	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10137	// (LHS - RHS) & ~OverflowMask
10138	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10139	SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
10140	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
10141	}
10142	// Overflow ? 0 : (LHS - RHS)
10143	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
10144	}
10145
10146	if (Opcode == ISD::SADDSAT \|\| Opcode == ISD::SSUBSAT) {
10147	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10148	APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
10149
10150	KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
10151	KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
10152
10153	// If either of the operand signs are known, then they are guaranteed to
10154	// only saturate in one direction. If non-negative they will saturate
10155	// towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10156	//
10157	// In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10158	// sign of 'y' has to be flipped.
10159
10160	bool LHSIsNonNegative = KnownLHS.isNonNegative();
10161	bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10162	: KnownRHS.isNegative();
10163	if (LHSIsNonNegative \|\| RHSIsNonNegative) {
10164	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10165	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
10166	}
10167
10168	bool LHSIsNegative = KnownLHS.isNegative();
10169	bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10170	: KnownRHS.isNonNegative();
10171	if (LHSIsNegative \|\| RHSIsNegative) {
10172	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10173	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
10174	}
10175	}
10176
10177	// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10178	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10179	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10180	SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
10181	N2: DAG.getConstant(Val: BitWidth - `1`, DL: dl, VT));
10182	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
10183	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
10184	}
10185
10186	SDValue TargetLowering::expandShlSat(SDNode Node, SelectionDAG &DAG) const* {
10187	unsigned Opcode = Node->getOpcode();
10188	bool IsSigned = Opcode == ISD::SSHLSAT;
10189	SDValue LHS = Node->getOperand(Num: `0`);
10190	SDValue RHS = Node->getOperand(Num: `1`);
10191	EVT VT = LHS.getValueType();
10192	SDLoc dl(Node);
10193
10194	assert((Node->getOpcode() == ISD::SSHLSAT \|\|
10195	Node->getOpcode() == ISD::USHLSAT) &&
10196	"Expected a SHLSAT opcode");
10197	assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10198	assert(VT.isInteger() && "Expected operands to be integers");
10199
10200	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10201	return DAG.UnrollVectorOp(N: Node);
10202
10203	// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10204
10205	unsigned BW = VT.getScalarSizeInBits();
10206	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10207	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
10208	SDValue Orig =
10209	DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
10210
10211	SDValue SatVal;
10212	if (IsSigned) {
10213	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
10214	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
10215	SDValue Cond =
10216	DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETLT);
10217	SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
10218	} else {
10219	SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
10220	}
10221	SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
10222	return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
10223	}
10224
10225	void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10226	bool Signed, EVT WideVT,
10227	const SDValue LL, const SDValue LH,
10228	const SDValue RL, const SDValue RH,
10229	SDValue &Lo, SDValue &Hi) const {
10230	// We can fall back to a libcall with an illegal type for the MUL if we
10231	// have a libcall big enough.
10232	// Also, we can fall back to a division in some cases, but that's a big
10233	// performance hit in the general case.
10234	RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10235	if (WideVT == MVT::i16)
10236	LC = RTLIB::MUL_I16;
10237	else if (WideVT == MVT::i32)
10238	LC = RTLIB::MUL_I32;
10239	else if (WideVT == MVT::i64)
10240	LC = RTLIB::MUL_I64;
10241	else if (WideVT == MVT::i128)
10242	LC = RTLIB::MUL_I128;
10243
10244	if (LC == RTLIB::UNKNOWN_LIBCALL \|\| !getLibcallName(Call: LC)) {
10245	// We'll expand the multiplication by brute force because we have no other
10246	// options. This is a trivially-generalized version of the code from
10247	// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10248	// 4.3.1).
10249	EVT VT = LL.getValueType();
10250	unsigned Bits = VT.getSizeInBits();
10251	unsigned HalfBits = Bits >> `1`;
10252	SDValue Mask =
10253	DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
10254	SDValue LLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LL, N2: Mask);
10255	SDValue RLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RL, N2: Mask);
10256
10257	SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLL);
10258	SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
10259
10260	SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
10261	SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
10262	SDValue LLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LL, N2: Shift);
10263	SDValue RLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RL, N2: Shift);
10264
10265	SDValue U = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10266	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLL), N2: TH);
10267	SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
10268	SDValue UH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: U, N2: Shift);
10269
10270	SDValue V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10271	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLH), N2: UL);
10272	SDValue VH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: Shift);
10273
10274	SDValue W =
10275	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLH),
10276	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
10277	Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
10278	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
10279
10280	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: W,
10281	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10282	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RH, N2: LL),
10283	N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RL, N2: LH)));
10284	} else {
10285	// Attempt a libcall.
10286	SDValue Ret;
10287	TargetLowering::MakeLibCallOptions CallOptions;
10288	CallOptions.setSExt(Signed);
10289	CallOptions.setIsPostTypeLegalization(true);
10290	if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
10291	// Halves of WideVT are packed into registers in different order
10292	// depending on platform endianness. This is usually handled by
10293	// the C calling convention, but we can't defer to it in
10294	// the legalizer.
10295	SDValue Args[] = {LL, LH, RL, RH};
10296	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10297	} else {
10298	SDValue Args[] = {LH, LL, RH, RL};
10299	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10300	}
10301	assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10302	"Ret value is a collection of constituent nodes holding result.");
10303	if (DAG.getDataLayout().isLittleEndian()) {
10304	// Same as above.
10305	Lo = Ret.getOperand(i: `0`);
10306	Hi = Ret.getOperand(i: `1`);
10307	} else {
10308	Lo = Ret.getOperand(i: `1`);
10309	Hi = Ret.getOperand(i: `0`);
10310	}
10311	}
10312	}
10313
10314	void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10315	bool Signed, const SDValue LHS,
10316	const SDValue RHS, SDValue &Lo,
10317	SDValue &Hi) const {
10318	EVT VT = LHS.getValueType();
10319	assert(RHS.getValueType() == VT && "Mismatching operand types");
10320
10321	SDValue HiLHS;
10322	SDValue HiRHS;
10323	if (Signed) {
10324	// The high part is obtained by SRA'ing all but one of the bits of low
10325	// part.
10326	unsigned LoSize = VT.getFixedSizeInBits();
10327	HiLHS = DAG.getNode(
10328	Opcode: ISD::SRA, DL: dl, VT, N1: LHS,
10329	N2: DAG.getConstant(Val: LoSize - `1`, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10330	HiRHS = DAG.getNode(
10331	Opcode: ISD::SRA, DL: dl, VT, N1: RHS,
10332	N2: DAG.getConstant(Val: LoSize - `1`, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10333	} else {
10334	HiLHS = DAG.getConstant(Val: `0`, DL: dl, VT);
10335	HiRHS = DAG.getConstant(Val: `0`, DL: dl, VT);
10336	}
10337	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getSizeInBits() `2`);
10338	forceExpandWideMUL(DAG, dl, Signed, WideVT, LL: LHS, LH: HiLHS, RL: RHS, RH: HiRHS, Lo, Hi);
10339	}
10340
10341	SDValue
10342	TargetLowering::expandFixedPointMul(SDNode Node, SelectionDAG &DAG) const* {
10343	assert((Node->getOpcode() == ISD::SMULFIX \|\|
10344	Node->getOpcode() == ISD::UMULFIX \|\|
10345	Node->getOpcode() == ISD::SMULFIXSAT \|\|
10346	Node->getOpcode() == ISD::UMULFIXSAT) &&
10347	"Expected a fixed point multiplication opcode");
10348
10349	SDLoc dl(Node);
10350	SDValue LHS = Node->getOperand(Num: `0`);
10351	SDValue RHS = Node->getOperand(Num: `1`);
10352	EVT VT = LHS.getValueType();
10353	unsigned Scale = Node->getConstantOperandVal(Num: `2`);
10354	bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT \|\|
10355	Node->getOpcode() == ISD::UMULFIXSAT);
10356	bool Signed = (Node->getOpcode() == ISD::SMULFIX \|\|
10357	Node->getOpcode() == ISD::SMULFIXSAT);
10358	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10359	unsigned VTSize = VT.getScalarSizeInBits();
10360
10361	if (!Scale) {
10362	// [us]mul.fix(a, b, 0) -> mul(a, b)
10363	if (!Saturating) {
10364	if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
10365	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10366	} else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
10367	SDValue Result =
10368	DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10369	SDValue Product = Result.getValue(R: `0`);
10370	SDValue Overflow = Result.getValue(R: `1`);
10371	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10372
10373	APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
10374	APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
10375	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10376	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10377	// Xor the inputs, if resulting sign bit is 0 the product will be
10378	// positive, else negative.
10379	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
10380	SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
10381	Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
10382	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
10383	} else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
10384	SDValue Result =
10385	DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10386	SDValue Product = Result.getValue(R: `0`);
10387	SDValue Overflow = Result.getValue(R: `1`);
10388
10389	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10390	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10391	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
10392	}
10393	}
10394
10395	assert(((Signed && Scale < VTSize) \|\| (!Signed && Scale <= VTSize)) &&
10396	"Expected scale to be less than the number of bits if signed or at "
10397	"most the number of bits if unsigned.");
10398	assert(LHS.getValueType() == RHS.getValueType() &&
10399	"Expected both operands to be the same type");
10400
10401	// Get the upper and lower bits of the result.
10402	SDValue Lo, Hi;
10403	unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10404	unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10405	if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
10406	SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
10407	Lo = Result.getValue(R: `0`);
10408	Hi = Result.getValue(R: `1`);
10409	} else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
10410	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10411	Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
10412	} else if (VT.isVector()) {
10413	return SDValue ();
10414	} else {
10415	forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10416	}
10417
10418	if (Scale == VTSize)
10419	// Result is just the top half since we'd be shifting by the width of the
10420	// operand. Overflow impossible so this works for both UMULFIX and
10421	// UMULFIXSAT.
10422	return Hi;
10423
10424	// The result will need to be shifted right by the scale since both operands
10425	// are scaled. The result is given to us in 2 halves, so we only want part of
10426	// both in the result.
10427	SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
10428	N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
10429	if (!Saturating)
10430	return Result;
10431
10432	if (!Signed) {
10433	// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10434	// widened multiplication) aren't all zeroes.
10435
10436	// Saturate to max if ((Hi >> Scale) != 0),
10437	// which is the same as if (Hi > ((1 << Scale) - 1))
10438	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10439	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
10440	DL: dl, VT);
10441	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
10442	True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
10443	Cond: ISD::SETUGT);
10444
10445	return Result;
10446	}
10447
10448	// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10449	// widened multiplication) aren't all ones or all zeroes.
10450
10451	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
10452	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
10453
10454	if (Scale == `0`) {
10455	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
10456	N2: DAG.getShiftAmountConstant(Val: VTSize - `1`, VT, DL: dl));
10457	SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
10458	// Saturated to SatMin if wide product is negative, and SatMax if wide
10459	// product is positive ...
10460	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10461	SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
10462	Cond: ISD::SETLT);
10463	// ... but only if we overflowed.
10464	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
10465	}
10466
10467	// We handled Scale==0 above so all the bits to examine is in Hi.
10468
10469	// Saturate to max if ((Hi >> (Scale - 1)) > 0),
10470	// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10471	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - `1`),
10472	DL: dl, VT);
10473	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
10474	// Saturate to min if (Hi >> (Scale - 1)) < -1),
10475	// which is the same as if (HI < (-1 << (Scale - 1))
10476	SDValue HighMask =
10477	DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + `1`),
10478	DL: dl, VT);
10479	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
10480	return Result;
10481	}
10482
10483	SDValue
10484	TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
10485	SDValue LHS, SDValue RHS,
10486	unsigned Scale, SelectionDAG &DAG) const {
10487	assert((Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT \|\|
10488	Opcode == ISD::UDIVFIX \|\| Opcode == ISD::UDIVFIXSAT) &&
10489	"Expected a fixed point division opcode");
10490
10491	EVT VT = LHS.getValueType();
10492	bool Signed = Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT;
10493	bool Saturating = Opcode == ISD::SDIVFIXSAT \|\| Opcode == ISD::UDIVFIXSAT;
10494	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10495
10496	// If there is enough room in the type to upscale the LHS or downscale the
10497	// RHS before the division, we can perform it in this type without having to
10498	// resize. For signed operations, the LHS headroom is the number of
10499	// redundant sign bits, and for unsigned ones it is the number of zeroes.
10500	// The headroom for the RHS is the number of trailing zeroes.
10501	unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - `1`
10502	: DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
10503	unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
10504
10505	// For signed saturating operations, we need to be able to detect true integer
10506	// division overflow; that is, when you have MIN / -EPS. However, this
10507	// is undefined behavior and if we emit divisions that could take such
10508	// values it may cause undesired behavior (arithmetic exceptions on x86, for
10509	// example).
10510	// Avoid this by requiring an extra bit so that we never get this case.
10511	// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10512	// signed saturating division, we need to emit a whopping 32-bit division.
10513	if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10514	return SDValue ();
10515
10516	unsigned LHSShift = std::min(a: LHSLead, b: Scale);
10517	unsigned RHSShift = Scale - LHSShift;
10518
10519	// At this point, we know that if we shift the LHS up by LHSShift and the
10520	// RHS down by RHSShift, we can emit a regular division with a final scaling
10521	// factor of Scale.
10522
10523	if (LHSShift)
10524	LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
10525	N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
10526	if (RHSShift)
10527	RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
10528	N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
10529
10530	SDValue Quot;
10531	if (Signed) {
10532	// For signed operations, if the resulting quotient is negative and the
10533	// remainder is nonzero, subtract 1 from the quotient to round towards
10534	// negative infinity.
10535	SDValue Rem;
10536	// FIXME: Ideally we would always produce an SDIVREM here, but if the
10537	// type isn't legal, SDIVREM cannot be expanded. There is no reason why
10538	// we couldn't just form a libcall, but the type legalizer doesn't do it.
10539	if (isTypeLegal(VT) &&
10540	isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
10541	Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
10542	VTList: DAG.getVTList(VT1: VT, VT2: VT),
10543	N1: LHS, N2: RHS);
10544	Rem = Quot.getValue(R: `1`);
10545	Quot = Quot.getValue(R: `0`);
10546	} else {
10547	Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
10548	N1: LHS, N2: RHS);
10549	Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
10550	N1: LHS, N2: RHS);
10551	}
10552	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10553	SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
10554	SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
10555	SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
10556	SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
10557	SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
10558	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
10559	Quot = DAG.getSelect(DL: dl, VT,
10560	Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
10561	LHS: Sub1, RHS: Quot);
10562	} else
10563	Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
10564	N1: LHS, N2: RHS);
10565
10566	return Quot;
10567	}
10568
10569	void TargetLowering::expandUADDSUBO(
10570	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
10571	SDLoc dl(Node);
10572	SDValue LHS = Node->getOperand(Num: `0`);
10573	SDValue RHS = Node->getOperand(Num: `1`);
10574	bool IsAdd = Node->getOpcode() == ISD::UADDO;
10575
10576	// If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10577	unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10578	if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: `0`))) {
10579	SDValue CarryIn = DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `1`));
10580	SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
10581	Ops: { LHS, RHS, CarryIn });
10582	Result = SDValue (NodeCarry.getNode(), `0`);
10583	Overflow = SDValue (NodeCarry.getNode(), `1`);
10584	return;
10585	}
10586
10587	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10588	VT: LHS.getValueType(), N1: LHS, N2: RHS);
10589
10590	EVT ResultType = Node->getValueType(ResNo: `1`);
10591	EVT SetCCType = getSetCCResultType(
10592	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
10593	SDValue SetCC;
10594	if (IsAdd && isOneConstant(V: RHS)) {
10595	// Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10596	// the live range of X. We assume comparing with 0 is cheap.
10597	// The general case (X + C) < C is not necessarily beneficial. Although we
10598	// reduce the live range of X, we may introduce the materialization of
10599	// constant C.
10600	SetCC =
10601	DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
10602	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETEQ);
10603	} else if (IsAdd && isAllOnesConstant(V: RHS)) {
10604	// Special case: uaddo X, -1 overflows if X != 0.
10605	SetCC =
10606	DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
10607	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETNE);
10608	} else {
10609	ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10610	SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
10611	}
10612	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10613	}
10614
10615	void TargetLowering::expandSADDSUBO(
10616	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
10617	SDLoc dl(Node);
10618	SDValue LHS = Node->getOperand(Num: `0`);
10619	SDValue RHS = Node->getOperand(Num: `1`);
10620	bool IsAdd = Node->getOpcode() == ISD::SADDO;
10621
10622	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10623	VT: LHS.getValueType(), N1: LHS, N2: RHS);
10624
10625	EVT ResultType = Node->getValueType(ResNo: `1`);
10626	EVT OType = getSetCCResultType(
10627	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
10628
10629	// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10630	unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10631	if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
10632	SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
10633	SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
10634	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10635	return;
10636	}
10637
10638	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: LHS.getValueType());
10639
10640	// For an addition, the result should be less than one of the operands (LHS)
10641	// if and only if the other operand (RHS) is negative, otherwise there will
10642	// be overflow.
10643	// For a subtraction, the result should be less than one of the operands
10644	// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10645	// otherwise there will be overflow.
10646	SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
10647	SDValue ConditionRHS =
10648	DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
10649
10650	Overflow = DAG.getBoolExtOrTrunc(
10651	Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
10652	VT: ResultType, OpVT: ResultType);
10653	}
10654
10655	bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
10656	SDValue &Overflow, SelectionDAG &DAG) const {
10657	SDLoc dl(Node);
10658	EVT VT = Node->getValueType(ResNo: `0`);
10659	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10660	SDValue LHS = Node->getOperand(Num: `0`);
10661	SDValue RHS = Node->getOperand(Num: `1`);
10662	bool isSigned = Node->getOpcode() == ISD::SMULO;
10663
10664	// For power-of-two multiplications we can use a simpler shift expansion.
10665	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
10666	const APInt &C = RHSC->getAPIntValue();
10667	// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
10668	if (C.isPowerOf2()) {
10669	// smulo(x, signed_min) is same as umulo(x, signed_min).
10670	bool UseArithShift = isSigned && !C.isMinSignedValue();
10671	SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
10672	Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
10673	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
10674	LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
10675	DL: dl, VT, N1: Result, N2: ShiftAmt),
10676	RHS: LHS, Cond: ISD::SETNE);
10677	return true;
10678	}
10679	}
10680
10681	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getScalarSizeInBits() `2`);
10682	if (VT.isVector())
10683	WideVT =
10684	EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
10685
10686	SDValue BottomHalf;
10687	SDValue TopHalf;
10688	static const unsigned Ops[`2`][`3`] =
10689	{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
10690	{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
10691	if (isOperationLegalOrCustom(Op: Ops[isSigned][`0`], VT)) {
10692	BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10693	TopHalf = DAG.getNode(Opcode: Ops[isSigned][`0`], DL: dl, VT, N1: LHS, N2: RHS);
10694	} else if (isOperationLegalOrCustom(Op: Ops[isSigned][`1`], VT)) {
10695	BottomHalf = DAG.getNode(Opcode: Ops[isSigned][`1`], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
10696	N2: RHS);
10697	TopHalf = BottomHalf.getValue(R: `1`);
10698	} else if (isTypeLegal(VT: WideVT)) {
10699	LHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: LHS);
10700	RHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: RHS);
10701	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
10702	BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
10703	SDValue ShiftAmt =
10704	DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
10705	TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
10706	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
10707	} else {
10708	if (VT.isVector())
10709	return false;
10710
10711	forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
10712	}
10713
10714	Result = BottomHalf;
10715	if (isSigned) {
10716	SDValue ShiftAmt = DAG.getShiftAmountConstant(
10717	Val: VT.getScalarSizeInBits() - `1`, VT: BottomHalf.getValueType(), DL: dl);
10718	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: BottomHalf, N2: ShiftAmt);
10719	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf, RHS: Sign, Cond: ISD::SETNE);
10720	} else {
10721	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf,
10722	RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETNE);
10723	}
10724
10725	// Truncate the result if SetCC returns a larger type than needed.
10726	EVT RType = Node->getValueType(ResNo: `1`);
10727	if (RType.bitsLT(VT: Overflow.getValueType()))
10728	Overflow = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: RType, Operand: Overflow);
10729
10730	assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
10731	"Unexpected result type for S/UMULO legalization");
10732	return true;
10733	}
10734
10735	SDValue TargetLowering::expandVecReduce(SDNode Node, SelectionDAG &DAG) const* {
10736	SDLoc dl(Node);
10737	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
10738	SDValue Op = Node->getOperand(Num: `0`);
10739	EVT VT = Op.getValueType();
10740
10741	if (VT.isScalableVector())
10742	report_fatal_error(
10743	reason: "Expanding reductions for scalable vectors is undefined.");
10744
10745	// Try to use a shuffle reduction for power of two vectors.
10746	if (VT.isPow2VectorType()) {
10747	while (VT.getVectorNumElements() > `1`) {
10748	EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
10749	if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT))
10750	break;
10751
10752	SDValue Lo, Hi;
10753	std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
10754	Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags: Node->getFlags());
10755	VT = HalfVT;
10756	}
10757	}
10758
10759	EVT EltVT = VT.getVectorElementType();
10760	unsigned NumElts = VT.getVectorNumElements();
10761
10762	SmallVector<SDValue, `8`> Ops;
10763	DAG.ExtractVectorElements(Op, Args&: Ops, Start: `0`, Count: NumElts);
10764
10765	SDValue Res = Ops [`0`];
10766	for (unsigned i = `1`; i < NumElts; i++)
10767	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags: Node->getFlags());
10768
10769	// Result type may be wider than element type.
10770	if (EltVT != Node->getValueType(ResNo: `0`))
10771	Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: `0`), Operand: Res);
10772	return Res;
10773	}
10774
10775	SDValue TargetLowering::expandVecReduceSeq(SDNode Node, SelectionDAG &DAG) const* {
10776	SDLoc dl(Node);
10777	SDValue AccOp = Node->getOperand(Num: `0`);
10778	SDValue VecOp = Node->getOperand(Num: `1`);
10779	SDNodeFlags Flags = Node->getFlags();
10780
10781	EVT VT = VecOp.getValueType();
10782	EVT EltVT = VT.getVectorElementType();
10783
10784	if (VT.isScalableVector())
10785	report_fatal_error(
10786	reason: "Expanding reductions for scalable vectors is undefined.");
10787
10788	unsigned NumElts = VT.getVectorNumElements();
10789
10790	SmallVector<SDValue, `8`> Ops;
10791	DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: `0`, Count: NumElts);
10792
10793	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
10794
10795	SDValue Res = AccOp;
10796	for (unsigned i = `0`; i < NumElts; i++)
10797	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags);
10798
10799	return Res;
10800	}
10801
10802	bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
10803	SelectionDAG &DAG) const {
10804	EVT VT = Node->getValueType(ResNo: `0`);
10805	SDLoc dl(Node);
10806	bool isSigned = Node->getOpcode() == ISD::SREM;
10807	unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
10808	unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
10809	SDValue Dividend = Node->getOperand(Num: `0`);
10810	SDValue Divisor = Node->getOperand(Num: `1`);
10811	if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
10812	SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
10813	Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: `1`);
10814	return true;
10815	}
10816	if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
10817	// X % Y -> X-X/YY*
10818	SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
10819	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
10820	Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
10821	return true;
10822	}
10823	return false;
10824	}
10825
10826	SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
10827	SelectionDAG &DAG) const {
10828	bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
10829	SDLoc dl(SDValue (Node, `0`));
10830	SDValue Src = Node->getOperand(Num: `0`);
10831
10832	// DstVT is the result type, while SatVT is the size to which we saturate
10833	EVT SrcVT = Src.getValueType();
10834	EVT DstVT = Node->getValueType(ResNo: `0`);
10835
10836	EVT SatVT = cast<VTSDNode>(Val: Node->getOperand(Num: `1`))->getVT();
10837	unsigned SatWidth = SatVT.getScalarSizeInBits();
10838	unsigned DstWidth = DstVT.getScalarSizeInBits();
10839	assert(SatWidth <= DstWidth &&
10840	"Expected saturation width smaller than result width");
10841
10842	// Determine minimum and maximum integer values and their corresponding
10843	// floating-point values.
10844	APInt MinInt, MaxInt;
10845	if (IsSigned) {
10846	MinInt = APInt::getSignedMinValue(numBits: SatWidth).sext(width: DstWidth);
10847	MaxInt = APInt::getSignedMaxValue(numBits: SatWidth).sext(width: DstWidth);
10848	} else {
10849	MinInt = APInt::getMinValue(numBits: SatWidth).zext(width: DstWidth);
10850	MaxInt = APInt::getMaxValue(numBits: SatWidth).zext(width: DstWidth);
10851	}
10852
10853	// We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
10854	// libcall emission cannot handle this. Large result types will fail.
10855	if (SrcVT == MVT::f16 \|\| SrcVT == MVT::bf16) {
10856	Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
10857	SrcVT = Src.getValueType();
10858	}
10859
10860	APFloat MinFloat(DAG.EVTToAPFloatSemantics(VT: SrcVT));
10861	APFloat MaxFloat(DAG.EVTToAPFloatSemantics(VT: SrcVT));
10862
10863	APFloat::opStatus MinStatus =
10864	MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
10865	APFloat::opStatus MaxStatus =
10866	MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
10867	bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
10868	!(MaxStatus & APFloat::opStatus::opInexact);
10869
10870	SDValue MinFloatNode = DAG.getConstantFP(Val: MinFloat, DL: dl, VT: SrcVT);
10871	SDValue MaxFloatNode = DAG.getConstantFP(Val: MaxFloat, DL: dl, VT: SrcVT);
10872
10873	// If the integer bounds are exactly representable as floats and min/max are
10874	// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
10875	// of comparisons and selects.
10876	bool MinMaxLegal = isOperationLegal(Op: ISD::FMINNUM, VT: SrcVT) &&
10877	isOperationLegal(Op: ISD::FMAXNUM, VT: SrcVT);
10878	if (AreExactFloatBounds && MinMaxLegal) {
10879	SDValue Clamped = Src;
10880
10881	// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
10882	Clamped = DAG.getNode(Opcode: ISD::FMAXNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MinFloatNode);
10883	// Clamp by MaxFloat from above. NaN cannot occur.
10884	Clamped = DAG.getNode(Opcode: ISD::FMINNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MaxFloatNode);
10885	// Convert clamped value to integer.
10886	SDValue FpToInt = DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
10887	DL: dl, VT: DstVT, Operand: Clamped);
10888
10889	// In the unsigned case we're done, because we mapped NaN to MinFloat,
10890	// which will cast to zero.
10891	if (!IsSigned)
10892	return FpToInt;
10893
10894	// Otherwise, select 0 if Src is NaN.
10895	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
10896	EVT SetCCVT =
10897	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
10898	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
10899	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: FpToInt);
10900	}
10901
10902	SDValue MinIntNode = DAG.getConstant(Val: MinInt, DL: dl, VT: DstVT);
10903	SDValue MaxIntNode = DAG.getConstant(Val: MaxInt, DL: dl, VT: DstVT);
10904
10905	// Result of direct conversion. The assumption here is that the operation is
10906	// non-trapping and it's fine to apply it to an out-of-range value if we
10907	// select it away later.
10908	SDValue FpToInt =
10909	DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, DL: dl, VT: DstVT, Operand: Src);
10910
10911	SDValue Select = FpToInt;
10912
10913	EVT SetCCVT =
10914	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
10915
10916	// If Src ULT MinFloat, select MinInt. In particular, this also selects
10917	// MinInt if Src is NaN.
10918	SDValue ULT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MinFloatNode, Cond: ISD::SETULT);
10919	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: ULT, LHS: MinIntNode, RHS: Select);
10920	// If Src OGT MaxFloat, select MaxInt.
10921	SDValue OGT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MaxFloatNode, Cond: ISD::SETOGT);
10922	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: OGT, LHS: MaxIntNode, RHS: Select);
10923
10924	// In the unsigned case we are done, because we mapped NaN to MinInt, which
10925	// is already zero.
10926	if (!IsSigned)
10927	return Select;
10928
10929	// Otherwise, select 0 if Src is NaN.
10930	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
10931	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
10932	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: Select);
10933	}
10934
10935	SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
10936	const SDLoc &dl,
10937	SelectionDAG &DAG) const {
10938	EVT OperandVT = Op.getValueType();
10939	if (OperandVT.getScalarType() == ResultVT.getScalarType())
10940	return Op;
10941	EVT ResultIntVT = ResultVT.changeTypeToInteger();
10942	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
10943	// can induce double-rounding which may alter the results. We can
10944	// correct for this using a trick explained in: Boldo, Sylvie, and
10945	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
10946	// World Congress. 2005.
10947	unsigned BitSize = OperandVT.getScalarSizeInBits();
10948	EVT WideIntVT = OperandVT.changeTypeToInteger();
10949	SDValue OpAsInt = DAG.getBitcast(VT: WideIntVT, V: Op);
10950	SDValue SignBit =
10951	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: WideIntVT, N1: OpAsInt,
10952	N2: DAG.getConstant(Val: APInt::getSignMask(BitWidth: BitSize), DL: dl, VT: WideIntVT));
10953	SDValue AbsWide;
10954	if (isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT)) {
10955	AbsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: Op);
10956	} else {
10957	SDValue ClearedSign = DAG.getNode(
10958	Opcode: ISD::AND, DL: dl, VT: WideIntVT, N1: OpAsInt,
10959	N2: DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BitSize), DL: dl, VT: WideIntVT));
10960	AbsWide = DAG.getBitcast(VT: OperandVT, V: ClearedSign);
10961	}
10962	SDValue AbsNarrow = DAG.getFPExtendOrRound(Op: AbsWide, DL: dl, VT: ResultVT);
10963	SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(Op: AbsNarrow, DL: dl, VT: OperandVT);
10964
10965	// We can keep the narrow value as-is if narrowing was exact (no
10966	// rounding error), the wide value was NaN (the narrow value is also
10967	// NaN and should be preserved) or if we rounded to the odd value.
10968	SDValue NarrowBits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultIntVT, Operand: AbsNarrow);
10969	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: ResultIntVT);
10970	SDValue NegativeOne = DAG.getAllOnesConstant(DL: dl, VT: ResultIntVT);
10971	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: One);
10972	EVT ResultIntVTCCVT = getSetCCResultType(
10973	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: And.getValueType());
10974	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: ResultIntVT);
10975	// The result is already odd so we don't need to do anything.
10976	SDValue AlreadyOdd = DAG.getSetCC(DL: dl, VT: ResultIntVTCCVT, LHS: And, RHS: Zero, Cond: ISD::SETNE);
10977
10978	EVT WideSetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
10979	VT: AbsWide.getValueType());
10980	// We keep results which are exact, odd or NaN.
10981	SDValue KeepNarrow =
10982	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETUEQ);
10983	KeepNarrow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: WideSetCCVT, N1: KeepNarrow, N2: AlreadyOdd);
10984	// We morally performed a round-down if AbsNarrow is smaller than
10985	// AbsWide.
10986	SDValue NarrowIsRd =
10987	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETOGT);
10988	// If the narrow value is odd or exact, pick it.
10989	// Otherwise, narrow is even and corresponds to either the rounded-up
10990	// or rounded-down value. If narrow is the rounded-down value, we want
10991	// the rounded-up value as it will be odd.
10992	SDValue Adjust = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: NarrowIsRd, LHS: One, RHS: NegativeOne);
10993	SDValue Adjusted = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: Adjust);
10994	Op = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: KeepNarrow, LHS: NarrowBits, RHS: Adjusted);
10995	int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
10996	SDValue ShiftCnst = DAG.getShiftAmountConstant(Val: ShiftAmount, VT: WideIntVT, DL: dl);
10997	SignBit = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideIntVT, N1: SignBit, N2: ShiftCnst);
10998	SignBit = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ResultIntVT, Operand: SignBit);
10999	Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResultIntVT, N1: Op, N2: SignBit);
11000	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultVT, Operand: Op);
11001	}
11002
11003	SDValue TargetLowering::expandFP_ROUND(SDNode Node, SelectionDAG &DAG) const* {
11004	assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11005	SDValue Op = Node->getOperand(Num: `0`);
11006	EVT VT = Node->getValueType(ResNo: `0`);
11007	SDLoc dl(Node);
11008	if (VT.getScalarType() == MVT::bf16) {
11009	if (Node->getConstantOperandVal(Num: `1`) == `1`) {
11010	return DAG.getNode(Opcode: ISD::FP_TO_BF16, DL: dl, VT, Operand: Node->getOperand(Num: `0`));
11011	}
11012	EVT OperandVT = Op.getValueType();
11013	SDValue IsNaN = DAG.getSetCC(
11014	DL: dl,
11015	VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT),
11016	LHS: Op, RHS: Op, Cond: ISD::SETUO);
11017
11018	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11019	// can induce double-rounding which may alter the results. We can
11020	// correct for this using a trick explained in: Boldo, Sylvie, and
11021	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11022	// World Congress. 2005.
11023	EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11024	EVT I32 = F32.changeTypeToInteger();
11025	Op = expandRoundInexactToOdd(ResultVT: F32, Op, dl, DAG);
11026	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11027
11028	// Conversions should set NaN's quiet bit. This also prevents NaNs from
11029	// turning into infinities.
11030	SDValue NaN =
11031	DAG.getNode(Opcode: ISD::OR, DL: dl, VT: I32, N1: Op, N2: DAG.getConstant(Val: `0x400000`, DL: dl, VT: I32));
11032
11033	// Factor in the contribution of the low 16 bits.
11034	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: I32);
11035	SDValue Lsb = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11036	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
11037	Lsb = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: I32, N1: Lsb, N2: One);
11038	SDValue RoundingBias =
11039	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: DAG.getConstant(Val: `0x7fff`, DL: dl, VT: I32), N2: Lsb);
11040	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Op, N2: RoundingBias);
11041
11042	// Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11043	// 0x80000000.
11044	Op = DAG.getSelect(DL: dl, VT: I32, Cond: IsNaN, LHS: NaN, RHS: Add);
11045
11046	// Now that we have rounded, shift the bits into position.
11047	Op = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11048	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
11049	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11050	EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11051	Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: I16, Operand: Op);
11052	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Op);
11053	}
11054	return SDValue ();
11055	}
11056
11057	SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11058	SelectionDAG &DAG) const {
11059	assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11060	assert(Node->getValueType(`0`).isScalableVector() &&
11061	"Fixed length vector types expected to use SHUFFLE_VECTOR!");
11062
11063	EVT VT = Node->getValueType(ResNo: `0`);
11064	SDValue V1 = Node->getOperand(Num: `0`);
11065	SDValue V2 = Node->getOperand(Num: `1`);
11066	int64_t Imm = cast<ConstantSDNode>(Val: Node->getOperand(Num: `2`))->getSExtValue();
11067	SDLoc DL(Node);
11068
11069	// Expand through memory thusly:
11070	// Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11071	// Store V1, Ptr
11072	// Store V2, Ptr + sizeof(V1)
11073	// If (Imm < 0)
11074	// TrailingElts = -Imm
11075	// Ptr = Ptr + sizeof(V1) - (TrailingElts sizeof(VT.Elt))*
11076	// else
11077	// Ptr = Ptr + (Imm sizeof(VT.Elt))*
11078	// Res = Load Ptr
11079
11080	Align Alignment = DAG.getReducedAlign(VT, /UseABI=/false);
11081
11082	EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
11083	EC: VT.getVectorElementCount() * `2`);
11084	SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
11085	EVT PtrVT = StackPtr.getValueType();
11086	auto &MF = DAG.getMachineFunction();
11087	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11088	auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
11089
11090	// Store the lo part of CONCAT_VECTORS(V1, V2)
11091	SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
11092	// Store the hi part of CONCAT_VECTORS(V1, V2)
11093	SDValue OffsetToV2 = DAG.getVScale(
11094	DL, VT: PtrVT,
11095	MulImm: APInt (PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11096	SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: OffsetToV2);
11097	SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);
11098
11099	if (Imm >= `0`) {
11100	// Load back the required element. getVectorElementPointer takes care of
11101	// clamping the index if it's out-of-bounds.
11102	StackPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT: VT, Index: Node->getOperand(Num: `2`));
11103	// Load the spliced result
11104	return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
11105	PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11106	}
11107
11108	uint64_t TrailingElts = -Imm;
11109
11110	// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11111	TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11112	SDValue TrailingBytes =
11113	DAG.getConstant(Val: TrailingElts * EltByteSize, DL, VT: PtrVT);
11114
11115	if (TrailingElts > VT.getVectorMinNumElements()) {
11116	SDValue VLBytes =
11117	DAG.getVScale(DL, VT: PtrVT,
11118	MulImm: APInt (PtrVT.getFixedSizeInBits(),
11119	VT.getStoreSize().getKnownMinValue()));
11120	TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VLBytes);
11121	}
11122
11123	// Calculate the start address of the spliced result.
11124	StackPtr2 = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);
11125
11126	// Load the spliced result
11127	return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr2,
11128	PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11129	}
11130
11131	bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
11132	SDValue &LHS, SDValue &RHS,
11133	SDValue &CC, SDValue Mask,
11134	SDValue EVL, bool &NeedInvert,
11135	const SDLoc &dl, SDValue &Chain,
11136	bool IsSignaling) const {
11137	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11138	MVT OpVT = LHS.getSimpleValueType();
11139	ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
11140	NeedInvert = false;
11141	assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11142	bool IsNonVP = !EVL;
11143	switch (TLI.getCondCodeAction(CC: CCCode, VT: OpVT)) {
11144	default:
11145	llvm_unreachable("Unknown condition code action!");
11146	case TargetLowering::Legal:
11147	// Nothing to do.
11148	break;
11149	case TargetLowering::Expand: {
11150	ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
11151	if (TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
11152	std::swap(a&: LHS, b&: RHS);
11153	CC = DAG.getCondCode(Cond: InvCC);
11154	return true;
11155	}
11156	// Swapping operands didn't work. Try inverting the condition.
11157	bool NeedSwap = false;
11158	InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
11159	if (!TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
11160	// If inverting the condition is not enough, try swapping operands
11161	// on top of it.
11162	InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
11163	NeedSwap = true;
11164	}
11165	if (TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
11166	CC = DAG.getCondCode(Cond: InvCC);
11167	NeedInvert = true;
11168	if (NeedSwap)
11169	std::swap(a&: LHS, b&: RHS);
11170	return true;
11171	}
11172
11173	ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
11174	unsigned Opc = `0`;
11175	switch (CCCode) {
11176	default:
11177	llvm_unreachable("Don't know how to expand this condition!");
11178	case ISD::SETUO:
11179	if (TLI.isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
11180	CC1 = ISD::SETUNE;
11181	CC2 = ISD::SETUNE;
11182	Opc = ISD::OR;
11183	break;
11184	}
11185	assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11186	"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11187	NeedInvert = true;
11188	[[fallthrough]];
11189	case ISD::SETO:
11190	assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11191	"If SETO is expanded, SETOEQ must be legal!");
11192	CC1 = ISD::SETOEQ;
11193	CC2 = ISD::SETOEQ;
11194	Opc = ISD::AND;
11195	break;
11196	case ISD::SETONE:
11197	case ISD::SETUEQ:
11198	// If the SETUO or SETO CC isn't legal, we might be able to use
11199	// SETOGT \|\| SETOLT, inverting the result for SETUEQ. We only need one
11200	// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11201	// the operands.
11202	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
11203	if (!TLI.isCondCodeLegal(CC: CC2, VT: OpVT) &&
11204	(TLI.isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) \|\|
11205	TLI.isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
11206	CC1 = ISD::SETOGT;
11207	CC2 = ISD::SETOLT;
11208	Opc = ISD::OR;
11209	NeedInvert = ((unsigned)CCCode & `0x8U`);
11210	break;
11211	}
11212	[[fallthrough]];
11213	case ISD::SETOEQ:
11214	case ISD::SETOGT:
11215	case ISD::SETOGE:
11216	case ISD::SETOLT:
11217	case ISD::SETOLE:
11218	case ISD::SETUNE:
11219	case ISD::SETUGT:
11220	case ISD::SETUGE:
11221	case ISD::SETULT:
11222	case ISD::SETULE:
11223	// If we are floating point, assign and break, otherwise fall through.
11224	if (!OpVT.isInteger()) {
11225	// We can use the 4th bit to tell if we are the unordered
11226	// or ordered version of the opcode.
11227	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
11228	Opc = ((unsigned)CCCode & `0x8U`) ? ISD::OR : ISD::AND;
11229	CC1 = (ISD::CondCode)(((int)CCCode & `0x7`) \| `0x10`);
11230	break;
11231	}
11232	// Fallthrough if we are unsigned integer.
11233	[[fallthrough]];
11234	case ISD::SETLE:
11235	case ISD::SETGT:
11236	case ISD::SETGE:
11237	case ISD::SETLT:
11238	case ISD::SETNE:
11239	case ISD::SETEQ:
11240	// If all combinations of inverting the condition and swapping operands
11241	// didn't work then we have no means to expand the condition.
11242	llvm_unreachable("Don't know how to expand this condition!");
11243	}
11244
11245	SDValue SetCC1, SetCC2;
11246	if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11247	// If we aren't the ordered or unorder operation,
11248	// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11249	if (IsNonVP) {
11250	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
11251	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
11252	} else {
11253	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
11254	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
11255	}
11256	} else {
11257	// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11258	if (IsNonVP) {
11259	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
11260	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
11261	} else {
11262	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
11263	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
11264	}
11265	}
11266	if (Chain)
11267	Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(`1`),
11268	SetCC2.getValue(`1`));
11269	if (IsNonVP)
11270	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
11271	else {
11272	// Transform the binary opcode to the VP equivalent.
11273	assert((Opc == ISD::OR \|\| Opc == ISD::AND) && "Unexpected opcode");
11274	Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11275	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
11276	}
11277	RHS = SDValue ();
11278	CC = SDValue ();
11279	return true;
11280	}
11281	}
11282	return false;
11283	}
11284

source code of llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp