1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
15#include "MCTargetDesc/RISCVMatInt.h"
16#include "RISCV.h"
17#include "RISCVMachineFunctionInfo.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/MemoryLocation.h"
24#include "llvm/Analysis/VectorUtils.h"
25#include "llvm/CodeGen/Analysis.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineJumpTableInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
32#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/IR/DiagnosticInfo.h"
35#include "llvm/IR/DiagnosticPrinter.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/PatternMatch.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/InstructionCost.h"
44#include "llvm/Support/KnownBits.h"
45#include "llvm/Support/MathExtras.h"
46#include "llvm/Support/raw_ostream.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
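// Hidden command-line options that tune individual lowering heuristics.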
static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));
60
static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));
66
static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));
72
static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
83RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
95 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
    report_fatal_error("Don't know how to lower this ABI");
107 case RISCVABI::ABI_ILP32:
108 case RISCVABI::ABI_ILP32E:
109 case RISCVABI::ABI_LP64E:
110 case RISCVABI::ABI_ILP32F:
111 case RISCVABI::ABI_ILP32D:
112 case RISCVABI::ABI_LP64:
113 case RISCVABI::ABI_LP64F:
114 case RISCVABI::ABI_LP64D:
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }
143
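  // Scalable vector value types, grouped by element type. These tables drive
  // RVV register class selection and the per-type legalization setup below.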
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
231 computeRegisterProperties(STI.getRegisterInfo());
232
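  // X2 is the RISC-V stack pointer register (sp).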
233 setStackPointerRegisterToSaveRestore(RISCV::X2);
234
235 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
238 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
252
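  // Integer compares: SETLT/SETULT map directly onto slt/sltu, SETGT/SETUGT
  // are custom-lowered, and the remaining orderings are expanded.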
  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
261 setOperationAction(ISD::SETCC, MVT::i32, Promote);
262
263 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
264
265 setOperationAction(ISD::VASTART, MVT::Other, Custom);
266 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
267 if (RV64LegalI32 && Subtarget.is64Bit())
268 setOperationAction(ISD::VAARG, MVT::i32, Promote);
269
270 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
271
272 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
278 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
279
280 if (!RV64LegalI32) {
281 setOperationAction(ISD::LOAD, MVT::i32, Custom);
282 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
283 MVT::i32, Custom);
284 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
287 setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
288 } else {
289 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
290 if (Subtarget.hasStdExtZbb()) {
291 setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
292 setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Custom);
293 }
294 }
295 setOperationAction(ISD::SADDO, MVT::i32, Custom);
296 } else {
    setLibcallName(
        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
        nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
305 if (RV64LegalI32 && Subtarget.is64Bit())
306 setOperationAction(ISD::MUL, MVT::i32, Promote);
307 } else if (Subtarget.is64Bit()) {
308 setOperationAction(ISD::MUL, MVT::i128, Custom);
309 if (!RV64LegalI32)
310 setOperationAction(ISD::MUL, MVT::i32, Custom);
311 else
312 setOperationAction(ISD::SMULO, MVT::i32, Custom);
313 } else {
314 setOperationAction(ISD::MUL, MVT::i64, Custom);
315 }
316
317 if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
321 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
325 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
330 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
331 setOperationAction(
332 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
333 Expand);
334 }
335
  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);
339
  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
345 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
348 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
352 } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
354 if (RV64LegalI32 && Subtarget.is64Bit())
355 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
360 setOperationAction(ISD::BSWAP, XLenVT,
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
366 setOperationAction(ISD::BSWAP, MVT::i32,
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
377 setOperationAction(ISD::BITREVERSE, XLenVT,
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
385 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
390 setOperationAction(ISD::CTTZ, MVT::i32, Legal);
391 else
392 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
396 if (RV64LegalI32 && Subtarget.is64Bit())
397 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32bit case is efficient on 64bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
406 setOperationAction(ISD::CTLZ, MVT::i32,
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
409 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
410 } else
411 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
412 }
413 } else {
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
415 if (RV64LegalI32 && Subtarget.is64Bit())
416 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
421 setOperationAction(ISD::ABS, MVT::i32, Custom);
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, XLenVT, Legal);
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
429 if (RV64LegalI32 && Subtarget.is64Bit())
430 setOperationAction(ISD::SELECT, MVT::i32, Promote);
431 }
432
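  // Shared tables of FP node kinds and condition codes used to configure the
  // scalar floating-point types (f16/bf16/f32/f64) below.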
433 static const unsigned FPLegalNodeTypes[] = {
434 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
435 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
436 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
437 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
438 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
439 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
440
441 static const ISD::CondCode FPCCToExpand[] = {
442 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
443 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
444 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
445
446 static const unsigned FPOpToExpand[] = {
447 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
451 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
452 ISD::FROUNDEVEN};
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
455 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
458 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
459 ISD::FSUB, ISD::FMUL, ISD::FMA,
460 ISD::FDIV, ISD::FSQRT, ISD::FABS,
461 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
462 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
463 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
464 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
465 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
466 ISD::FROUNDEVEN, ISD::SELECT};
467
468 if (Subtarget.hasStdExtZfbfmin()) {
469 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
470 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
471 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
472 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
473 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
474 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
475 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
476 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
478 setOperationAction(ISD::FREM, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
481 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
489 setOperationAction(ISD::SELECT, MVT::f16, Custom);
490 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
493 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
494 ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
498 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
499 }
500
501 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
502 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
504 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
505 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
506
507 setOperationAction(ISD::FNEARBYINT, MVT::f16,
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
509 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
510 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
511 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
512 ISD::FLOG10},
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
517 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
518 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
519 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
520 ISD::STRICT_FTRUNC},
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
525 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
526
527 if (!Subtarget.hasStdExtZfa())
528 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
536 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
537 setOperationAction(ISD::SELECT, MVT::f32, Custom);
538 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
544 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
545 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
546 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
548 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
549 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
550
551 if (Subtarget.hasStdExtZfa())
552 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
553 else
554 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
555 }
556
557 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
558 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
559
560 if (Subtarget.hasStdExtDOrZdinx()) {
561 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
562
563 if (!Subtarget.is64Bit())
564 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
565
566 if (Subtarget.hasStdExtZfa()) {
567 setOperationAction(FPRndMode, MVT::f64, Legal);
568 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
569 } else {
570 if (Subtarget.is64Bit())
571 setOperationAction(FPRndMode, MVT::f64, Custom);
572
573 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
574 }
575
576 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
577 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
578 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
579 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
580 setOperationAction(ISD::SELECT, MVT::f64, Custom);
581 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
582 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
583 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
584 setOperationAction(FPOpToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
587 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
588 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
589 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
590 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
591 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
592 Subtarget.isSoftFPABI() ? LibCall : Custom);
593 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
594 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
595 }
596
597 if (Subtarget.is64Bit()) {
598 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
599 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
600 MVT::i32, Custom);
601 setOperationAction(ISD::LROUND, MVT::i32, Custom);
602 }
603
604 if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);
611
612 if (RV64LegalI32 && Subtarget.is64Bit())
613 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
614 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
615 MVT::i32, Legal);
616
    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
618 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
619 }
620
  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
626
627 if (Subtarget.is64Bit())
628 setOperationAction(ISD::Constant, MVT::i64, Custom);
629
630 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
631 // Unfortunately this can't be determined just from the ISA naming string.
632 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
633 Subtarget.is64Bit() ? Legal : Custom);
634 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
635 Subtarget.is64Bit() ? Legal : Custom);
636
637 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
638 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
639 if (Subtarget.is64Bit())
640 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
641
642 if (Subtarget.hasStdExtZicbop()) {
643 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
644 }
645
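  // With the A extension, native atomics are supported up to XLEN bits;
  // Zabha together with Zacas additionally enables sub-word cmpxchg.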
646 if (Subtarget.hasStdExtA()) {
647 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
648 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
649 setMinCmpXchgSizeInBits(8);
650 else
651 setMinCmpXchgSizeInBits(32);
652 } else if (Subtarget.hasForcedAtomics()) {
653 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
654 } else {
655 setMaxAtomicSizeInBitsSupported(0);
656 }
657
658 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
659
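  // Boolean (setcc) results are represented as 0 or 1.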
660 setBooleanContents(ZeroOrOneBooleanContent);
661
662 if (Subtarget.hasVInstructions()) {
663 setBooleanVectorContents(ZeroOrOneBooleanContent);
664
    setOperationAction(ISD::VSCALE, XLenVT, Custom);
666 if (RV64LegalI32 && Subtarget.is64Bit())
667 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
668
669 // RVV intrinsics may have illegal operands.
670 // We also need to custom legalize vmv.x.s.
671 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
672 ISD::INTRINSIC_VOID},
673 {MVT::i8, MVT::i16}, Custom);
674 if (Subtarget.is64Bit())
675 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
676 MVT::i32, Custom);
677 else
678 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
679 MVT::i64, Custom);
680
681 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
682 MVT::Other, Custom);
683
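    // Opcode tables for VP (vector-predicated) operations and vector
    // reductions, shared by the per-type RVV configuration loops below.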
684 static const unsigned IntegerVPOps[] = {
685 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
686 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
687 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
688 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
689 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
690 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
691 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
692 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
693 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
694 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
695 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
696 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
697 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
698 ISD::VP_USUBSAT};
699
700 static const unsigned FloatingPointVPOps[] = {
701 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
702 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
703 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
704 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
705 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
706 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
707 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
708 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
709 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
710 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
711 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
712 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
713 ISD::EXPERIMENTAL_VP_SPLICE};
714
715 static const unsigned IntegerVecReduceOps[] = {
716 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
717 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
718 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
719
720 static const unsigned FloatingPointVecReduceOps[] = {
721 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
722 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
723
724 if (!Subtarget.is64Bit()) {
725 // We must custom-lower certain vXi64 operations on RV32 due to the vector
726 // element type being illegal.
727 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
728 MVT::i64, Custom);
729
730 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
731
732 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
733 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
734 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
735 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
736 MVT::i64, Custom);
737 }
738
739 for (MVT VT : BoolVecVTs) {
740 if (!isTypeLegal(VT))
741 continue;
742
743 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
744
745 // Mask VTs are custom-expanded into a series of standard nodes
746 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
747 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
748 ISD::SCALAR_TO_VECTOR},
749 VT, Custom);
750
751 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
752 Custom);
753
754 setOperationAction(ISD::SELECT, VT, Custom);
755 setOperationAction(
756 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
757 Expand);
758
759 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
760
761 setOperationAction(
762 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
763 Custom);
764
765 setOperationAction(
766 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
767 Custom);
768
769 // RVV has native int->float & float->int conversions where the
770 // element type sizes are within one power-of-two of each other. Any
771 // wider distances between type sizes have to be lowered as sequences
772 // which progressively narrow the gap in stages.
773 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
774 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
775 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
776 ISD::STRICT_FP_TO_UINT},
777 VT, Custom);
778 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
779 Custom);
780
781 // Expand all extending loads to types larger than this, and truncating
782 // stores from types larger than this.
783 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
784 setTruncStoreAction(VT, OtherVT, Expand);
785 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
786 OtherVT, Expand);
787 }
788
789 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
790 ISD::VP_TRUNCATE, ISD::VP_SETCC},
791 VT, Custom);
792
793 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
794 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
795
796 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
797
798 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
799 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
800
801 setOperationPromotedToType(
802 ISD::VECTOR_SPLICE, VT,
803 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
804 }
805
806 for (MVT VT : IntVecVTs) {
807 if (!isTypeLegal(VT))
808 continue;
809
810 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
811 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
812
813 // Vectors implement MULHS/MULHU.
814 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
815
816 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
817 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
818 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
819
820 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
821 Legal);
822
823 setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
824
825 // Custom-lower extensions and truncations from/to mask types.
826 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
827 VT, Custom);
828
829 // RVV has native int->float & float->int conversions where the
830 // element type sizes are within one power-of-two of each other. Any
831 // wider distances between type sizes have to be lowered as sequences
832 // which progressively narrow the gap in stages.
833 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
834 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
835 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
836 ISD::STRICT_FP_TO_UINT},
837 VT, Custom);
838 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
839 Custom);
840 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
841 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
842 VT, Legal);
843
844 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
845 // nodes which truncate by one power of two at a time.
846 setOperationAction(ISD::TRUNCATE, VT, Custom);
847
848 // Custom-lower insert/extract operations to simplify patterns.
849 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
850 Custom);
851
852 // Custom-lower reduction operations to set up the corresponding custom
853 // nodes' operands.
854 setOperationAction(IntegerVecReduceOps, VT, Custom);
855
856 setOperationAction(IntegerVPOps, VT, Custom);
857
858 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
859
860 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
861 VT, Custom);
862
863 setOperationAction(
864 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
865 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
866 VT, Custom);
867
868 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
869 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
870 VT, Custom);
871
872 setOperationAction(ISD::SELECT, VT, Custom);
873 setOperationAction(ISD::SELECT_CC, VT, Expand);
874
875 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
876
877 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
878 setTruncStoreAction(VT, OtherVT, Expand);
879 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
880 OtherVT, Expand);
881 }
882
883 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
884 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
885
886 // Splice
887 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
888
889 if (Subtarget.hasStdExtZvkb()) {
890 setOperationAction(ISD::BSWAP, VT, Legal);
891 setOperationAction(ISD::VP_BSWAP, VT, Custom);
892 } else {
893 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
894 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
895 }
896
897 if (Subtarget.hasStdExtZvbb()) {
898 setOperationAction(ISD::BITREVERSE, VT, Legal);
899 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
900 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
901 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
902 VT, Custom);
903 } else {
904 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
905 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
906 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
907 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
908 VT, Expand);
909
        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
        // is in the range of f32.
912 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
913 if (isTypeLegal(FloatVT)) {
914 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
915 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
916 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
917 VT, Custom);
918 }
919 }
920 }
921
922 // Expand various CCs to best match the RVV ISA, which natively supports UNE
923 // but no other unordered comparisons, and supports all ordered comparisons
924 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
925 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
926 // and we pattern-match those back to the "original", swapping operands once
927 // more. This way we catch both operations and both "vf" and "fv" forms with
928 // fewer patterns.
929 static const ISD::CondCode VFPCCToExpand[] = {
930 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
931 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
932 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
933 };
934
935 // TODO: support more ops.
936 static const unsigned ZvfhminPromoteOps[] = {
937 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
938 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
939 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
940 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
941 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
942 ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
943 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
944
945 // TODO: support more vp ops.
946 static const unsigned ZvfhminPromoteVPOps[] = {
947 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
948 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
949 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
950 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
951 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
952 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
953 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
954 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
955 ISD::VP_FMAXIMUM};
956
957 // Sets common operation actions on RVV floating-point vector types.
958 const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
1031 };
1032
1033 // Sets common extload/truncstore actions on RVV floating-point vector
1034 // types.
1035 const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
1041 };
1042
1043 if (Subtarget.hasVInstructionsF16()) {
1044 for (MVT VT : F16VecVTs) {
1045 if (!isTypeLegal(VT))
1046 continue;
1047 SetCommonVFPActions(VT);
1048 }
1049 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1050 for (MVT VT : F16VecVTs) {
1051 if (!isTypeLegal(VT))
1052 continue;
1053 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1054 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1055 Custom);
1056 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1057 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1058 Custom);
1059 setOperationAction(ISD::SELECT_CC, VT, Expand);
1060 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1061 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1062 VT, Custom);
1063 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1064 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1065 VT, Custom);
1066 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1067 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1068 // load/store
1069 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1070
        // Custom split nxv32f16 since nxv32f32 is not legal.
1072 if (VT == MVT::nxv32f16) {
1073 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1074 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1075 continue;
1076 }
1077 // Add more promote ops.
1078 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1079 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1080 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1081 }
1082 }
1083
1084 if (Subtarget.hasVInstructionsF32()) {
1085 for (MVT VT : F32VecVTs) {
1086 if (!isTypeLegal(VT))
1087 continue;
1088 SetCommonVFPActions(VT);
1089 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1090 }
1091 }
1092
1093 if (Subtarget.hasVInstructionsF64()) {
1094 for (MVT VT : F64VecVTs) {
1095 if (!isTypeLegal(VT))
1096 continue;
1097 SetCommonVFPActions(VT);
1098 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1099 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1100 }
1101 }
1102
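    // Fixed-length vectors are lowered on top of RVV scalable containers:
    // start with every operation Expanded, then re-enable what can be lowered.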
1103 if (Subtarget.useRVVForFixedLengthVectors()) {
1104 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1105 if (!useRVVForFixedLengthVectorVT(VT))
1106 continue;
1107
1108 // By default everything must be expanded.
1109 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1110 setOperationAction(Op, VT, Expand);
1111 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1112 setTruncStoreAction(VT, OtherVT, Expand);
1113 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
1114 OtherVT, Expand);
1115 }
1116
1117 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1118 // expansion to a build_vector of 0s.
1119 setOperationAction(ISD::UNDEF, VT, Custom);
1120
1121 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1122 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1123 Custom);
1124
1125 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
1126 Custom);
1127
1128 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1129 VT, Custom);
1130
1131 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1132
1133 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1134
1135 setOperationAction(ISD::SETCC, VT, Custom);
1136
1137 setOperationAction(ISD::SELECT, VT, Custom);
1138
1139 setOperationAction(ISD::TRUNCATE, VT, Custom);
1140
1141 setOperationAction(ISD::BITCAST, VT, Custom);
1142
1143 setOperationAction(
1144 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1145 Custom);
1146
1147 setOperationAction(
1148 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1149 Custom);
1150
1151 setOperationAction(
1152 {
1153 ISD::SINT_TO_FP,
1154 ISD::UINT_TO_FP,
1155 ISD::FP_TO_SINT,
1156 ISD::FP_TO_UINT,
1157 ISD::STRICT_SINT_TO_FP,
1158 ISD::STRICT_UINT_TO_FP,
1159 ISD::STRICT_FP_TO_SINT,
1160 ISD::STRICT_FP_TO_UINT,
1161 },
1162 VT, Custom);
1163 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
1164 Custom);
1165
1166 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1167
        // Operations below differ between mask vectors and other vectors.
1169 if (VT.getVectorElementType() == MVT::i1) {
1170 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1171 ISD::OR, ISD::XOR},
1172 VT, Custom);
1173
1174 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1175 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1176 VT, Custom);
1177
1178 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1179 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1180 continue;
1181 }
1182
1183 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1184 // it before type legalization for i64 vectors on RV32. It will then be
1185 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1186 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1187 // improvements first.
1188 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1189 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1190 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1191 }
1192
1193 setOperationAction(
1194 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1195
1196 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1197 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1198 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1199 ISD::VP_SCATTER},
1200 VT, Custom);
1201
1202 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1203 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1204 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1205 VT, Custom);
1206
1207 setOperationAction(
1208 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1209
1210 setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
1211
1212 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1213 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1214 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1215
1216 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
1217 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
1218 VT, Custom);
1219
1220 setOperationAction(ISD::VSELECT, VT, Custom);
1221 setOperationAction(ISD::SELECT_CC, VT, Expand);
1222
1223 setOperationAction(
1224 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1225
1226 // Custom-lower reduction operations to set up the corresponding custom
1227 // nodes' operands.
1228 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1229 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1230 ISD::VECREDUCE_UMIN},
1231 VT, Custom);
1232
1233 setOperationAction(IntegerVPOps, VT, Custom);
1234
1235 if (Subtarget.hasStdExtZvkb())
1236 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
1237
1238 if (Subtarget.hasStdExtZvbb()) {
1239 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1240 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1241 VT, Custom);
1242 } else {
          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of
          // VT is in the range of f32.
1245 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1246 if (isTypeLegal(FloatVT))
1247 setOperationAction(
1248 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1249 Custom);
1250 }
1251 }
1252
1253 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1254 // There are no extending loads or truncating stores.
1255 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1256 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1257 setTruncStoreAction(VT, InnerVT, Expand);
1258 }
1259
1260 if (!useRVVForFixedLengthVectorVT(VT))
1261 continue;
1262
1263 // By default everything must be expanded.
1264 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1265 setOperationAction(Op, VT, Expand);
1266
1267 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1268 // expansion to a build_vector of 0s.
1269 setOperationAction(ISD::UNDEF, VT, Custom);
1270
1271 if (VT.getVectorElementType() == MVT::f16 &&
1272 !Subtarget.hasVInstructionsF16()) {
1273 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1274 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1275 Custom);
1276 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1277 setOperationAction(
1278 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1279 Custom);
1280 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1281 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1282 VT, Custom);
1283 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1284 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1285 VT, Custom);
1286 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1287 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1288 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1289 // Don't promote f16 vector operations to f32 if f32 vector type is
1290 // not legal.
1291 // TODO: could split the f16 vector into two vectors and do promotion.
1292 if (!isTypeLegal(F32VecVT))
1293 continue;
1294 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1295 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1296 continue;
1297 }
1298
1299 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1300 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1301 Custom);
1302
1303 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1304 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
1305 ISD::EXTRACT_VECTOR_ELT},
1306 VT, Custom);
1307
1308 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1309 ISD::MGATHER, ISD::MSCATTER},
1310 VT, Custom);
1311
1312 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1313 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1314 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1315 ISD::VP_SCATTER},
1316 VT, Custom);
1317
1318 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1319 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1320 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1321 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1322 VT, Custom);
1323
1324 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1325
1326 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1327 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1328 VT, Custom);
1329
1330 setCondCodeAction(VFPCCToExpand, VT, Expand);
1331
1332 setOperationAction(ISD::SETCC, VT, Custom);
1333 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1334 setOperationAction(ISD::SELECT_CC, VT, Expand);
1335
1336 setOperationAction(ISD::BITCAST, VT, Custom);
1337
1338 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1339
1340 setOperationAction(FloatingPointVPOps, VT, Custom);
1341
1342 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1343 Custom);
1344 setOperationAction(
1345 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1346 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1347 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1348 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1349 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1350 VT, Custom);
1351 }
1352
1353 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1354 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1355 Custom);
1356 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1357 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1358 if (Subtarget.hasStdExtFOrZfinx())
1359 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1360 if (Subtarget.hasStdExtDOrZdinx())
1361 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1362 }
1363 }
1364
1365 if (Subtarget.hasStdExtA()) {
    setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1367 if (RV64LegalI32 && Subtarget.is64Bit())
1368 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1369 }
1370
1371 if (Subtarget.hasForcedAtomics()) {
1372 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
    setOperationAction(
        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
        XLenVT, LibCall);
1379 }
1380
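  // XTHeadMemIdx provides pre/post-increment addressing for scalar accesses.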
1381 if (Subtarget.hasVendorXTHeadMemIdx()) {
1382 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1383 setIndexedLoadAction(im, MVT::i8, Legal);
1384 setIndexedStoreAction(im, MVT::i8, Legal);
1385 setIndexedLoadAction(im, MVT::i16, Legal);
1386 setIndexedStoreAction(im, MVT::i16, Legal);
1387 setIndexedLoadAction(im, MVT::i32, Legal);
1388 setIndexedStoreAction(im, MVT::i32, Legal);
1389
1390 if (Subtarget.is64Bit()) {
1391 setIndexedLoadAction(im, MVT::i64, Legal);
1392 setIndexedStoreAction(im, MVT::i64, Legal);
1393 }
1394 }
1395 }
1396
1397 // Function alignments.
1398 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1399 setMinFunctionAlignment(FunctionAlignment);
1400 // Set preferred alignments.
1401 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1402 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1403
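  // Register the node kinds we want to see in PerformDAGCombine.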
1404 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1405 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
1406 ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1407 if (Subtarget.is64Bit())
1408 setTargetDAGCombine(ISD::SRA);
1409
1410 if (Subtarget.hasStdExtFOrZfinx())
1411 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1412
1413 if (Subtarget.hasStdExtZbb())
1414 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1415
1416 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1417 setTargetDAGCombine(ISD::TRUNCATE);
1418
1419 if (Subtarget.hasStdExtZbkb())
1420 setTargetDAGCombine(ISD::BITREVERSE);
1421 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1422 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1423 if (Subtarget.hasStdExtFOrZfinx())
1424 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1425 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1426 if (Subtarget.hasVInstructions())
1427 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1428 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1429 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1430 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1431 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1432 ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
1433 ISD::INSERT_VECTOR_ELT, ISD::ABS});
1434 if (Subtarget.hasVendorXTHeadMemPair())
1435 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1436 if (Subtarget.useRVVForFixedLengthVectors())
1437 setTargetDAGCombine(ISD::BITCAST);
1438
1439 setLibcallName(Call: RTLIB::FPEXT_F16_F32, Name: "__extendhfsf2");
1440 setLibcallName(Call: RTLIB::FPROUND_F32_F16, Name: "__truncsfhf2");
1441
1442 // Disable strict node mutation.
1443 IsStrictFPEnabled = true;
1444}
1445
1446EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1447 LLVMContext &Context,
1448 EVT VT) const {
1449 if (!VT.isVector())
1450 return getPointerTy(DL);
1451 if (Subtarget.hasVInstructions() &&
1452 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1453 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1454 return VT.changeVectorElementTypeToInteger();
1455}
1456
1457MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1458 return Subtarget.getXLenVT();
1459}
1460
1461// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1462bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1463 unsigned VF,
1464 bool IsScalable) const {
1465 if (!Subtarget.hasVInstructions())
1466 return true;
1467
1468 if (!IsScalable)
1469 return true;
1470
1471 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1472 return true;
1473
1474 // Don't allow VF=1 if those types aren't legal.
1475 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1476 return true;
1477
1478 // VLEN=32 support is incomplete.
1479 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1480 return true;
1481
1482 // The maximum VF is for the smallest element width with LMUL=8.
1483 // VF must be a power of 2.
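// That is RVVBitsPerBlock / 8 elements guaranteed per register at SEW=8,
// multiplied by the 8 registers of an LMUL=8 group.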
1484 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1485 return VF > MaxVF || !isPowerOf2_32(Value: VF);
1486}
1487
1488bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1489 return !Subtarget.hasVInstructions() ||
1490 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1491}
1492
1493bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1494 const CallInst &I,
1495 MachineFunction &MF,
1496 unsigned Intrinsic) const {
1497 auto &DL = I.getModule()->getDataLayout();
1498
1499 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1500 bool IsUnitStrided, bool UsePtrVal = false) {
1501 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1502 // We can't use ptrVal if the intrinsic can access memory before the
1503 // pointer. This means we can't use it for strided or indexed intrinsics.
1504 if (UsePtrVal)
1505 Info.ptrVal = I.getArgOperand(i: PtrOp);
1506 else
1507 Info.fallbackAddressSpace =
1508 I.getArgOperand(i: PtrOp)->getType()->getPointerAddressSpace();
1509 Type *MemTy;
1510 if (IsStore) {
1511 // Store value is the first operand.
1512 MemTy = I.getArgOperand(i: 0)->getType();
1513 } else {
1514 // Use the return type. If it's a segment load, the return type is a struct.
1515 MemTy = I.getType();
1516 if (MemTy->isStructTy())
1517 MemTy = MemTy->getStructElementType(N: 0);
1518 }
1519 if (!IsUnitStrided)
1520 MemTy = MemTy->getScalarType();
1521
1522 Info.memVT = getValueType(DL, Ty: MemTy);
1523 Info.align = Align(DL.getTypeSizeInBits(Ty: MemTy->getScalarType()) / 8);
1524 Info.size = MemoryLocation::UnknownSize;
1525 Info.flags |=
1526 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1527 return true;
1528 };
1529
1530 if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal))
1531 Info.flags |= MachineMemOperand::MONonTemporal;
1532
1533 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1534 switch (Intrinsic) {
1535 default:
1536 return false;
1537 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1538 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1539 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1540 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1545 case Intrinsic::riscv_masked_cmpxchg_i32:
1546 Info.opc = ISD::INTRINSIC_W_CHAIN;
1547 Info.memVT = MVT::i32;
1548 Info.ptrVal = I.getArgOperand(i: 0);
1549 Info.offset = 0;
1550 Info.align = Align(4);
1551 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1552 MachineMemOperand::MOVolatile;
1553 return true;
1554 case Intrinsic::riscv_masked_strided_load:
1555 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1556 /*IsUnitStrided*/ false);
1557 case Intrinsic::riscv_masked_strided_store:
1558 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1559 /*IsUnitStrided*/ false);
1560 case Intrinsic::riscv_seg2_load:
1561 case Intrinsic::riscv_seg3_load:
1562 case Intrinsic::riscv_seg4_load:
1563 case Intrinsic::riscv_seg5_load:
1564 case Intrinsic::riscv_seg6_load:
1565 case Intrinsic::riscv_seg7_load:
1566 case Intrinsic::riscv_seg8_load:
1567 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1568 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1569 case Intrinsic::riscv_seg2_store:
1570 case Intrinsic::riscv_seg3_store:
1571 case Intrinsic::riscv_seg4_store:
1572 case Intrinsic::riscv_seg5_store:
1573 case Intrinsic::riscv_seg6_store:
1574 case Intrinsic::riscv_seg7_store:
1575 case Intrinsic::riscv_seg8_store:
1576 // Operands are (vec, ..., vec, ptr, vl)
1577 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1578 /*IsStore*/ true,
1579 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1580 case Intrinsic::riscv_vle:
1581 case Intrinsic::riscv_vle_mask:
1582 case Intrinsic::riscv_vleff:
1583 case Intrinsic::riscv_vleff_mask:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1585 /*IsStore*/ false,
1586 /*IsUnitStrided*/ true,
1587 /*UsePtrVal*/ true);
1588 case Intrinsic::riscv_vse:
1589 case Intrinsic::riscv_vse_mask:
1590 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1591 /*IsStore*/ true,
1592 /*IsUnitStrided*/ true,
1593 /*UsePtrVal*/ true);
1594 case Intrinsic::riscv_vlse:
1595 case Intrinsic::riscv_vlse_mask:
1596 case Intrinsic::riscv_vloxei:
1597 case Intrinsic::riscv_vloxei_mask:
1598 case Intrinsic::riscv_vluxei:
1599 case Intrinsic::riscv_vluxei_mask:
1600 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1601 /*IsStore*/ false,
1602 /*IsUnitStrided*/ false);
1603 case Intrinsic::riscv_vsse:
1604 case Intrinsic::riscv_vsse_mask:
1605 case Intrinsic::riscv_vsoxei:
1606 case Intrinsic::riscv_vsoxei_mask:
1607 case Intrinsic::riscv_vsuxei:
1608 case Intrinsic::riscv_vsuxei_mask:
1609 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1610 /*IsStore*/ true,
1611 /*IsUnitStrided*/ false);
1612 case Intrinsic::riscv_vlseg2:
1613 case Intrinsic::riscv_vlseg3:
1614 case Intrinsic::riscv_vlseg4:
1615 case Intrinsic::riscv_vlseg5:
1616 case Intrinsic::riscv_vlseg6:
1617 case Intrinsic::riscv_vlseg7:
1618 case Intrinsic::riscv_vlseg8:
1619 case Intrinsic::riscv_vlseg2ff:
1620 case Intrinsic::riscv_vlseg3ff:
1621 case Intrinsic::riscv_vlseg4ff:
1622 case Intrinsic::riscv_vlseg5ff:
1623 case Intrinsic::riscv_vlseg6ff:
1624 case Intrinsic::riscv_vlseg7ff:
1625 case Intrinsic::riscv_vlseg8ff:
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1627 /*IsStore*/ false,
1628 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1629 case Intrinsic::riscv_vlseg2_mask:
1630 case Intrinsic::riscv_vlseg3_mask:
1631 case Intrinsic::riscv_vlseg4_mask:
1632 case Intrinsic::riscv_vlseg5_mask:
1633 case Intrinsic::riscv_vlseg6_mask:
1634 case Intrinsic::riscv_vlseg7_mask:
1635 case Intrinsic::riscv_vlseg8_mask:
1636 case Intrinsic::riscv_vlseg2ff_mask:
1637 case Intrinsic::riscv_vlseg3ff_mask:
1638 case Intrinsic::riscv_vlseg4ff_mask:
1639 case Intrinsic::riscv_vlseg5ff_mask:
1640 case Intrinsic::riscv_vlseg6ff_mask:
1641 case Intrinsic::riscv_vlseg7ff_mask:
1642 case Intrinsic::riscv_vlseg8ff_mask:
1643 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1644 /*IsStore*/ false,
1645 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1646 case Intrinsic::riscv_vlsseg2:
1647 case Intrinsic::riscv_vlsseg3:
1648 case Intrinsic::riscv_vlsseg4:
1649 case Intrinsic::riscv_vlsseg5:
1650 case Intrinsic::riscv_vlsseg6:
1651 case Intrinsic::riscv_vlsseg7:
1652 case Intrinsic::riscv_vlsseg8:
1653 case Intrinsic::riscv_vloxseg2:
1654 case Intrinsic::riscv_vloxseg3:
1655 case Intrinsic::riscv_vloxseg4:
1656 case Intrinsic::riscv_vloxseg5:
1657 case Intrinsic::riscv_vloxseg6:
1658 case Intrinsic::riscv_vloxseg7:
1659 case Intrinsic::riscv_vloxseg8:
1660 case Intrinsic::riscv_vluxseg2:
1661 case Intrinsic::riscv_vluxseg3:
1662 case Intrinsic::riscv_vluxseg4:
1663 case Intrinsic::riscv_vluxseg5:
1664 case Intrinsic::riscv_vluxseg6:
1665 case Intrinsic::riscv_vluxseg7:
1666 case Intrinsic::riscv_vluxseg8:
1667 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1668 /*IsStore*/ false,
1669 /*IsUnitStrided*/ false);
1670 case Intrinsic::riscv_vlsseg2_mask:
1671 case Intrinsic::riscv_vlsseg3_mask:
1672 case Intrinsic::riscv_vlsseg4_mask:
1673 case Intrinsic::riscv_vlsseg5_mask:
1674 case Intrinsic::riscv_vlsseg6_mask:
1675 case Intrinsic::riscv_vlsseg7_mask:
1676 case Intrinsic::riscv_vlsseg8_mask:
1677 case Intrinsic::riscv_vloxseg2_mask:
1678 case Intrinsic::riscv_vloxseg3_mask:
1679 case Intrinsic::riscv_vloxseg4_mask:
1680 case Intrinsic::riscv_vloxseg5_mask:
1681 case Intrinsic::riscv_vloxseg6_mask:
1682 case Intrinsic::riscv_vloxseg7_mask:
1683 case Intrinsic::riscv_vloxseg8_mask:
1684 case Intrinsic::riscv_vluxseg2_mask:
1685 case Intrinsic::riscv_vluxseg3_mask:
1686 case Intrinsic::riscv_vluxseg4_mask:
1687 case Intrinsic::riscv_vluxseg5_mask:
1688 case Intrinsic::riscv_vluxseg6_mask:
1689 case Intrinsic::riscv_vluxseg7_mask:
1690 case Intrinsic::riscv_vluxseg8_mask:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1692 /*IsStore*/ false,
1693 /*IsUnitStrided*/ false);
1694 case Intrinsic::riscv_vsseg2:
1695 case Intrinsic::riscv_vsseg3:
1696 case Intrinsic::riscv_vsseg4:
1697 case Intrinsic::riscv_vsseg5:
1698 case Intrinsic::riscv_vsseg6:
1699 case Intrinsic::riscv_vsseg7:
1700 case Intrinsic::riscv_vsseg8:
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false);
1704 case Intrinsic::riscv_vsseg2_mask:
1705 case Intrinsic::riscv_vsseg3_mask:
1706 case Intrinsic::riscv_vsseg4_mask:
1707 case Intrinsic::riscv_vsseg5_mask:
1708 case Intrinsic::riscv_vsseg6_mask:
1709 case Intrinsic::riscv_vsseg7_mask:
1710 case Intrinsic::riscv_vsseg8_mask:
1711 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1712 /*IsStore*/ true,
1713 /*IsUnitStrided*/ false);
1714 case Intrinsic::riscv_vssseg2:
1715 case Intrinsic::riscv_vssseg3:
1716 case Intrinsic::riscv_vssseg4:
1717 case Intrinsic::riscv_vssseg5:
1718 case Intrinsic::riscv_vssseg6:
1719 case Intrinsic::riscv_vssseg7:
1720 case Intrinsic::riscv_vssseg8:
1721 case Intrinsic::riscv_vsoxseg2:
1722 case Intrinsic::riscv_vsoxseg3:
1723 case Intrinsic::riscv_vsoxseg4:
1724 case Intrinsic::riscv_vsoxseg5:
1725 case Intrinsic::riscv_vsoxseg6:
1726 case Intrinsic::riscv_vsoxseg7:
1727 case Intrinsic::riscv_vsoxseg8:
1728 case Intrinsic::riscv_vsuxseg2:
1729 case Intrinsic::riscv_vsuxseg3:
1730 case Intrinsic::riscv_vsuxseg4:
1731 case Intrinsic::riscv_vsuxseg5:
1732 case Intrinsic::riscv_vsuxseg6:
1733 case Intrinsic::riscv_vsuxseg7:
1734 case Intrinsic::riscv_vsuxseg8:
1735 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1736 /*IsStore*/ true,
1737 /*IsUnitStrided*/ false);
1738 case Intrinsic::riscv_vssseg2_mask:
1739 case Intrinsic::riscv_vssseg3_mask:
1740 case Intrinsic::riscv_vssseg4_mask:
1741 case Intrinsic::riscv_vssseg5_mask:
1742 case Intrinsic::riscv_vssseg6_mask:
1743 case Intrinsic::riscv_vssseg7_mask:
1744 case Intrinsic::riscv_vssseg8_mask:
1745 case Intrinsic::riscv_vsoxseg2_mask:
1746 case Intrinsic::riscv_vsoxseg3_mask:
1747 case Intrinsic::riscv_vsoxseg4_mask:
1748 case Intrinsic::riscv_vsoxseg5_mask:
1749 case Intrinsic::riscv_vsoxseg6_mask:
1750 case Intrinsic::riscv_vsoxseg7_mask:
1751 case Intrinsic::riscv_vsoxseg8_mask:
1752 case Intrinsic::riscv_vsuxseg2_mask:
1753 case Intrinsic::riscv_vsuxseg3_mask:
1754 case Intrinsic::riscv_vsuxseg4_mask:
1755 case Intrinsic::riscv_vsuxseg5_mask:
1756 case Intrinsic::riscv_vsuxseg6_mask:
1757 case Intrinsic::riscv_vsuxseg7_mask:
1758 case Intrinsic::riscv_vsuxseg8_mask:
1759 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1760 /*IsStore*/ true,
1761 /*IsUnitStrided*/ false);
1762 }
1763}
1764
1765bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1766 const AddrMode &AM, Type *Ty,
1767 unsigned AS,
1768 Instruction *I) const {
1769 // No global is ever allowed as a base.
1770 if (AM.BaseGV)
1771 return false;
1772
1773 // RVV instructions only support register addressing.
1774 if (Subtarget.hasVInstructions() && isa<VectorType>(Val: Ty))
1775 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1776
1777 // Require a 12-bit signed offset.
1778 if (!isInt<12>(x: AM.BaseOffs))
1779 return false;
1780
1781 switch (AM.Scale) {
1782 case 0: // "r+i" or just "i", depending on HasBaseReg.
1783 break;
1784 case 1:
1785 if (!AM.HasBaseReg) // allow "r+i".
1786 break;
1787 return false; // disallow "r+r" or "r+r+i".
1788 default:
1789 return false;
1790 }
1791
1792 return true;
1793}
1794
1795bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1796 return isInt<12>(x: Imm);
1797}
1798
1799bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1800 return isInt<12>(x: Imm);
1801}
1802
1803// On RV32, 64-bit integers are split into their high and low parts and held
1804// in two different registers, so the trunc is free since the low register can
1805// just be used.
1806// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1807// isTruncateFree?
1808bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1809 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1810 return false;
1811 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1812 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1813 return (SrcBits == 64 && DestBits == 32);
1814}
1815
1816bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1817 // We consider i64->i32 free on RV64 since we have good selection of W
1818 // instructions that make promoting operations back to i64 free in many cases.
1819 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1820 !DstVT.isInteger())
1821 return false;
1822 unsigned SrcBits = SrcVT.getSizeInBits();
1823 unsigned DestBits = DstVT.getSizeInBits();
1824 return (SrcBits == 64 && DestBits == 32);
1825}
1826
1827bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1828 // Zexts are free if they can be combined with a load.
1829 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1830 // poorly with type legalization of compares, which prefers sext.
1831 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1832 EVT MemVT = LD->getMemoryVT();
1833 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1834 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1835 LD->getExtensionType() == ISD::ZEXTLOAD))
1836 return true;
1837 }
1838
1839 return TargetLowering::isZExtFree(Val, VT2);
1840}
1841
1842bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1843 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1844}
1845
1846bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1847 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32);
1848}
1849
1850bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1851 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1852}
1853
1854bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1855 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1856 Subtarget.hasVendorXCVbitmanip();
1857}
1858
1859bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1860 const Instruction &AndI) const {
1861 // We expect to be able to match a bit extraction instruction if the Zbs
1862 // extension is supported and the mask is a power of two. However, we
1863 // conservatively return false if the mask would fit in an ANDI instruction,
1864 // on the basis that it's possible the sinking+duplication of the AND in
1865 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1866 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1867 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1868 return false;
1869 ConstantInt *Mask = dyn_cast<ConstantInt>(Val: AndI.getOperand(i: 1));
1870 if (!Mask)
1871 return false;
1872 return !Mask->getValue().isSignedIntN(N: 12) && Mask->getValue().isPowerOf2();
1873}
1874
1875bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1876 EVT VT = Y.getValueType();
1877
1878 // FIXME: Support vectors once we have tests.
1879 if (VT.isVector())
1880 return false;
1881
1882 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1883 !isa<ConstantSDNode>(Val: Y);
1884}
1885
1886bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1887 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1888 if (Subtarget.hasStdExtZbs())
1889 return X.getValueType().isScalarInteger();
1890 auto *C = dyn_cast<ConstantSDNode>(Val&: Y);
1891 // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1892 if (Subtarget.hasVendorXTHeadBs())
1893 return C != nullptr;
1894 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1895 return C && C->getAPIntValue().ule(RHS: 10);
1896}
1897
1898bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1899 EVT VT) const {
1900 // Only enable for rvv.
1901 if (!VT.isVector() || !Subtarget.hasVInstructions())
1902 return false;
1903
1904 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1905 return false;
1906
1907 return true;
1908}
1909
1910bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1911 Type *Ty) const {
1912 assert(Ty->isIntegerTy());
1913
1914 unsigned BitSize = Ty->getIntegerBitWidth();
1915 if (BitSize > Subtarget.getXLen())
1916 return false;
1917
1918 // Fast path, assume 32-bit immediates are cheap.
1919 int64_t Val = Imm.getSExtValue();
1920 if (isInt<32>(x: Val))
1921 return true;
1922
1923 // A constant pool entry may be more aligned than the load we're trying to
1924 // replace. If we don't support unaligned scalar mem, prefer the constant
1925 // pool.
1926 // TODO: Can the caller pass down the alignment?
1927 if (!Subtarget.enableUnalignedScalarMem())
1928 return true;
1929
1930 // Prefer to keep the load if it would require many instructions.
1931 // This uses the same threshold we use for constant pools but doesn't
1932 // check useConstantPoolForLargeInts.
1933 // TODO: Should we keep the load only when we're definitely going to emit a
1934 // constant pool?
1935
1936 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
1937 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1938}
1939
1940bool RISCVTargetLowering::
1941 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1942 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1943 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1944 SelectionDAG &DAG) const {
1945 // One interesting pattern that we'd want to form is 'bit extract':
1946 // ((1 >> Y) & 1) ==/!= 0
1947 // But we also need to be careful not to try to reverse that fold.
1948
1949 // Is this '((1 >> Y) & 1)'?
1950 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1951 return false; // Keep the 'bit extract' pattern.
1952
1953 // Will this be '((1 >> Y) & 1)' after the transform?
1954 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1955 return true; // Do form the 'bit extract' pattern.
1956
1957 // If 'X' is a constant, and we transform, then we will immediately
1958 // try to undo the fold, thus causing endless combine loop.
1959 // So only do the transform if X is not a constant. This matches the default
1960 // implementation of this function.
1961 return !XC;
1962}
1963
1964bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1965 switch (Opcode) {
1966 case Instruction::Add:
1967 case Instruction::Sub:
1968 case Instruction::Mul:
1969 case Instruction::And:
1970 case Instruction::Or:
1971 case Instruction::Xor:
1972 case Instruction::FAdd:
1973 case Instruction::FSub:
1974 case Instruction::FMul:
1975 case Instruction::FDiv:
1976 case Instruction::ICmp:
1977 case Instruction::FCmp:
1978 return true;
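// For shifts, divides and remainders only the second operand has a scalar
// form (e.g. vsll.vx, vdiv.vx), so only that operand can be splatted.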
1979 case Instruction::Shl:
1980 case Instruction::LShr:
1981 case Instruction::AShr:
1982 case Instruction::UDiv:
1983 case Instruction::SDiv:
1984 case Instruction::URem:
1985 case Instruction::SRem:
1986 return Operand == 1;
1987 default:
1988 return false;
1989 }
1990}
1991
1992
1993bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1994 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1995 return false;
1996
1997 if (canSplatOperand(Opcode: I->getOpcode(), Operand))
1998 return true;
1999
2000 auto *II = dyn_cast<IntrinsicInst>(Val: I);
2001 if (!II)
2002 return false;
2003
2004 switch (II->getIntrinsicID()) {
2005 case Intrinsic::fma:
2006 case Intrinsic::vp_fma:
2007 return Operand == 0 || Operand == 1;
2008 case Intrinsic::vp_shl:
2009 case Intrinsic::vp_lshr:
2010 case Intrinsic::vp_ashr:
2011 case Intrinsic::vp_udiv:
2012 case Intrinsic::vp_sdiv:
2013 case Intrinsic::vp_urem:
2014 case Intrinsic::vp_srem:
2015 case Intrinsic::ssub_sat:
2016 case Intrinsic::vp_ssub_sat:
2017 case Intrinsic::usub_sat:
2018 case Intrinsic::vp_usub_sat:
2019 return Operand == 1;
2020 // These intrinsics are commutative.
2021 case Intrinsic::vp_add:
2022 case Intrinsic::vp_mul:
2023 case Intrinsic::vp_and:
2024 case Intrinsic::vp_or:
2025 case Intrinsic::vp_xor:
2026 case Intrinsic::vp_fadd:
2027 case Intrinsic::vp_fmul:
2028 case Intrinsic::vp_icmp:
2029 case Intrinsic::vp_fcmp:
2030 case Intrinsic::smin:
2031 case Intrinsic::vp_smin:
2032 case Intrinsic::umin:
2033 case Intrinsic::vp_umin:
2034 case Intrinsic::smax:
2035 case Intrinsic::vp_smax:
2036 case Intrinsic::umax:
2037 case Intrinsic::vp_umax:
2038 case Intrinsic::sadd_sat:
2039 case Intrinsic::vp_sadd_sat:
2040 case Intrinsic::uadd_sat:
2041 case Intrinsic::vp_uadd_sat:
2042 // These intrinsics have 'vr' versions.
2043 case Intrinsic::vp_sub:
2044 case Intrinsic::vp_fsub:
2045 case Intrinsic::vp_fdiv:
2046 return Operand == 0 || Operand == 1;
2047 default:
2048 return false;
2049 }
2050}
2051
2052/// Check if sinking \p I's operands to I's basic block is profitable, because
2053/// the operands can be folded into a target instruction, e.g.
2054/// splats of scalars can fold into vector instructions.
2055bool RISCVTargetLowering::shouldSinkOperands(
2056 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2057 using namespace llvm::PatternMatch;
2058
2059 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2060 return false;
2061
2062 // Don't sink splat operands if the target would rather not have them sunk.
2063 // Some targets require S2V transfer buffers and we can run out of them
2064 // copying the same value repeatedly.
2065 // FIXME: It could still be worth doing if it would improve vector register
2066 // pressure and prevent a vector spill.
2067 if (!Subtarget.sinkSplatOperands())
2068 return false;
2069
2070 for (auto OpIdx : enumerate(First: I->operands())) {
2071 if (!canSplatOperand(I, Operand: OpIdx.index()))
2072 continue;
2073
2074 Instruction *Op = dyn_cast<Instruction>(Val: OpIdx.value().get());
2075 // Make sure we are not already sinking this operand
2076 if (!Op || any_of(Range&: Ops, P: [&](Use *U) { return U->get() == Op; }))
2077 continue;
2078
2079 // We are looking for a splat that can be sunk.
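// That is, a shufflevector of an insertelement into lane 0 of undef, using an
// all-zeros mask (the canonical IR splat idiom).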
2080 if (!match(V: Op, P: m_Shuffle(v1: m_InsertElt(Val: m_Undef(), Elt: m_Value(), Idx: m_ZeroInt()),
2081 v2: m_Undef(), mask: m_ZeroMask())))
2082 continue;
2083
2084 // Don't sink i1 splats.
2085 if (cast<VectorType>(Val: Op->getType())->getElementType()->isIntegerTy(Bitwidth: 1))
2086 continue;
2087
2088 // All uses of the shuffle should be sunk to avoid duplicating it across GPRs
2089 // and vector registers.
2090 for (Use &U : Op->uses()) {
2091 Instruction *Insn = cast<Instruction>(Val: U.getUser());
2092 if (!canSplatOperand(I: Insn, Operand: U.getOperandNo()))
2093 return false;
2094 }
2095
2096 Ops.push_back(Elt: &Op->getOperandUse(i: 0));
2097 Ops.push_back(Elt: &OpIdx.value());
2098 }
2099 return true;
2100}
2101
2102bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2103 unsigned Opc = VecOp.getOpcode();
2104
2105 // Assume target opcodes can't be scalarized.
2106 // TODO - do we have any exceptions?
2107 if (Opc >= ISD::BUILTIN_OP_END)
2108 return false;
2109
2110 // If the vector op is not supported, try to convert to scalar.
2111 EVT VecVT = VecOp.getValueType();
2112 if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
2113 return true;
2114
2115 // If the vector op is supported, but the scalar op is not, the transform may
2116 // not be worthwhile.
2117 // However, permit converting the vector binary operation to a scalar binary
2118 // operation that is custom lowered with an illegal type.
2119 EVT ScalarVT = VecVT.getScalarType();
2120 return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT) ||
2121 isOperationCustom(Op: Opc, VT: ScalarVT);
2122}
2123
2124bool RISCVTargetLowering::isOffsetFoldingLegal(
2125 const GlobalAddressSDNode *GA) const {
2126 // In order to maximise the opportunity for common subexpression elimination,
2127 // keep a separate ADD node for the global address offset instead of folding
2128 // it in the global address node. Later peephole optimisations may choose to
2129 // fold it back in when profitable.
2130 return false;
2131}
2132
2133 // Return one of the following:
2134// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2135// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2136// positive counterpart, which will be materialized from the first returned
2137 // element. The second returned element indicates that an FNEG should be
2138 // applied afterwards.
2139// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2140std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2141 EVT VT) const {
2142 if (!Subtarget.hasStdExtZfa())
2143 return std::make_pair(x: -1, y: false);
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return std::make_pair(x: -1, y: false);
2157
2158 int Index = RISCVLoadFPImm::getLoadFPImm(FPImm: Imm);
2159 if (Index < 0 && Imm.isNegative())
2160 // Try the combination of its positive counterpart + FNEG.
2161 return std::make_pair(x: RISCVLoadFPImm::getLoadFPImm(FPImm: -Imm), y: true);
2162 else
2163 return std::make_pair(x&: Index, y: false);
2164}
2165
2166bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2167 bool ForCodeSize) const {
2168 bool IsLegalVT = false;
2169 if (VT == MVT::f16)
2170 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2171 else if (VT == MVT::f32)
2172 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2173 else if (VT == MVT::f64)
2174 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2175 else if (VT == MVT::bf16)
2176 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2177
2178 if (!IsLegalVT)
2179 return false;
2180
2181 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2182 return true;
2183
2184 // Cannot create a 64 bit floating-point immediate value for rv32.
2185 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2186 // td can handle +0.0 or -0.0 already.
2187 // -0.0 can be created by fmv + fneg.
2188 return Imm.isZero();
2189 }
2190
2191 // Special case: fmv + fneg
2192 if (Imm.isNegZero())
2193 return true;
2194
2195 // Building an integer and then converting requires a fmv at the end of
2196 // the integer sequence.
2197 const int Cost =
2198 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2199 Subtarget);
2200 return Cost <= FPImmCost;
2201}
2202
2203// TODO: This is very conservative.
2204bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2205 unsigned Index) const {
2206 if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT))
2207 return false;
2208
2209 // Only support extracting a fixed from a fixed vector for now.
2210 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2211 return false;
2212
2213 EVT EltVT = ResVT.getVectorElementType();
2214 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2215
2216 // The smallest type we can slide is i8.
2217 // TODO: We can extract index 0 from a mask vector without a slide.
2218 if (EltVT == MVT::i1)
2219 return false;
2220
2221 unsigned ResElts = ResVT.getVectorNumElements();
2222 unsigned SrcElts = SrcVT.getVectorNumElements();
2223
2224 unsigned MinVLen = Subtarget.getRealMinVLen();
2225 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2226
2227 // If we're extracting only data from the first VLEN bits of the source
2228 // then we can always do this with an m1 vslidedown.vx. Restricting the
2229 // Index ensures we can use a vslidedown.vi.
2230 // TODO: We can generalize this when the exact VLEN is known.
2231 if (Index + ResElts <= MinVLMAX && Index < 31)
2232 return true;
2233
2234 // Conservatively only handle extracting half of a vector.
2235 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2236 // a cheap extract. However, this case is important in practice for
2237 // shuffled extracts of longer vectors. How should we resolve this?
2238 if ((ResElts * 2) != SrcElts)
2239 return false;
2240
2241 // Slides can support an arbitrary index, but we only treat vslidedown.vi as
2242 // cheap.
2243 if (Index >= 32)
2244 return false;
2245
2246 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2247 // the upper half of a vector until we have more test coverage.
2248 return Index == 0 || Index == ResElts;
2249}
2250
2251MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2252 CallingConv::ID CC,
2253 EVT VT) const {
2254 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2255 // We might still end up using a GPR but that will be decided based on ABI.
2256 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2257 !Subtarget.hasStdExtZfhminOrZhinxmin())
2258 return MVT::f32;
2259
2260 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2261
2262 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2263 return MVT::i64;
2264
2265 return PartVT;
2266}
2267
2268unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2269 CallingConv::ID CC,
2270 EVT VT) const {
2271 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2272 // We might still end up using a GPR but that will be decided based on ABI.
2273 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2274 !Subtarget.hasStdExtZfhminOrZhinxmin())
2275 return 1;
2276
2277 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2278}
2279
2280unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2281 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2282 unsigned &NumIntermediates, MVT &RegisterVT) const {
2283 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2284 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2285
2286 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2287 IntermediateVT = MVT::i64;
2288
2289 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2290 RegisterVT = MVT::i64;
2291
2292 return NumRegs;
2293}
2294
2295// Changes the condition code and swaps operands if necessary, so the SetCC
2296// operation matches one of the comparisons supported directly by branches
2297// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2298// with 1/-1.
2299static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2300 ISD::CondCode &CC, SelectionDAG &DAG) {
2301 // If this is a single bit test that can't be handled by ANDI, shift the
2302 // bit to be tested to the MSB and perform a signed compare with 0.
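// For example, on RV32 (X & 0x8000) == 0 becomes (X << 16) >= 0 (signed).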
2303 if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) &&
2304 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2305 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
2306 uint64_t Mask = LHS.getConstantOperandVal(i: 1);
2307 if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) {
2308 unsigned ShAmt = 0;
2309 if (isPowerOf2_64(Value: Mask)) {
2310 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2311 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask);
2312 } else {
2313 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask);
2314 }
2315
2316 LHS = LHS.getOperand(i: 0);
2317 if (ShAmt != 0)
2318 LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS,
2319 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
2320 return;
2321 }
2322 }
2323
2324 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
2325 int64_t C = RHSC->getSExtValue();
2326 switch (CC) {
2327 default: break;
2328 case ISD::SETGT:
2329 // Convert X > -1 to X >= 0.
2330 if (C == -1) {
2331 RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
2332 CC = ISD::SETGE;
2333 return;
2334 }
2335 break;
2336 case ISD::SETLT:
2337 // Convert X < 1 to 0 >= X.
2338 if (C == 1) {
2339 RHS = LHS;
2340 LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
2341 CC = ISD::SETGE;
2342 return;
2343 }
2344 break;
2345 }
2346 }
2347
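// Branches natively support only EQ/NE, LT/GE and LTU/GEU; handle the
// remaining predicates by swapping the operands.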
2348 switch (CC) {
2349 default:
2350 break;
2351 case ISD::SETGT:
2352 case ISD::SETLE:
2353 case ISD::SETUGT:
2354 case ISD::SETULE:
2355 CC = ISD::getSetCCSwappedOperands(Operation: CC);
2356 std::swap(a&: LHS, b&: RHS);
2357 break;
2358 }
2359}
2360
2361RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2362 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2363 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
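// Treat i1 vectors as if each element occupied a byte (the SEW=8 equivalent)
// so the size-based mapping below applies.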
2364 if (VT.getVectorElementType() == MVT::i1)
2365 KnownSize *= 8;
2366
2367 switch (KnownSize) {
2368 default:
2369 llvm_unreachable("Invalid LMUL.");
2370 case 8:
2371 return RISCVII::VLMUL::LMUL_F8;
2372 case 16:
2373 return RISCVII::VLMUL::LMUL_F4;
2374 case 32:
2375 return RISCVII::VLMUL::LMUL_F2;
2376 case 64:
2377 return RISCVII::VLMUL::LMUL_1;
2378 case 128:
2379 return RISCVII::VLMUL::LMUL_2;
2380 case 256:
2381 return RISCVII::VLMUL::LMUL_4;
2382 case 512:
2383 return RISCVII::VLMUL::LMUL_8;
2384 }
2385}
2386
2387unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2388 switch (LMul) {
2389 default:
2390 llvm_unreachable("Invalid LMUL.");
2391 case RISCVII::VLMUL::LMUL_F8:
2392 case RISCVII::VLMUL::LMUL_F4:
2393 case RISCVII::VLMUL::LMUL_F2:
2394 case RISCVII::VLMUL::LMUL_1:
2395 return RISCV::VRRegClassID;
2396 case RISCVII::VLMUL::LMUL_2:
2397 return RISCV::VRM2RegClassID;
2398 case RISCVII::VLMUL::LMUL_4:
2399 return RISCV::VRM4RegClassID;
2400 case RISCVII::VLMUL::LMUL_8:
2401 return RISCV::VRM8RegClassID;
2402 }
2403}
2404
2405unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2406 RISCVII::VLMUL LMUL = getLMUL(VT);
2407 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2408 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2409 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2410 LMUL == RISCVII::VLMUL::LMUL_1) {
2411 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2412 "Unexpected subreg numbering");
2413 return RISCV::sub_vrm1_0 + Index;
2414 }
2415 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2416 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2417 "Unexpected subreg numbering");
2418 return RISCV::sub_vrm2_0 + Index;
2419 }
2420 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2421 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2422 "Unexpected subreg numbering");
2423 return RISCV::sub_vrm4_0 + Index;
2424 }
2425 llvm_unreachable("Invalid vector type.");
2426}
2427
2428unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2429 if (VT.getVectorElementType() == MVT::i1)
2430 return RISCV::VRRegClassID;
2431 return getRegClassIDForLMUL(LMul: getLMUL(VT));
2432}
2433
2434// Attempt to decompose a subvector insert/extract between VecVT and
2435// SubVecVT via subregister indices. Returns the subregister index that
2436// can perform the subvector insert/extract with the given element index, as
2437// well as the index corresponding to any leftover subvectors that must be
2438// further inserted/extracted within the register class for SubVecVT.
2439std::pair<unsigned, unsigned>
2440RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2441 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2442 const RISCVRegisterInfo *TRI) {
2443 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2444 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2445 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2446 "Register classes not ordered");
2447 unsigned VecRegClassID = getRegClassIDForVecVT(VT: VecVT);
2448 unsigned SubRegClassID = getRegClassIDForVecVT(VT: SubVecVT);
2449 // Try to compose a subregister index that takes us from the incoming
2450 // LMUL>1 register class down to the outgoing one. At each step we half
2451 // the LMUL:
2452 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2453 // Note that this is not guaranteed to find a subregister index, such as
2454 // when we are extracting from one VR type to another.
2455 unsigned SubRegIdx = RISCV::NoSubRegister;
2456 for (const unsigned RCID :
2457 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2458 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2459 VecVT = VecVT.getHalfNumVectorElementsVT();
2460 bool IsHi =
2461 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2462 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2463 getSubregIndexByMVT(VecVT, IsHi));
2464 if (IsHi)
2465 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2466 }
2467 return {SubRegIdx, InsertExtractIdx};
2468}
2469
2470// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2471// stores for those types.
2472bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2473 return !Subtarget.useRVVForFixedLengthVectors() ||
2474 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2475}
2476
2477bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2478 if (!ScalarTy.isSimple())
2479 return false;
2480 switch (ScalarTy.getSimpleVT().SimpleTy) {
2481 case MVT::iPTR:
2482 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2483 case MVT::i8:
2484 case MVT::i16:
2485 case MVT::i32:
2486 return true;
2487 case MVT::i64:
2488 return Subtarget.hasVInstructionsI64();
2489 case MVT::f16:
2490 return Subtarget.hasVInstructionsF16();
2491 case MVT::f32:
2492 return Subtarget.hasVInstructionsF32();
2493 case MVT::f64:
2494 return Subtarget.hasVInstructionsF64();
2495 default:
2496 return false;
2497 }
2498}
2499
2500
2501unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2502 return NumRepeatedDivisors;
2503}
2504
2505static SDValue getVLOperand(SDValue Op) {
2506 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2507 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2508 "Unexpected opcode");
2509 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2510 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
2511 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2512 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2513 if (!II)
2514 return SDValue();
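// VLOperand indexes the intrinsic's own argument list, so skip the intrinsic
// ID operand (and the chain, when present).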
2515 return Op.getOperand(i: II->VLOperand + 1 + HasChain);
2516}
2517
2518static bool useRVVForFixedLengthVectorVT(MVT VT,
2519 const RISCVSubtarget &Subtarget) {
2520 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2521 if (!Subtarget.useRVVForFixedLengthVectors())
2522 return false;
2523
2524 // We only support a set of vector types with a consistent maximum fixed size
2525 // across all supported vector element types to avoid legalization issues.
2526 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2527 // fixed-length vector type we support is 1024 bytes.
2528 if (VT.getFixedSizeInBits() > 1024 * 8)
2529 return false;
2530
2531 unsigned MinVLen = Subtarget.getRealMinVLen();
2532
2533 MVT EltVT = VT.getVectorElementType();
2534
2535 // Don't use RVV for vectors we cannot scalarize if required.
2536 switch (EltVT.SimpleTy) {
2537 // i1 is supported but has different rules.
2538 default:
2539 return false;
2540 case MVT::i1:
2541 // Masks can only use a single register.
2542 if (VT.getVectorNumElements() > MinVLen)
2543 return false;
2544 MinVLen /= 8;
2545 break;
2546 case MVT::i8:
2547 case MVT::i16:
2548 case MVT::i32:
2549 break;
2550 case MVT::i64:
2551 if (!Subtarget.hasVInstructionsI64())
2552 return false;
2553 break;
2554 case MVT::f16:
2555 if (!Subtarget.hasVInstructionsF16Minimal())
2556 return false;
2557 break;
2558 case MVT::f32:
2559 if (!Subtarget.hasVInstructionsF32())
2560 return false;
2561 break;
2562 case MVT::f64:
2563 if (!Subtarget.hasVInstructionsF64())
2564 return false;
2565 break;
2566 }
2567
2568 // Reject elements larger than ELEN.
2569 if (EltVT.getSizeInBits() > Subtarget.getELen())
2570 return false;
2571
2572 unsigned LMul = divideCeil(Numerator: VT.getSizeInBits(), Denominator: MinVLen);
2573 // Don't use RVV for types that don't fit.
2574 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2575 return false;
2576
2577 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2578 // the base fixed length RVV support in place.
2579 if (!VT.isPow2VectorType())
2580 return false;
2581
2582 return true;
2583}
2584
2585bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2586 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2587}
2588
2589 // Return the smallest RVV container (scalable vector) type that is guaranteed to hold all of VT's elements.
2590static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2591 const RISCVSubtarget &Subtarget) {
2592 // This may be called before legal types are set up.
2593 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2594 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2595 "Expected legal fixed length vector!");
2596
2597 unsigned MinVLen = Subtarget.getRealMinVLen();
2598 unsigned MaxELen = Subtarget.getELen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601 switch (EltVT.SimpleTy) {
2602 default:
2603 llvm_unreachable("unexpected element type for RVV container");
2604 case MVT::i1:
2605 case MVT::i8:
2606 case MVT::i16:
2607 case MVT::i32:
2608 case MVT::i64:
2609 case MVT::f16:
2610 case MVT::f32:
2611 case MVT::f64: {
2612 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2613 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2614 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
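// For example, with a minimum VLEN of 128 and ELEN of 64, v4i32 maps to
// nxv2i32, i.e. a single LMUL=1 register.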
2615 unsigned NumElts =
2616 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2617 NumElts = std::max(a: NumElts, b: RISCV::RVVBitsPerBlock / MaxELen);
2618 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2619 return MVT::getScalableVectorVT(VT: EltVT, NumElements: NumElts);
2620 }
2621 }
2622}
2623
2624static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2625 const RISCVSubtarget &Subtarget) {
2626 return getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT,
2627 Subtarget);
2628}
2629
2630MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2631 return ::getContainerForFixedLengthVector(TLI: *this, VT, Subtarget: getSubtarget());
2632}
2633
2634// Grow V to consume an entire RVV register.
2635static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2636 const RISCVSubtarget &Subtarget) {
2637 assert(VT.isScalableVector() &&
2638 "Expected to convert into a scalable vector!");
2639 assert(V.getValueType().isFixedLengthVector() &&
2640 "Expected a fixed length vector operand!");
2641 SDLoc DL(V);
2642 SDValue Zero = DAG.getVectorIdxConstant(Val: 0, DL);
2643 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: V, N3: Zero);
2644}
2645
2646// Shrink V so it's just big enough to maintain a VT's worth of data.
2647static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2648 const RISCVSubtarget &Subtarget) {
2649 assert(VT.isFixedLengthVector() &&
2650 "Expected to convert into a fixed length vector!");
2651 assert(V.getValueType().isScalableVector() &&
2652 "Expected a scalable vector operand!");
2653 SDLoc DL(V);
2654 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT());
2655 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: V, N2: Zero);
2656}
2657
2658 /// Return the mask type suitable for masking the provided vector type. This
2659 /// is simply a vector of i1 elements with the same (possibly scalable)
2660 /// length.
2661static MVT getMaskTypeFor(MVT VecVT) {
2662 assert(VecVT.isVector());
2663 ElementCount EC = VecVT.getVectorElementCount();
2664 return MVT::getVectorVT(MVT::i1, EC);
2665}
2666
2667 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2668 /// vector length VL.
2669static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2670 SelectionDAG &DAG) {
2671 MVT MaskVT = getMaskTypeFor(VecVT);
2672 return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: MaskVT, Operand: VL);
2673}
2674
2675static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2676 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2677 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2678 // canonicalize the representation. InsertVSETVLI will pick the immediate
2679 // encoding later if profitable.
2680 const auto [MinVLMAX, MaxVLMAX] =
2681 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2682 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
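// An X0 VL operand is interpreted as VLMAX.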
2683 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2684
2685 return DAG.getConstant(Val: NumElts, DL, VT: Subtarget.getXLenVT());
2686}
2687
2688static std::pair<SDValue, SDValue>
2689getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2690 const RISCVSubtarget &Subtarget) {
2691 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2692 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2693 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2694 return {Mask, VL};
2695}
2696
2697static std::pair<SDValue, SDValue>
2698getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2699 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2700 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2701 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2702 SDValue Mask = getAllOnesMask(VecVT: ContainerVT, VL, DL, DAG);
2703 return {Mask, VL};
2704}
2705
2706// Gets the two common "VL" operands: an all-ones mask and the vector length.
2707// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2708// the vector type that the fixed-length vector is contained in. Otherwise if
2709// VecVT is scalable, then ContainerVT should be the same as VecVT.
2710static std::pair<SDValue, SDValue>
2711getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2712 const RISCVSubtarget &Subtarget) {
2713 if (VecVT.isFixedLengthVector())
2714 return getDefaultVLOps(NumElts: VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2715 Subtarget);
2716 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2717 return getDefaultScalableVLOps(VecVT: ContainerVT, DL, DAG, Subtarget);
2718}
2719
2720SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2721 SelectionDAG &DAG) const {
2722 assert(VecVT.isScalableVector() && "Expected scalable vector");
2723 return DAG.getElementCount(DL, VT: Subtarget.getXLenVT(),
2724 EC: VecVT.getVectorElementCount());
2725}
2726
2727std::pair<unsigned, unsigned>
2728RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2729 const RISCVSubtarget &Subtarget) {
2730 assert(VecVT.isScalableVector() && "Expected scalable vector");
2731
2732 unsigned EltSize = VecVT.getScalarSizeInBits();
2733 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2734
2735 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2736 unsigned MaxVLMAX =
2737 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
2738
2739 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2740 unsigned MinVLMAX =
2741 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMin, EltSize, MinSize);
2742
2743 return std::make_pair(x&: MinVLMAX, y&: MaxVLMAX);
2744}
2745
2746// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2747// of either is (currently) supported. This can get us into an infinite loop
2748// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2749// as a ..., etc.
2750// Until either (or both) of these can reliably lower any node, reporting that
2751// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2752// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2753// which is not desirable.
2754bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2755 EVT VT, unsigned DefinedValues) const {
2756 return false;
2757}
2758
2759InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2760 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2761 // really implementation-defined.
2762 if (!VT.isVector())
2763 return InstructionCost::getInvalid();
2764 unsigned DLenFactor = Subtarget.getDLenFactor();
2765 unsigned Cost;
2766 if (VT.isScalableVector()) {
2767 unsigned LMul;
2768 bool Fractional;
2769 std::tie(args&: LMul, args&: Fractional) =
2770 RISCVVType::decodeVLMUL(VLMUL: RISCVTargetLowering::getLMUL(VT));
2771 if (Fractional)
2772 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2773 else
2774 Cost = (LMul * DLenFactor);
2775 } else {
2776 Cost = divideCeil(Numerator: VT.getSizeInBits(), Denominator: Subtarget.getRealMinVLen() / DLenFactor);
2777 }
2778 return Cost;
2779}
2780
2781
2782/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2783 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2784 /// the operands (index and possibly mask) are handled separately.
2785InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2786 return getLMULCost(VT) * getLMULCost(VT);
2787}
2788
2789/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2790/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2791/// or may track the vrgather.vv cost. It is implementation-dependent.
2792InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2793 return getLMULCost(VT);
2794}
2795
2796/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2797/// for the type VT. (This does not cover the vslide1up or vslide1down
2798/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2799/// or may track the vrgather.vv cost. It is implementation-dependent.
2800InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2801 return getLMULCost(VT);
2802}
2803
2804/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2805/// for the type VT. (This does not cover the vslide1up or vslide1down
2806/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2807/// or may track the vrgather.vv cost. It is implementation-dependent.
2808InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2809 return getLMULCost(VT);
2810}
2811
2812static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2813 const RISCVSubtarget &Subtarget) {
2814 // RISC-V FP-to-int conversions saturate to the destination register size, but
2815 // don't produce 0 for nan. We can use a conversion instruction and fix the
2816 // nan case with a compare and a select.
2817 SDValue Src = Op.getOperand(i: 0);
2818
2819 MVT DstVT = Op.getSimpleValueType();
2820 EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2821
2822 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2823
2824 if (!DstVT.isVector()) {
2825 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2826 // the result.
2827 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2828 Src.getValueType() == MVT::bf16) {
2829 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2830 }
2831
2832 unsigned Opc;
2833 if (SatVT == DstVT)
2834 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2835 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2836 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2837 else
2838 return SDValue();
2839 // FIXME: Support other SatVTs by clamping before or after the conversion.
2840
2841 SDLoc DL(Op);
2842 SDValue FpToInt = DAG.getNode(
2843 Opcode: Opc, DL, VT: DstVT, N1: Src,
2844 N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT()));
2845
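// On RV64, fcvt.wu.* sign-extends its 32-bit result, so explicitly clear the
// upper 32 bits here.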
2846 if (Opc == RISCVISD::FCVT_WU_RV64)
2847 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2848
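// Src != Src (an unordered compare) detects NaN; select 0 in that case and
// the converted value otherwise.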
2849 SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT);
2850 return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt,
2851 Cond: ISD::CondCode::SETUO);
2852 }
2853
2854 // Vectors.
2855
2856 MVT DstEltVT = DstVT.getVectorElementType();
2857 MVT SrcVT = Src.getSimpleValueType();
2858 MVT SrcEltVT = SrcVT.getVectorElementType();
2859 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2860 unsigned DstEltSize = DstEltVT.getSizeInBits();
2861
2862 // Only handle saturating to the destination type.
2863 if (SatVT != DstEltVT)
2864 return SDValue();
2865
2866 // FIXME: Don't support narrowing by more than 1 step for now.
2867 if (SrcEltSize > (2 * DstEltSize))
2868 return SDValue();
2869
2870 MVT DstContainerVT = DstVT;
2871 MVT SrcContainerVT = SrcVT;
2872 if (DstVT.isFixedLengthVector()) {
2873 DstContainerVT = getContainerForFixedLengthVector(DAG, VT: DstVT, Subtarget);
2874 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
2875 assert(DstContainerVT.getVectorElementCount() ==
2876 SrcContainerVT.getVectorElementCount() &&
2877 "Expected same element count");
2878 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
2879 }
2880
2881 SDLoc DL(Op);
2882
2883 auto [Mask, VL] = getDefaultVLOps(VecVT: DstVT, ContainerVT: DstContainerVT, DL, DAG, Subtarget);
2884
2885 SDValue IsNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
2886 Ops: {Src, Src, DAG.getCondCode(Cond: ISD::SETNE),
2887 DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL});
2888
2889 // If we need to widen by more than 1 step, promote the FP type, then do a
2890 // widening convert.
2891 if (DstEltSize > (2 * SrcEltSize)) {
2892 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2893 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2894 Src = DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL);
2895 }
2896
2897 unsigned RVVOpc =
2898 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2899 SDValue Res = DAG.getNode(Opcode: RVVOpc, DL, VT: DstContainerVT, N1: Src, N2: Mask, N3: VL);
2900
2901 SDValue SplatZero = DAG.getNode(
2902 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: DstContainerVT, N1: DAG.getUNDEF(VT: DstContainerVT),
2903 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL);
2904 Res = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: DstContainerVT, N1: IsNan, N2: SplatZero,
2905 N3: Res, N4: DAG.getUNDEF(VT: DstContainerVT), N5: VL);
2906
2907 if (DstVT.isFixedLengthVector())
2908 Res = convertFromScalableVector(VT: DstVT, V: Res, DAG, Subtarget);
2909
2910 return Res;
2911}
2912
2913static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2914 switch (Opc) {
2915 case ISD::FROUNDEVEN:
2916 case ISD::STRICT_FROUNDEVEN:
2917 case ISD::VP_FROUNDEVEN:
2918 return RISCVFPRndMode::RNE;
2919 case ISD::FTRUNC:
2920 case ISD::STRICT_FTRUNC:
2921 case ISD::VP_FROUNDTOZERO:
2922 return RISCVFPRndMode::RTZ;
2923 case ISD::FFLOOR:
2924 case ISD::STRICT_FFLOOR:
2925 case ISD::VP_FFLOOR:
2926 return RISCVFPRndMode::RDN;
2927 case ISD::FCEIL:
2928 case ISD::STRICT_FCEIL:
2929 case ISD::VP_FCEIL:
2930 return RISCVFPRndMode::RUP;
2931 case ISD::FROUND:
2932 case ISD::STRICT_FROUND:
2933 case ISD::VP_FROUND:
2934 return RISCVFPRndMode::RMM;
2935 case ISD::FRINT:
2936 return RISCVFPRndMode::DYN;
2937 }
2938
2939 return RISCVFPRndMode::Invalid;
2940}
2941
2942// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
2943// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2944// the integer domain and back, taking care to avoid converting values that
2945// are nan or already correct.
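// As a rough, purely illustrative sketch (VL/policy operands elided), an FCEIL
// on a vector expands to something like:
//   %keep = SETCC_VL (FABS_VL %src), splat(2^(precision-1)), setolt
//   %int  = VFCVT_RM_X_F_VL %src, %keep, frm=RUP
//   %fp   = SINT_TO_FP_VL %int, %keep
//   %res  = FCOPYSIGN_VL %fp, %src, merge=%src, %keep
// Lanes that are nan or have no fractional bits fall outside %keep and retain
// their original value via the merge operand.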
2946static SDValue
2947lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2948 const RISCVSubtarget &Subtarget) {
2949 MVT VT = Op.getSimpleValueType();
2950 assert(VT.isVector() && "Unexpected type");
2951
2952 SDLoc DL(Op);
2953
2954 SDValue Src = Op.getOperand(i: 0);
2955
2956 MVT ContainerVT = VT;
2957 if (VT.isFixedLengthVector()) {
2958 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2959 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
2960 }
2961
2962 SDValue Mask, VL;
2963 if (Op->isVPOpcode()) {
2964 Mask = Op.getOperand(i: 1);
2965 if (VT.isFixedLengthVector())
2966 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
2967 Subtarget);
2968 VL = Op.getOperand(i: 2);
2969 } else {
2970 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
2971 }
2972
2973 // Freeze the source since we are increasing the number of uses.
2974 Src = DAG.getFreeze(V: Src);
2975
2976 // We do the conversion on the absolute value and fix the sign at the end.
2977 SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
2978
2979 // Determine the largest integer that can be represented exactly. This and
2980 // values larger than it don't have any fractional bits so don't need to
2981 // be converted.
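  // For example, for f32 (precision 24) this is 2^23 = 8388608.0, and for f64
  // (precision 53) it is 2^52.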
2982 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT);
2983 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2984 APFloat MaxVal = APFloat(FltSem);
2985 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
2986 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
2987 SDValue MaxValNode =
2988 DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType());
2989 SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT,
2990 N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL);
2991
2992 // If abs(Src) was larger than MaxVal or nan, keep it.
2993 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2994 Mask =
2995 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: SetccVT,
2996 Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT),
2997 Mask, Mask, VL});
2998
2999 // Truncate to integer and convert back to FP.
3000 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3001 MVT XLenVT = Subtarget.getXLenVT();
3002 SDValue Truncated;
3003
3004 switch (Op.getOpcode()) {
3005 default:
3006 llvm_unreachable("Unexpected opcode");
3007 case ISD::FCEIL:
3008 case ISD::VP_FCEIL:
3009 case ISD::FFLOOR:
3010 case ISD::VP_FFLOOR:
3011 case ISD::FROUND:
3012 case ISD::FROUNDEVEN:
3013 case ISD::VP_FROUND:
3014 case ISD::VP_FROUNDEVEN:
3015 case ISD::VP_FROUNDTOZERO: {
3016 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3017 assert(FRM != RISCVFPRndMode::Invalid);
3018 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask,
3019 N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL);
3020 break;
3021 }
3022 case ISD::FTRUNC:
3023 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RTZ_X_F_VL, DL, VT: IntVT, N1: Src,
3024 N2: Mask, N3: VL);
3025 break;
3026 case ISD::FRINT:
3027 case ISD::VP_FRINT:
3028 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL);
3029 break;
3030 case ISD::FNEARBYINT:
3031 case ISD::VP_FNEARBYINT:
3032 Truncated = DAG.getNode(Opcode: RISCVISD::VFROUND_NOEXCEPT_VL, DL, VT: ContainerVT, N1: Src,
3033 N2: Mask, N3: VL);
3034 break;
3035 }
3036
3037 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3038 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3039 Truncated = DAG.getNode(Opcode: RISCVISD::SINT_TO_FP_VL, DL, VT: ContainerVT, N1: Truncated,
3040 N2: Mask, N3: VL);
3041
3042 // Restore the original sign so that -0.0 is preserved.
3043 Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated,
3044 N2: Src, N3: Src, N4: Mask, N5: VL);
3045
3046 if (!VT.isFixedLengthVector())
3047 return Truncated;
3048
3049 return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3050}
3051
3052// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3053// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs of the source
3054// to qNaNs and converting the new source to integer and back to FP.
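// As a rough, purely illustrative sketch (chains, VLs and policies elided):
//   %unord = STRICT_FSETCC_VL %src, %src, setune     ; lanes that are nan
//   %src'  = STRICT_FADD_VL %src, %src, mask=%unord  ; quiets any sNaN
// followed by the same abs/compare/convert/copysign sequence as the
// non-strict lowering above, using the STRICT_* conversion nodes.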
3055static SDValue
3056lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3057 const RISCVSubtarget &Subtarget) {
3058 SDLoc DL(Op);
3059 MVT VT = Op.getSimpleValueType();
3060 SDValue Chain = Op.getOperand(i: 0);
3061 SDValue Src = Op.getOperand(i: 1);
3062
3063 MVT ContainerVT = VT;
3064 if (VT.isFixedLengthVector()) {
3065 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3066 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
3067 }
3068
3069 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3070
3071 // Freeze the source since we are increasing the number of uses.
3072 Src = DAG.getFreeze(V: Src);
3073
3074 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3075 MVT MaskVT = Mask.getSimpleValueType();
3076 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3077 DAG.getVTList(MaskVT, MVT::Other),
3078 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3079 DAG.getUNDEF(MaskVT), Mask, VL});
3080 Chain = Unorder.getValue(R: 1);
3081 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3082 DAG.getVTList(ContainerVT, MVT::Other),
3083 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3084 Chain = Src.getValue(R: 1);
3085
3086 // We do the conversion on the absolute value and fix the sign at the end.
3087 SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
3088
3089 // Determine the largest integer that can be represented exactly. This and
3090 // values larger than it don't have any fractional bits so don't need to
3091 // be converted.
3092 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT);
3093 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3094 APFloat MaxVal = APFloat(FltSem);
3095 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
3096 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
3097 SDValue MaxValNode =
3098 DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType());
3099 SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT,
3100 N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL);
3101
3102 // If abs(Src) was larger than MaxVal or nan, keep it.
3103 Mask = DAG.getNode(
3104 Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
3105 Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT), Mask, Mask, VL});
3106
3107 // Truncate to integer and convert back to FP.
3108 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3109 MVT XLenVT = Subtarget.getXLenVT();
3110 SDValue Truncated;
3111
3112 switch (Op.getOpcode()) {
3113 default:
3114 llvm_unreachable("Unexpected opcode");
3115 case ISD::STRICT_FCEIL:
3116 case ISD::STRICT_FFLOOR:
3117 case ISD::STRICT_FROUND:
3118 case ISD::STRICT_FROUNDEVEN: {
3119 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3120 assert(FRM != RISCVFPRndMode::Invalid);
3121 Truncated = DAG.getNode(
3122 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3123 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3124 break;
3125 }
3126 case ISD::STRICT_FTRUNC:
3127 Truncated =
3128 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3129 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3130 break;
3131 case ISD::STRICT_FNEARBYINT:
3132 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3133 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3134 Mask, VL);
3135 break;
3136 }
3137 Chain = Truncated.getValue(R: 1);
3138
3139 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3140 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3141 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3142 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3143 Truncated, Mask, VL);
3144 Chain = Truncated.getValue(R: 1);
3145 }
3146
3147 // Restore the original sign so that -0.0 is preserved.
3148 Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated,
3149 N2: Src, N3: Src, N4: Mask, N5: VL);
3150
3151 if (VT.isFixedLengthVector())
3152 Truncated = convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3153 return DAG.getMergeValues(Ops: {Truncated, Chain}, dl: DL);
3154}
3155
3156static SDValue
3157lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3158 const RISCVSubtarget &Subtarget) {
3159 MVT VT = Op.getSimpleValueType();
3160 if (VT.isVector())
3161 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3162
3163 if (DAG.shouldOptForSize())
3164 return SDValue();
3165
3166 SDLoc DL(Op);
3167 SDValue Src = Op.getOperand(i: 0);
3168
3169 // Create an integer the size of the mantissa with the MSB set. This and all
3170 // values larger than it don't have any fractional bits so don't need to be
3171 // converted.
3172 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3173 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3174 APFloat MaxVal = APFloat(FltSem);
3175 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
3176 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
3177 SDValue MaxValNode = DAG.getConstantFP(Val: MaxVal, DL, VT);
3178
3179 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3180 return DAG.getNode(Opcode: RISCVISD::FROUND, DL, VT, N1: Src, N2: MaxValNode,
3181 N3: DAG.getTargetConstant(Val: FRM, DL, VT: Subtarget.getXLenVT()));
3182}
3183
3184// Expand vector LRINT and LLRINT by converting to the integer domain.
3185static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3186 const RISCVSubtarget &Subtarget) {
3187 MVT VT = Op.getSimpleValueType();
3188 assert(VT.isVector() && "Unexpected type");
3189
3190 SDLoc DL(Op);
3191 SDValue Src = Op.getOperand(i: 0);
3192 MVT ContainerVT = VT;
3193
3194 if (VT.isFixedLengthVector()) {
3195 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
3197 }
3198
3199 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3200 SDValue Truncated =
3201 DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
3202
3203 if (!VT.isFixedLengthVector())
3204 return Truncated;
3205
3206 return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3207}
3208
3209static SDValue
3210getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3211 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3212 SDValue Offset, SDValue Mask, SDValue VL,
3213 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3214 if (Merge.isUndef())
3215 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3216 SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT());
3217 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3218 return DAG.getNode(Opcode: RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3219}
3220
3221static SDValue
3222getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3223 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3224 SDValue VL,
3225 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3226 if (Merge.isUndef())
3227 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3228 SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT());
3229 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3230 return DAG.getNode(Opcode: RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3231}
3232
3233static MVT getLMUL1VT(MVT VT) {
3234 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3235 "Unexpected vector MVT");
3236 return MVT::getScalableVectorVT(
3237 VT: VT.getVectorElementType(),
3238 NumElements: RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3239}
3240
3241struct VIDSequence {
3242 int64_t StepNumerator;
3243 unsigned StepDenominator;
3244 int64_t Addend;
3245};
3246
3247static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3248 uint32_t BitWidth) {
3249 // We will use a SINT_TO_FP to materialize this constant so we should use a
3250 // signed APSInt here.
3251 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3252 // We use an arbitrary rounding mode here. If a floating-point is an exact
3253 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3254 // the rounding mode changes the output value, then it is not an exact
3255 // integer.
3256 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3257 bool IsExact;
3258 // If it is out of signed integer range, it will return an invalid operation.
3259 // If it is not an exact integer, IsExact is false.
3260 if ((APF.convertToInteger(Result&: ValInt, RM: ArbitraryRM, IsExact: &IsExact) ==
3261 APFloatBase::opInvalidOp) ||
3262 !IsExact)
3263 return std::nullopt;
3264 return ValInt.extractBitsAsZExtValue(numBits: BitWidth, bitPosition: 0);
3265}
3266
3267// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3268// to the (non-zero) step S and start value X. This can then be lowered as the
3269// RVV sequence (VID * S) + X, for example.
3270// The step S is represented as an integer numerator divided by a positive
3271// denominator. Note that the implementation currently only identifies
3272// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3273// cannot detect 2/3, for example.
3274// Note that this method will also match potentially unappealing index
3275// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3276// determine whether this is worth generating code for.
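// For example (illustrative):
//   <0, 2, 4, 6>  ->  {StepNumerator: 2, StepDenominator: 1, Addend: 0}
//   <1, 1, 2, 2>  ->  {StepNumerator: 1, StepDenominator: 2, Addend: 1}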
3277static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3278 unsigned EltSizeInBits) {
3279 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3280 if (!cast<BuildVectorSDNode>(Val&: Op)->isConstant())
3281 return std::nullopt;
3282 bool IsInteger = Op.getValueType().isInteger();
3283
3284 std::optional<unsigned> SeqStepDenom;
3285 std::optional<int64_t> SeqStepNum, SeqAddend;
3286 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3287 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3288
3289 // First extract the ops into a list of constant integer values. This may not
3290 // be possible for floats if they're not all representable as integers.
3291 SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands());
3292 const unsigned OpSize = Op.getScalarValueSizeInBits();
3293 for (auto [Idx, Elt] : enumerate(First: Op->op_values())) {
3294 if (Elt.isUndef()) {
3295 Elts[Idx] = std::nullopt;
3296 continue;
3297 }
3298 if (IsInteger) {
3299 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(N: OpSize);
3300 } else {
3301 auto ExactInteger =
3302 getExactInteger(APF: cast<ConstantFPSDNode>(Val: Elt)->getValueAPF(), BitWidth: OpSize);
3303 if (!ExactInteger)
3304 return std::nullopt;
3305 Elts[Idx] = *ExactInteger;
3306 }
3307 }
3308
3309 for (auto [Idx, Elt] : enumerate(First&: Elts)) {
3310 // Assume undef elements match the sequence; we just have to be careful
3311 // when interpolating across them.
3312 if (!Elt)
3313 continue;
3314
3315 if (PrevElt) {
3316 // Calculate the step since the last non-undef element, and ensure
3317 // it's consistent across the entire sequence.
3318 unsigned IdxDiff = Idx - PrevElt->second;
3319 int64_t ValDiff = SignExtend64(X: *Elt - PrevElt->first, B: EltSizeInBits);
3320
3321 // A zero value difference means that we're somewhere in the middle
3322 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3323 // step change before evaluating the sequence.
3324 if (ValDiff == 0)
3325 continue;
3326
3327 int64_t Remainder = ValDiff % IdxDiff;
3328 // Normalize the step if it's greater than 1.
3329 if (Remainder != ValDiff) {
3330 // The difference must cleanly divide the element span.
3331 if (Remainder != 0)
3332 return std::nullopt;
3333 ValDiff /= IdxDiff;
3334 IdxDiff = 1;
3335 }
3336
3337 if (!SeqStepNum)
3338 SeqStepNum = ValDiff;
3339 else if (ValDiff != SeqStepNum)
3340 return std::nullopt;
3341
3342 if (!SeqStepDenom)
3343 SeqStepDenom = IdxDiff;
3344 else if (IdxDiff != *SeqStepDenom)
3345 return std::nullopt;
3346 }
3347
3348 // Record this non-undef element for later.
3349 if (!PrevElt || PrevElt->first != *Elt)
3350 PrevElt = std::make_pair(x&: *Elt, y&: Idx);
3351 }
3352
3353 // We need to have logged a step for this to count as a legal index sequence.
3354 if (!SeqStepNum || !SeqStepDenom)
3355 return std::nullopt;
3356
3357 // Loop back through the sequence and validate elements we might have skipped
3358 // while waiting for a valid step. While doing this, log any sequence addend.
3359 for (auto [Idx, Elt] : enumerate(First&: Elts)) {
3360 if (!Elt)
3361 continue;
3362 uint64_t ExpectedVal =
3363 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3364 int64_t Addend = SignExtend64(X: *Elt - ExpectedVal, B: EltSizeInBits);
3365 if (!SeqAddend)
3366 SeqAddend = Addend;
3367 else if (Addend != SeqAddend)
3368 return std::nullopt;
3369 }
3370
3371 assert(SeqAddend && "Must have an addend if we have a step");
3372
3373 return VIDSequence{.StepNumerator: *SeqStepNum, .StepDenominator: *SeqStepDenom, .Addend: *SeqAddend};
3374}
3375
3376// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3377// and lower it as a VRGATHER_VX_VL from the source vector.
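// For example (illustrative), a splat of (extract_vector_elt %vec, %idx) where
// %vec has the same type as the result becomes (VRGATHER_VX_VL %vec, %idx)
// instead of a scalar extract followed by a scalar splat.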
3378static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3379 SelectionDAG &DAG,
3380 const RISCVSubtarget &Subtarget) {
3381 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3382 return SDValue();
3383 SDValue Vec = SplatVal.getOperand(i: 0);
3384 // Only perform this optimization on vectors of the same size for simplicity.
3385 // Don't perform this optimization for i1 vectors.
3386 // FIXME: Support i1 vectors, maybe by promoting to i8?
3387 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3388 return SDValue();
3389 SDValue Idx = SplatVal.getOperand(i: 1);
3390 // The index must be a legal type.
3391 if (Idx.getValueType() != Subtarget.getXLenVT())
3392 return SDValue();
3393
3394 MVT ContainerVT = VT;
3395 if (VT.isFixedLengthVector()) {
3396 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3397 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
3398 }
3399
3400 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3401
3402 SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT, N1: Vec,
3403 N2: Idx, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
3404
3405 if (!VT.isFixedLengthVector())
3406 return Gather;
3407
3408 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
3409}
3410
3411
3412/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3413/// which constitute a large proportion of the elements. In such cases we can
3414/// splat a vector with the dominant element and make up the shortfall with
3415/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3416/// Note that this includes vectors of 2 elements by association. The
3417/// upper-most element is the "dominant" one, allowing us to use a splat to
3418/// "insert" the upper element, and an insert of the lower element at position
3419/// 0, which improves codegen.
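/// For example (illustrative), <2, 2, 2, 2, 3, 2, 2, 7> may be lowered as a
/// splat of 2 with the 3 and the trailing 7 inserted afterwards.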
3420static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3421 const RISCVSubtarget &Subtarget) {
3422 MVT VT = Op.getSimpleValueType();
3423 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3424
3425 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3426
3427 SDLoc DL(Op);
3428 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3429
3430 MVT XLenVT = Subtarget.getXLenVT();
3431 unsigned NumElts = Op.getNumOperands();
3432
3433 SDValue DominantValue;
3434 unsigned MostCommonCount = 0;
3435 DenseMap<SDValue, unsigned> ValueCounts;
3436 unsigned NumUndefElts =
3437 count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); });
3438
3439 // Track the number of scalar loads we know we'd be inserting, estimated as
3440 // any non-zero floating-point constant. Other kinds of element are either
3441 // already in registers or are materialized on demand. The threshold at which
3442 // a vector load is more desirable than several scalar materialization and
3443 // vector-insertion instructions is not known.
3444 unsigned NumScalarLoads = 0;
3445
3446 for (SDValue V : Op->op_values()) {
3447 if (V.isUndef())
3448 continue;
3449
3450 ValueCounts.insert(KV: std::make_pair(x&: V, y: 0));
3451 unsigned &Count = ValueCounts[V];
3452 if (0 == Count)
3453 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val&: V))
3454 NumScalarLoads += !CFP->isExactlyValue(V: +0.0);
3455
3456 // Is this value dominant? In case of a tie, prefer the highest element as
3457 // it's cheaper to insert near the beginning of a vector than it is at the
3458 // end.
3459 if (++Count >= MostCommonCount) {
3460 DominantValue = V;
3461 MostCommonCount = Count;
3462 }
3463 }
3464
3465 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3466 unsigned NumDefElts = NumElts - NumUndefElts;
3467 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3468
3469 // Don't perform this optimization when optimizing for size, since
3470 // materializing elements and inserting them tends to cause code bloat.
3471 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3472 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) &&
3473 ((MostCommonCount > DominantValueCountThreshold) ||
3474 (ValueCounts.size() <= Log2_32(Value: NumDefElts)))) {
3475 // Start by splatting the most common element.
3476 SDValue Vec = DAG.getSplatBuildVector(VT, DL, Op: DominantValue);
3477
3478 DenseSet<SDValue> Processed{DominantValue};
3479
3480 // We can handle an insert into the last element (of a splat) via
3481 // v(f)slide1down. This is slightly better than the vslideup insert
3482 // lowering as it avoids the need for a vector group temporary. It
3483 // is also better than using vmerge.vx as it avoids the need to
3484 // materialize the mask in a vector register.
3485 if (SDValue LastOp = Op->getOperand(Num: Op->getNumOperands() - 1);
3486 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3487 LastOp != DominantValue) {
3488 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
3489 auto OpCode =
3490 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3491 if (!VT.isFloatingPoint())
3492 LastOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: LastOp);
3493 Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec,
3494 N3: LastOp, N4: Mask, N5: VL);
3495 Vec = convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
3496 Processed.insert(V: LastOp);
3497 }
3498
3499 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3500 for (const auto &OpIdx : enumerate(First: Op->ops())) {
3501 const SDValue &V = OpIdx.value();
3502 if (V.isUndef() || !Processed.insert(V).second)
3503 continue;
3504 if (ValueCounts[V] == 1) {
3505 Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: Vec, N2: V,
3506 N3: DAG.getVectorIdxConstant(Val: OpIdx.index(), DL));
3507 } else {
3508 // Blend in all instances of this value using a VSELECT, using a
3509 // mask where each bit signals whether that element is the one
3510 // we're after.
3511 SmallVector<SDValue> Ops;
3512 transform(Range: Op->op_values(), d_first: std::back_inserter(x&: Ops), F: [&](SDValue V1) {
3513 return DAG.getConstant(Val: V == V1, DL, VT: XLenVT);
3514 });
3515 Vec = DAG.getNode(Opcode: ISD::VSELECT, DL, VT,
3516 N1: DAG.getBuildVector(VT: SelMaskTy, DL, Ops),
3517 N2: DAG.getSplatBuildVector(VT, DL, Op: V), N3: Vec);
3518 }
3519 }
3520
3521 return Vec;
3522 }
3523
3524 return SDValue();
3525}
3526
3527static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3528 const RISCVSubtarget &Subtarget) {
3529 MVT VT = Op.getSimpleValueType();
3530 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3531
3532 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3533
3534 SDLoc DL(Op);
3535 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3536
3537 MVT XLenVT = Subtarget.getXLenVT();
3538 unsigned NumElts = Op.getNumOperands();
3539
3540 if (VT.getVectorElementType() == MVT::i1) {
3541 if (ISD::isBuildVectorAllZeros(N: Op.getNode())) {
3542 SDValue VMClr = DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT: ContainerVT, Operand: VL);
3543 return convertFromScalableVector(VT, V: VMClr, DAG, Subtarget);
3544 }
3545
3546 if (ISD::isBuildVectorAllOnes(N: Op.getNode())) {
3547 SDValue VMSet = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
3548 return convertFromScalableVector(VT, V: VMSet, DAG, Subtarget);
3549 }
3550
3551 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3552 // scalar integer chunks whose bit-width depends on the number of mask
3553 // bits and XLEN.
3554 // First, determine the most appropriate scalar integer type to use. This
3555 // is at most XLenVT, but may be shrunk to a smaller vector element type
3556 // according to the size of the final vector - use i8 chunks rather than
3557 // XLenVT if we're producing a v8i1. This results in more consistent
3558 // codegen across RV32 and RV64.
3559 unsigned NumViaIntegerBits = std::clamp(val: NumElts, lo: 8u, hi: Subtarget.getXLen());
3560 NumViaIntegerBits = std::min(a: NumViaIntegerBits, b: Subtarget.getELen());
3561 // If we have to use more than one INSERT_VECTOR_ELT then this
3562 // optimization is likely to increase code size; avoid performing it in
3563 // such a case. We can use a load from a constant pool in this case.
3564 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3565 return SDValue();
3566 // Now we can create our integer vector type. Note that it may be larger
3567 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3568 unsigned IntegerViaVecElts = divideCeil(Numerator: NumElts, Denominator: NumViaIntegerBits);
3569 MVT IntegerViaVecVT =
3570 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NumViaIntegerBits),
3571 NumElements: IntegerViaVecElts);
3572
3573 uint64_t Bits = 0;
3574 unsigned BitPos = 0, IntegerEltIdx = 0;
3575 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3576
3577 for (unsigned I = 0; I < NumElts;) {
3578 SDValue V = Op.getOperand(i: I);
3579 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3580 Bits |= ((uint64_t)BitValue << BitPos);
3581 ++BitPos;
3582 ++I;
3583
3584 // Once we accumulate enough bits to fill our scalar type or process the
3585 // last element, insert into our vector and clear our accumulated data.
3586 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3587 if (NumViaIntegerBits <= 32)
3588 Bits = SignExtend64<32>(x: Bits);
3589 SDValue Elt = DAG.getConstant(Val: Bits, DL, VT: XLenVT);
3590 Elts[IntegerEltIdx] = Elt;
3591 Bits = 0;
3592 BitPos = 0;
3593 IntegerEltIdx++;
3594 }
3595 }
3596
3597 SDValue Vec = DAG.getBuildVector(VT: IntegerViaVecVT, DL, Ops: Elts);
3598
3599 if (NumElts < NumViaIntegerBits) {
3600 // If we're producing a smaller vector than our minimum legal integer
3601 // type, bitcast to the equivalent (known-legal) mask type, and extract
3602 // our final mask.
3603 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3604 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3605 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Vec,
3606 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
3607 } else {
3608 // Else we must have produced an integer type with the same size as the
3609 // mask type; bitcast for the final result.
3610 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3611 Vec = DAG.getBitcast(VT, V: Vec);
3612 }
3613
3614 return Vec;
3615 }
3616
3617 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
3618 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3619 : RISCVISD::VMV_V_X_VL;
3620 if (!VT.isFloatingPoint())
3621 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat);
3622 Splat =
3623 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL);
3624 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
3625 }
3626
3627 // Try and match index sequences, which we can lower to the vid instruction
3628 // with optional modifications. An all-undef vector is matched by
3629 // getSplatValue, above.
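  // For example (illustrative): <0, 1, 2, 3> lowers to a plain vid.v,
  // <3, 5, 7, 9> to (vid.v << 1) + 3, and <0, 0, 1, 1> to vid.v >> 1.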
3630 if (auto SimpleVID = isSimpleVIDSequence(Op, EltSizeInBits: Op.getScalarValueSizeInBits())) {
3631 int64_t StepNumerator = SimpleVID->StepNumerator;
3632 unsigned StepDenominator = SimpleVID->StepDenominator;
3633 int64_t Addend = SimpleVID->Addend;
3634
3635 assert(StepNumerator != 0 && "Invalid step");
3636 bool Negate = false;
3637 int64_t SplatStepVal = StepNumerator;
3638 unsigned StepOpcode = ISD::MUL;
3639 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3640 // anyway as the shift of 63 won't fit in uimm5.
3641 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3642 isPowerOf2_64(Value: std::abs(i: StepNumerator))) {
3643 Negate = StepNumerator < 0;
3644 StepOpcode = ISD::SHL;
3645 SplatStepVal = Log2_64(Value: std::abs(i: StepNumerator));
3646 }
3647
3648 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3649 // threshold since it's the immediate value many RVV instructions accept.
3650 // There is no vmul.vi instruction, so ensure the multiply constant can
3651 // fit in a single addi instruction.
3652 if (((StepOpcode == ISD::MUL && isInt<12>(x: SplatStepVal)) ||
3653 (StepOpcode == ISD::SHL && isUInt<5>(x: SplatStepVal))) &&
3654 isPowerOf2_32(Value: StepDenominator) &&
3655 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(x: Addend)) {
3656 MVT VIDVT =
3657 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3658 MVT VIDContainerVT =
3659 getContainerForFixedLengthVector(DAG, VT: VIDVT, Subtarget);
3660 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: VIDContainerVT, N1: Mask, N2: VL);
3661 // Convert right out of the scalable type so we can use standard ISD
3662 // nodes for the rest of the computation. If we used scalable types with
3663 // these, we'd lose the fixed-length vector info and generate worse
3664 // vsetvli code.
3665 VID = convertFromScalableVector(VT: VIDVT, V: VID, DAG, Subtarget);
3666 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3667 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3668 SDValue SplatStep = DAG.getConstant(Val: SplatStepVal, DL, VT: VIDVT);
3669 VID = DAG.getNode(Opcode: StepOpcode, DL, VT: VIDVT, N1: VID, N2: SplatStep);
3670 }
3671 if (StepDenominator != 1) {
3672 SDValue SplatStep =
3673 DAG.getConstant(Val: Log2_64(Value: StepDenominator), DL, VT: VIDVT);
3674 VID = DAG.getNode(Opcode: ISD::SRL, DL, VT: VIDVT, N1: VID, N2: SplatStep);
3675 }
3676 if (Addend != 0 || Negate) {
3677 SDValue SplatAddend = DAG.getConstant(Val: Addend, DL, VT: VIDVT);
3678 VID = DAG.getNode(Opcode: Negate ? ISD::SUB : ISD::ADD, DL, VT: VIDVT, N1: SplatAddend,
3679 N2: VID);
3680 }
3681 if (VT.isFloatingPoint()) {
3682 // TODO: Use vfwcvt to reduce register pressure.
3683 VID = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: VID);
3684 }
3685 return VID;
3686 }
3687 }
3688
3689 // For very small build_vectors, use a single scalar insert of a constant.
3690 // TODO: Base this on constant rematerialization cost, not size.
3691 const unsigned EltBitSize = VT.getScalarSizeInBits();
3692 if (VT.getSizeInBits() <= 32 &&
3693 ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) {
3694 MVT ViaIntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
3695 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3696 "Unexpected sequence type");
3697 // If we can use the original VL with the modified element type, this
3698 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3699 // be moved into InsertVSETVLI?
3700 unsigned ViaVecLen =
3701 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3702 MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen);
3703
3704 uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize);
3705 uint64_t SplatValue = 0;
3706 // Construct the amalgamated value at this larger vector type.
3707 for (const auto &OpIdx : enumerate(First: Op->op_values())) {
3708 const auto &SeqV = OpIdx.value();
3709 if (!SeqV.isUndef())
3710 SplatValue |=
3711 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3712 }
3713
3714 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3715 // achieve better constant materialization.
3716 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3717 SplatValue = SignExtend64<32>(x: SplatValue);
3718
3719 SDValue Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ViaVecVT,
3720 N1: DAG.getUNDEF(VT: ViaVecVT),
3721 N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT),
3722 N3: DAG.getVectorIdxConstant(Val: 0, DL));
3723 if (ViaVecLen != 1)
3724 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL,
3725 VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: 1), N1: Vec,
3726 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
3727 return DAG.getBitcast(VT, V: Vec);
3728 }
3729
3730
3731 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3732 // when re-interpreted as a vector with a larger element type. For example,
3733 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3734 // could be instead splat as
3735 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3736 // TODO: This optimization could also work on non-constant splats, but it
3737 // would require bit-manipulation instructions to construct the splat value.
3738 SmallVector<SDValue> Sequence;
3739 const auto *BV = cast<BuildVectorSDNode>(Val&: Op);
3740 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3741 ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) &&
3742 BV->getRepeatedSequence(Sequence) &&
3743 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3744 unsigned SeqLen = Sequence.size();
3745 MVT ViaIntVT = MVT::getIntegerVT(BitWidth: EltBitSize * SeqLen);
3746 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3747 ViaIntVT == MVT::i64) &&
3748 "Unexpected sequence type");
3749
3750 // If we can use the original VL with the modified element type, this
3751 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3752 // be moved into InsertVSETVLI?
3753 const unsigned RequiredVL = NumElts / SeqLen;
3754 const unsigned ViaVecLen =
3755 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3756 NumElts : RequiredVL;
3757 MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen);
3758
3759 unsigned EltIdx = 0;
3760 uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize);
3761 uint64_t SplatValue = 0;
3762 // Construct the amalgamated value which can be splatted as this larger
3763 // vector type.
3764 for (const auto &SeqV : Sequence) {
3765 if (!SeqV.isUndef())
3766 SplatValue |=
3767 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3768 EltIdx++;
3769 }
3770
3771 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3772 // achieve better constant materialization.
3773 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3774 SplatValue = SignExtend64<32>(x: SplatValue);
3775
3776 // Since we can't introduce illegal i64 types at this stage, we can only
3777 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3778 // way we can use RVV instructions to splat.
3779 assert((ViaIntVT.bitsLE(XLenVT) ||
3780 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3781 "Unexpected bitcast sequence");
3782 if (ViaIntVT.bitsLE(VT: XLenVT) || isInt<32>(x: SplatValue)) {
3783 SDValue ViaVL =
3784 DAG.getConstant(Val: ViaVecVT.getVectorNumElements(), DL, VT: XLenVT);
3785 MVT ViaContainerVT =
3786 getContainerForFixedLengthVector(DAG, VT: ViaVecVT, Subtarget);
3787 SDValue Splat =
3788 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ViaContainerVT,
3789 N1: DAG.getUNDEF(VT: ViaContainerVT),
3790 N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT), N3: ViaVL);
3791 Splat = convertFromScalableVector(VT: ViaVecVT, V: Splat, DAG, Subtarget);
3792 if (ViaVecLen != RequiredVL)
3793 Splat = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL,
3794 VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: RequiredVL), N1: Splat,
3795 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
3796 return DAG.getBitcast(VT, V: Splat);
3797 }
3798 }
3799
3800 // If the number of signbits allows, see if we can lower as a <N x i8>.
3801 // Our main goal here is to reduce LMUL (and thus work) required to
3802 // build the constant, but we will also narrow if the resulting
3803 // narrow vector is known to materialize cheaply.
3804 // TODO: We really should be costing the smaller vector. There are
3805 // profitable cases this misses.
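  // For example (illustrative), a v4i32 constant such as <0, 1, -1, 2>, where
  // every element is the sign extension of an i8, can be built as a v4i8 and
  // widened back with RISCVISD::VSEXT_VL.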
3806 if (EltBitSize > 8 && VT.isInteger() &&
3807 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3808 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3809 if (EltBitSize - SignBits < 8) {
3810 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3811 DL, Op->ops());
3812 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3813 Source, DAG, Subtarget);
3814 SDValue Res = DAG.getNode(Opcode: RISCVISD::VSEXT_VL, DL, VT: ContainerVT, N1: Source, N2: Mask, N3: VL);
3815 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
3816 }
3817 }
3818
3819 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3820 return Res;
3821
3822 // For constant vectors, use generic constant pool lowering. Otherwise,
3823 // we'd have to materialize constants in GPRs just to move them into the
3824 // vector.
3825 return SDValue();
3826}
3827
3828static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3829 const RISCVSubtarget &Subtarget) {
3830 MVT VT = Op.getSimpleValueType();
3831 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3832
3833 if (ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) ||
3834 ISD::isBuildVectorOfConstantFPSDNodes(N: Op.getNode()))
3835 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3836
3837 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3838
3839 SDLoc DL(Op);
3840 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3841
3842 MVT XLenVT = Subtarget.getXLenVT();
3843
3844 if (VT.getVectorElementType() == MVT::i1) {
3845 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3846 // vector type, we have a legal equivalently-sized i8 type, so we can use
3847 // that.
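    // For example (illustrative), a v4i1 build_vector is lowered roughly as
    //   setcc (and (v4i8 build_vector ...), splat(1)), splat(0), setne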
3848 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3849 SDValue VecZero = DAG.getConstant(Val: 0, DL, VT: WideVecVT);
3850
3851 SDValue WideVec;
3852 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
3853 // For a splat, perform a scalar truncate before creating the wider
3854 // vector.
3855 Splat = DAG.getNode(Opcode: ISD::AND, DL, VT: Splat.getValueType(), N1: Splat,
3856 N2: DAG.getConstant(Val: 1, DL, VT: Splat.getValueType()));
3857 WideVec = DAG.getSplatBuildVector(VT: WideVecVT, DL, Op: Splat);
3858 } else {
3859 SmallVector<SDValue, 8> Ops(Op->op_values());
3860 WideVec = DAG.getBuildVector(VT: WideVecVT, DL, Ops);
3861 SDValue VecOne = DAG.getConstant(Val: 1, DL, VT: WideVecVT);
3862 WideVec = DAG.getNode(Opcode: ISD::AND, DL, VT: WideVecVT, N1: WideVec, N2: VecOne);
3863 }
3864
3865 return DAG.getSetCC(DL, VT, LHS: WideVec, RHS: VecZero, Cond: ISD::SETNE);
3866 }
3867
3868 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
3869 if (auto Gather = matchSplatAsGather(SplatVal: Splat, VT, DL, DAG, Subtarget))
3870 return Gather;
3871 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3872 : RISCVISD::VMV_V_X_VL;
3873 if (!VT.isFloatingPoint())
3874 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat);
3875 Splat =
3876 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL);
3877 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
3878 }
3879
3880 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3881 return Res;
3882
3883 // If we're compiling for an exact VLEN value, we can split our work per
3884 // register in the register group.
3885 if (const auto VLen = Subtarget.getRealVLen();
3886 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3887 MVT ElemVT = VT.getVectorElementType();
3888 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3889 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3890 MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg);
3891 MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget);
3892 assert(M1VT == getLMUL1VT(M1VT));
3893
3894 // The following semantically builds up a fixed length concat_vector
3895 // of the component build_vectors. We eagerly lower to scalable and
3896 // insert_subvector here to avoid DAG combining it back to a large
3897 // build_vector.
3898 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3899 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3900 SDValue Vec = DAG.getUNDEF(VT: ContainerVT);
3901 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3902 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(N: i, M: ElemsPerVReg);
3903 SDValue SubBV =
3904 DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL, VT: OneRegVT, Ops: OneVRegOfOps);
3905 SubBV = convertToScalableVector(VT: M1VT, V: SubBV, DAG, Subtarget);
3906 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3907 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubBV,
3908 N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL));
3909 }
3910 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
3911 }
3912
3913 // For m1 vectors, if we have non-undef values in both halves of our vector,
3914 // split the vector into low and high halves, build them separately, then
3915 // use a vselect to combine them. For long vectors, this cuts the critical
3916 // path of the vslide1down sequence in half, and gives us an opportunity
3917 // to special case each half independently. Note that we don't change the
3918 // length of the sub-vectors here, so if both fall back to the generic
3919 // vslide1down path, we should be able to fold the vselect into the final
3920 // vslidedown (for the undef tail) for the first half w/ masking.
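  // For example (illustrative, assuming the container fits in one register):
  // an <8 x i16> build_vector <a,b,c,d,e,f,g,h> is rebuilt as
  // <a,b,c,d,u,u,u,u> and <u,u,u,u,e,f,g,h> and recombined with a vselect on
  // the mask <1,1,1,1,0,0,0,0>.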
3921 unsigned NumElts = VT.getVectorNumElements();
3922 unsigned NumUndefElts =
3923 count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); });
3924 unsigned NumDefElts = NumElts - NumUndefElts;
3925 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3926 ContainerVT.bitsLE(VT: getLMUL1VT(VT: ContainerVT))) {
3927 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3928 SmallVector<SDValue> MaskVals;
3929 SDValue UndefElem = DAG.getUNDEF(VT: Op->getOperand(Num: 0)->getValueType(ResNo: 0));
3930 SubVecAOps.reserve(N: NumElts);
3931 SubVecBOps.reserve(N: NumElts);
3932 for (unsigned i = 0; i < NumElts; i++) {
3933 SDValue Elem = Op->getOperand(Num: i);
3934 if (i < NumElts / 2) {
3935 SubVecAOps.push_back(Elt: Elem);
3936 SubVecBOps.push_back(Elt: UndefElem);
3937 } else {
3938 SubVecAOps.push_back(Elt: UndefElem);
3939 SubVecBOps.push_back(Elt: Elem);
3940 }
3941 bool SelectMaskVal = (i < NumElts / 2);
3942 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
3943 }
3944 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3945 MaskVals.size() == NumElts);
3946
3947 SDValue SubVecA = DAG.getBuildVector(VT, DL, Ops: SubVecAOps);
3948 SDValue SubVecB = DAG.getBuildVector(VT, DL, Ops: SubVecBOps);
3949 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3950 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
3951 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: SubVecA, N3: SubVecB);
3952 }
3953
3954 // Cap the cost at a value linear to the number of elements in the vector.
3955 // The default lowering is to use the stack. The vector store + scalar loads
3956 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
3957 // being (at least) linear in LMUL. As a result, using the vslidedown
3958 // lowering for every element ends up being VL*LMUL.
3959 // TODO: Should we be directly costing the stack alternative? Doing so might
3960 // give us a more accurate upper bound.
3961 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3962
3963 // TODO: unify with TTI getSlideCost.
3964 InstructionCost PerSlideCost = 1;
3965 switch (RISCVTargetLowering::getLMUL(VT: ContainerVT)) {
3966 default: break;
3967 case RISCVII::VLMUL::LMUL_2:
3968 PerSlideCost = 2;
3969 break;
3970 case RISCVII::VLMUL::LMUL_4:
3971 PerSlideCost = 4;
3972 break;
3973 case RISCVII::VLMUL::LMUL_8:
3974 PerSlideCost = 8;
3975 break;
3976 }
3977
3978 // TODO: Should we be using the build instseq then cost + evaluate scheme
3979 // we use for integer constants here?
3980 unsigned UndefCount = 0;
3981 for (const SDValue &V : Op->ops()) {
3982 if (V.isUndef()) {
3983 UndefCount++;
3984 continue;
3985 }
3986 if (UndefCount) {
3987 LinearBudget -= PerSlideCost;
3988 UndefCount = 0;
3989 }
3990 LinearBudget -= PerSlideCost;
3991 }
3992 if (UndefCount) {
3993 LinearBudget -= PerSlideCost;
3994 }
3995
3996 if (LinearBudget < 0)
3997 return SDValue();
3998
3999 assert((!VT.isFloatingPoint() ||
4000 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4001 "Illegal type which will result in reserved encoding");
4002
4003 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4004
4005 SDValue Vec;
4006 UndefCount = 0;
4007 for (SDValue V : Op->ops()) {
4008 if (V.isUndef()) {
4009 UndefCount++;
4010 continue;
4011 }
4012
4013 // Start our sequence with a TA splat in the hopes that hardware is able to
4014 // recognize there's no dependency on the prior value of our temporary
4015 // register.
4016 if (!Vec) {
4017 Vec = DAG.getSplatVector(VT, DL, Op: V);
4018 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
4019 UndefCount = 0;
4020 continue;
4021 }
4022
4023 if (UndefCount) {
4024 const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT());
4025 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
4026 Op: Vec, Offset, Mask, VL, Policy);
4027 UndefCount = 0;
4028 }
4029 auto OpCode =
4030 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4031 if (!VT.isFloatingPoint())
4032 V = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: V);
4033 Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec,
4034 N3: V, N4: Mask, N5: VL);
4035 }
4036 if (UndefCount) {
4037 const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT());
4038 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
4039 Op: Vec, Offset, Mask, VL, Policy);
4040 }
4041 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4042}
4043
4044static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4045 SDValue Lo, SDValue Hi, SDValue VL,
4046 SelectionDAG &DAG) {
4047 if (!Passthru)
4048 Passthru = DAG.getUNDEF(VT);
4049 if (isa<ConstantSDNode>(Val: Lo) && isa<ConstantSDNode>(Val: Hi)) {
4050 int32_t LoC = cast<ConstantSDNode>(Val&: Lo)->getSExtValue();
4051 int32_t HiC = cast<ConstantSDNode>(Val&: Hi)->getSExtValue();
4052 // If every bit of the Hi constant equals Lo's sign bit, lower this as a
4053 // custom node in order to try and match RVV vector/scalar instructions.
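    // For example (illustrative): Lo = -5 (0xFFFFFFFB) and Hi = -1
    // (0xFFFFFFFF) form the 64-bit value -5, so a vmv.v.x of the sign-extended
    // 32-bit scalar -5 produces the desired splat.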
4054 if ((LoC >> 31) == HiC)
4055 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4056
4057 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4058 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4059 // vlmax vsetvli or vsetivli to change the VL.
4060 // FIXME: Support larger constants?
4061 // FIXME: Support non-constant VLs by saturating?
4062 if (LoC == HiC) {
4063 SDValue NewVL;
4064 if (isAllOnesConstant(VL) ||
4065 (isa<RegisterSDNode>(VL) &&
4066 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4067 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4068 else if (isa<ConstantSDNode>(Val: VL) && isUInt<4>(x: VL->getAsZExtVal()))
4069 NewVL = DAG.getNode(Opcode: ISD::ADD, DL, VT: VL.getValueType(), N1: VL, N2: VL);
4070
4071 if (NewVL) {
4072 MVT InterVT =
4073 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4074 auto InterVec = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterVT,
4075 N1: DAG.getUNDEF(VT: InterVT), N2: Lo, N3: NewVL);
4076 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: InterVec);
4077 }
4078 }
4079 }
4080
4081 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4082 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(i: 0) == Lo &&
4083 isa<ConstantSDNode>(Val: Hi.getOperand(i: 1)) &&
4084 Hi.getConstantOperandVal(i: 1) == 31)
4085 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4086
4087 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4088 // even if it might be sign extended.
4089 if (Hi.isUndef())
4090 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4091
4092 // Fall back to a stack store and stride x0 vector load.
4093 return DAG.getNode(Opcode: RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, N1: Passthru, N2: Lo,
4094 N3: Hi, N4: VL);
4095}
4096
4097// Called by type legalization to handle splat of i64 on RV32.
4098// FIXME: We can optimize this when the type has sign or zero bits in one
4099// of the halves.
4100static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4101 SDValue Scalar, SDValue VL,
4102 SelectionDAG &DAG) {
4103 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4104 SDValue Lo, Hi;
4105 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4106 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4107}
4108
4109// This function lowers a splat of a scalar operand Splat with the vector
4110// length VL. It ensures the final sequence is type legal, which is useful when
4111// lowering a splat after type legalization.
4112static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4113 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4114 const RISCVSubtarget &Subtarget) {
4115 bool HasPassthru = Passthru && !Passthru.isUndef();
4116 if (!HasPassthru && !Passthru)
4117 Passthru = DAG.getUNDEF(VT);
4118 if (VT.isFloatingPoint())
4119 return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
4120
4121 MVT XLenVT = Subtarget.getXLenVT();
4122
4123 // Simplest case is that the operand needs to be promoted to XLenVT.
4124 if (Scalar.getValueType().bitsLE(VT: XLenVT)) {
4125 // If the operand is a constant, sign extend to increase our chances
4126 // of being able to use a .vi instruction. ANY_EXTEND would become a
4127 // zero extend and the simm5 check in isel would fail.
4128 // FIXME: Should we ignore the upper bits in isel instead?
4129 unsigned ExtOpc =
4130 isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4131 Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar);
4132 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
4133 }
4134
4135 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4136 "Unexpected scalar for splat lowering!");
4137
4138 if (isOneConstant(V: VL) && isNullConstant(V: Scalar))
4139 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru,
4140 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: VL);
4141
4142 // Otherwise use the more complicated splatting algorithm.
4143 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4144}
4145
4146// This function lowers an insert of a scalar operand Scalar into lane
4147// 0 of the vector regardless of the value of VL. The contents of the
4148// remaining lanes of the result vector are unspecified. VL is assumed
4149// to be non-zero.
4150static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4151 const SDLoc &DL, SelectionDAG &DAG,
4152 const RISCVSubtarget &Subtarget) {
4153 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4154
4155 const MVT XLenVT = Subtarget.getXLenVT();
4156 SDValue Passthru = DAG.getUNDEF(VT);
4157
4158 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4159 isNullConstant(V: Scalar.getOperand(i: 1))) {
4160 SDValue ExtractedVal = Scalar.getOperand(i: 0);
4161 // The element types must be the same.
4162 if (ExtractedVal.getValueType().getVectorElementType() ==
4163 VT.getVectorElementType()) {
4164 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4165 MVT ExtractedContainerVT = ExtractedVT;
4166 if (ExtractedContainerVT.isFixedLengthVector()) {
4167 ExtractedContainerVT = getContainerForFixedLengthVector(
4168 DAG, VT: ExtractedContainerVT, Subtarget);
4169 ExtractedVal = convertToScalableVector(VT: ExtractedContainerVT,
4170 V: ExtractedVal, DAG, Subtarget);
4171 }
4172 if (ExtractedContainerVT.bitsLE(VT))
4173 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru,
4174 N2: ExtractedVal, N3: DAG.getVectorIdxConstant(Val: 0, DL));
4175 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: ExtractedVal,
4176 N2: DAG.getVectorIdxConstant(Val: 0, DL));
4177 }
4178 }
4179
4180
4181 if (VT.isFloatingPoint())
4182 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT,
4183 N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL);
4184
4185 // Avoid the tricky legalization cases by falling back to using the
4186 // splat code which already handles it gracefully.
4187 if (!Scalar.getValueType().bitsLE(VT: XLenVT))
4188 return lowerScalarSplat(Passthru: DAG.getUNDEF(VT), Scalar,
4189 VL: DAG.getConstant(Val: 1, DL, VT: XLenVT),
4190 VT, DL, DAG, Subtarget);
4191
4192 // If the operand is a constant, sign extend to increase our chances
4193 // of being able to use a .vi instruction. ANY_EXTEND would become a
4194 // zero extend and the simm5 check in isel would fail.
4195 // FIXME: Should we ignore the upper bits in isel instead?
4196 unsigned ExtOpc =
4197 isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4198 Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar);
4199 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT,
4200 N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL);
4201}
4202
// Is this a shuffle that extracts either the even or odd elements of a
// vector? That is, specifically, either (a) or (b) below.
4205// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4206// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4207// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4208// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns true on success; the source vector is the common operand of the
// extracts and Mask[0] == 0 selects the even elements.
4210static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4211 SDValue V2, ArrayRef<int> Mask,
4212 const RISCVSubtarget &Subtarget) {
  // We need to be able to widen the elements to the next larger integer type.
4214 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4215 return false;
4216
  // Both inputs must be extracts.
4218 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4219 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4220 return false;
4221
4222 // Extracting from the same source.
4223 SDValue Src = V1.getOperand(i: 0);
4224 if (Src != V2.getOperand(i: 0))
4225 return false;
4226
4227 // Src needs to have twice the number of elements.
4228 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4229 return false;
4230
4231 // The extracts must extract the two halves of the source.
4232 if (V1.getConstantOperandVal(i: 1) != 0 ||
4233 V2.getConstantOperandVal(i: 1) != Mask.size())
4234 return false;
4235
4236 // First index must be the first even or odd element from V1.
4237 if (Mask[0] != 0 && Mask[0] != 1)
4238 return false;
4239
4240 // The others must increase by 2 each time.
4241 // TODO: Support undef elements?
4242 for (unsigned i = 1; i != Mask.size(); ++i)
4243 if (Mask[i] != Mask[i - 1] + 2)
4244 return false;
4245
4246 return true;
4247}
4248
4249/// Is this shuffle interleaving contiguous elements from one vector into the
4250/// even elements and contiguous elements from another vector into the odd
4251/// elements. \p EvenSrc will contain the element that should be in the first
4252/// even element. \p OddSrc will contain the element that should be in the first
4253/// odd element. These can be the first element in a source or the element half
4254/// way through the source.
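// For example (illustrative), with two v8i16 sources the mask
//   <0, 8, 1, 9, 2, 10, 3, 11>
// interleaves the low half of the first source (EvenSrc = 0) with the low
// half of the second source (OddSrc = 8), while the unary mask
//   <0, 4, 1, 5, 2, 6, 3, 7>
// interleaves the two halves of a single source (EvenSrc = 0, OddSrc = 4).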
4255static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4256 int &OddSrc, const RISCVSubtarget &Subtarget) {
4257 // We need to be able to widen elements to the next larger integer type.
4258 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4259 return false;
4260
4261 int Size = Mask.size();
4262 int NumElts = VT.getVectorNumElements();
4263 assert(Size == (int)NumElts && "Unexpected mask size");
4264
4265 SmallVector<unsigned, 2> StartIndexes;
4266 if (!ShuffleVectorInst::isInterleaveMask(Mask, Factor: 2, NumInputElts: Size * 2, StartIndexes))
4267 return false;
4268
4269 EvenSrc = StartIndexes[0];
4270 OddSrc = StartIndexes[1];
4271
4272 // One source should be low half of first vector.
4273 if (EvenSrc != 0 && OddSrc != 0)
4274 return false;
4275
  // Subvectors will be extracted either at the start of the two input
  // vectors, or at the start and middle of the first vector if it's a unary
  // interleave.
  // In both cases, HalfNumElts elements will be extracted.
4280 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4281 // we'll create an illegal extract_subvector.
4282 // FIXME: We could support other values using a slidedown first.
4283 int HalfNumElts = NumElts / 2;
4284 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4285}
4286
4287/// Match shuffles that concatenate two vectors, rotate the concatenation,
4288/// and then extract the original number of elements from the rotated result.
4289/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4290/// returned rotation amount is for a rotate right, where elements move from
4291/// higher elements to lower elements. \p LoSrc indicates the first source
4292/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4293/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4294/// 0 or 1 if a rotation is found.
4295///
4296/// NOTE: We talk about rotate to the right which matches how bit shift and
4297/// rotate instructions are described where LSBs are on the right, but LLVM IR
4298/// and the table below write vectors with the lowest elements on the left.
4299static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4300 int Size = Mask.size();
4301
4302 // We need to detect various ways of spelling a rotation:
4303 // [11, 12, 13, 14, 15, 0, 1, 2]
4304 // [-1, 12, 13, 14, -1, -1, 1, -1]
4305 // [-1, -1, -1, -1, -1, -1, 1, 2]
4306 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4307 // [-1, 4, 5, 6, -1, -1, 9, -1]
4308 // [-1, 4, 5, 6, -1, -1, -1, -1]
4309 int Rotation = 0;
4310 LoSrc = -1;
4311 HiSrc = -1;
4312 for (int i = 0; i != Size; ++i) {
4313 int M = Mask[i];
4314 if (M < 0)
4315 continue;
4316
4317 // Determine where a rotate vector would have started.
4318 int StartIdx = i - (M % Size);
4319 // The identity rotation isn't interesting, stop.
4320 if (StartIdx == 0)
4321 return -1;
4322
4323 // If we found the tail of a vector the rotation must be the missing
4324 // front. If we found the head of a vector, it must be how much of the
4325 // head.
4326 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4327
4328 if (Rotation == 0)
4329 Rotation = CandidateRotation;
4330 else if (Rotation != CandidateRotation)
4331 // The rotations don't match, so we can't match this mask.
4332 return -1;
4333
4334 // Compute which value this mask is pointing at.
4335 int MaskSrc = M < Size ? 0 : 1;
4336
4337 // Compute which of the two target values this index should be assigned to.
    // This reflects whether the high elements are remaining or the low
    // elements are remaining.
4340 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4341
4342 // Either set up this value if we've not encountered it before, or check
4343 // that it remains consistent.
4344 if (TargetSrc < 0)
4345 TargetSrc = MaskSrc;
4346 else if (TargetSrc != MaskSrc)
4347 // This may be a rotation, but it pulls from the inputs in some
4348 // unsupported interleaving.
4349 return -1;
4350 }
4351
4352 // Check that we successfully analyzed the mask, and normalize the results.
4353 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4354 assert((LoSrc >= 0 || HiSrc >= 0) &&
4355 "Failed to find a rotated input vector!");
4356
4357 return Rotation;
4358}
4359
4360// Lower a deinterleave shuffle to vnsrl.
4361// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4362// -> [p, q, r, s] (EvenElts == false)
4363// VT is the type of the vector to return, <[vscale x ]n x ty>
4364// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
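// Illustrative sketch (not necessarily the exact instructions isel produces):
// deinterleaving a v8i16 source conceptually views it as a v4i32 vector and
// narrowing-shifts each 32-bit element, roughly:
//   vnsrl.wi  v8, v16, 0    ; even elements  [a, b, c, d]
//   vnsrl.wi  v8, v16, 16   ; odd elements   [p, q, r, s]
// i.e. the shift amount is 0 for the even elements and SEW for the odd ones.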
4365static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4366 bool EvenElts,
4367 const RISCVSubtarget &Subtarget,
4368 SelectionDAG &DAG) {
4369 // The result is a vector of type <m x n x ty>
4370 MVT ContainerVT = VT;
4371 // Convert fixed vectors to scalable if needed
4372 if (ContainerVT.isFixedLengthVector()) {
4373 assert(Src.getSimpleValueType().isFixedLengthVector());
4374 ContainerVT = getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget);
4375
4376 // The source is a vector of type <m x n*2 x ty>
4377 MVT SrcContainerVT =
4378 MVT::getVectorVT(VT: ContainerVT.getVectorElementType(),
4379 EC: ContainerVT.getVectorElementCount() * 2);
4380 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
4381 }
4382
4383 auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
4384
4385 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4386 // This also converts FP to int.
4387 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4388 MVT WideSrcContainerVT = MVT::getVectorVT(
4389 VT: MVT::getIntegerVT(BitWidth: EltBits * 2), EC: ContainerVT.getVectorElementCount());
4390 Src = DAG.getBitcast(VT: WideSrcContainerVT, V: Src);
4391
4392 // The integer version of the container type.
4393 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4394
4395 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4396 // the original element size.
4397 unsigned Shift = EvenElts ? 0 : EltBits;
4398 SDValue SplatShift = DAG.getNode(
4399 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntContainerVT, N1: DAG.getUNDEF(VT: ContainerVT),
4400 N2: DAG.getConstant(Val: Shift, DL, VT: Subtarget.getXLenVT()), N3: VL);
4401 SDValue Res =
4402 DAG.getNode(Opcode: RISCVISD::VNSRL_VL, DL, VT: IntContainerVT, N1: Src, N2: SplatShift,
4403 N3: DAG.getUNDEF(VT: IntContainerVT), N4: TrueMask, N5: VL);
4404 // Cast back to FP if needed.
4405 Res = DAG.getBitcast(VT: ContainerVT, V: Res);
4406
4407 if (VT.isFixedLengthVector())
4408 Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
4409 return Res;
4410}
4411
4412// Lower the following shuffle to vslidedown.
4413// a)
4414// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4415// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4416// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4417// b)
4418// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4419// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4420// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4421// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4422// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4423// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
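// Illustrative outcome for case (a): both extracts come from t13 and the
// rebuilt mask <1,2,3,4,5,6,7,8> is consecutive, so this can be lowered as a
// single vslidedown of t13 by 1 followed by an extract of the low v8i8.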
4424static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4425 SDValue V1, SDValue V2,
4426 ArrayRef<int> Mask,
4427 const RISCVSubtarget &Subtarget,
4428 SelectionDAG &DAG) {
4429 auto findNonEXTRACT_SUBVECTORParent =
4430 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4431 uint64_t Offset = 0;
4432 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4433 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector, but we don't want to match that case.
4435 Parent.getOperand(i: 0).getSimpleValueType().isFixedLengthVector()) {
4436 Offset += Parent.getConstantOperandVal(i: 1);
4437 Parent = Parent.getOperand(i: 0);
4438 }
4439 return std::make_pair(x&: Parent, y&: Offset);
4440 };
4441
4442 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4443 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4444
4445 // Extracting from the same source.
4446 SDValue Src = V1Src;
4447 if (Src != V2Src)
4448 return SDValue();
4449
4450 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4451 SmallVector<int, 16> NewMask(Mask);
4452 for (size_t i = 0; i != NewMask.size(); ++i) {
4453 if (NewMask[i] == -1)
4454 continue;
4455
4456 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4457 NewMask[i] = NewMask[i] + V1IndexOffset;
4458 } else {
      // Subtracting NewMask.size() is needed; otherwise the (b) case would be
      // <5,6,7,12> instead of <5,6,7,8>.
4461 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4462 }
4463 }
4464
4465 // First index must be known and non-zero. It will be used as the slidedown
4466 // amount.
4467 if (NewMask[0] <= 0)
4468 return SDValue();
4469
  // NewMask must also be consecutive.
4471 for (unsigned i = 1; i != NewMask.size(); ++i)
4472 if (NewMask[i - 1] + 1 != NewMask[i])
4473 return SDValue();
4474
4475 MVT XLenVT = Subtarget.getXLenVT();
4476 MVT SrcVT = Src.getSimpleValueType();
4477 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
4478 auto [TrueMask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
4479 SDValue Slidedown =
4480 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
4481 Op: convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget),
4482 Offset: DAG.getConstant(Val: NewMask[0], DL, VT: XLenVT), Mask: TrueMask, VL);
4483 return DAG.getNode(
4484 Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT,
4485 N1: convertFromScalableVector(VT: SrcVT, V: Slidedown, DAG, Subtarget),
4486 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
4487}
4488
4489// Because vslideup leaves the destination elements at the start intact, we can
4490// use it to perform shuffles that insert subvectors:
4491//
4492// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4493// ->
4494// vsetvli zero, 8, e8, mf2, ta, ma
4495// vslideup.vi v8, v9, 4
4496//
4497// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4498// ->
4499// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
4501static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4502 SDValue V1, SDValue V2,
4503 ArrayRef<int> Mask,
4504 const RISCVSubtarget &Subtarget,
4505 SelectionDAG &DAG) {
4506 unsigned NumElts = VT.getVectorNumElements();
4507 int NumSubElts, Index;
4508 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumSrcElts: NumElts, NumSubElts,
4509 Index))
4510 return SDValue();
4511
4512 bool OpsSwapped = Mask[Index] < (int)NumElts;
4513 SDValue InPlace = OpsSwapped ? V2 : V1;
4514 SDValue ToInsert = OpsSwapped ? V1 : V2;
4515
4516 MVT XLenVT = Subtarget.getXLenVT();
4517 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4518 auto TrueMask = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).first;
4519 // We slide up by the index that the subvector is being inserted at, and set
4520 // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in-place vector, i.e. inserting right
4523 // up to the very end of it, then we don't actually care about the tail.
4524 if (NumSubElts + Index >= (int)NumElts)
4525 Policy |= RISCVII::TAIL_AGNOSTIC;
4526
4527 InPlace = convertToScalableVector(VT: ContainerVT, V: InPlace, DAG, Subtarget);
4528 ToInsert = convertToScalableVector(VT: ContainerVT, V: ToInsert, DAG, Subtarget);
4529 SDValue VL = DAG.getConstant(Val: NumSubElts + Index, DL, VT: XLenVT);
4530
4531 SDValue Res;
4532 // If we're inserting into the lowest elements, use a tail undisturbed
4533 // vmv.v.v.
4534 if (Index == 0)
4535 Res = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: InPlace, N2: ToInsert,
4536 N3: VL);
4537 else
4538 Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: InPlace, Op: ToInsert,
4539 Offset: DAG.getConstant(Val: Index, DL, VT: XLenVT), Mask: TrueMask, VL, Policy);
4540 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
4541}
4542
4543/// Match v(f)slide1up/down idioms. These operations involve sliding
4544/// N-1 elements to make room for an inserted scalar at one end.
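/// For example (illustrative), with V1 a splat build_vector of scalar x and
/// V2 a v4i32 value, the mask <0, 4, 5, 6> becomes vslide1up.vx (insert x at
/// the front of V2), and the mask <5, 6, 7, 0> becomes vslide1down.vx (append
/// x at the back of V2).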
4545static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4546 SDValue V1, SDValue V2,
4547 ArrayRef<int> Mask,
4548 const RISCVSubtarget &Subtarget,
4549 SelectionDAG &DAG) {
4550 bool OpsSwapped = false;
4551 if (!isa<BuildVectorSDNode>(Val: V1)) {
4552 if (!isa<BuildVectorSDNode>(Val: V2))
4553 return SDValue();
4554 std::swap(a&: V1, b&: V2);
4555 OpsSwapped = true;
4556 }
4557 SDValue Splat = cast<BuildVectorSDNode>(Val&: V1)->getSplatValue();
4558 if (!Splat)
4559 return SDValue();
4560
4561 // Return true if the mask could describe a slide of Mask.size() - 1
4562 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4563 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4564 const unsigned S = (Offset > 0) ? 0 : -Offset;
4565 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4566 for (unsigned i = S; i != E; ++i)
4567 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4568 return false;
4569 return true;
4570 };
4571
4572 const unsigned NumElts = VT.getVectorNumElements();
4573 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4574 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4575 return SDValue();
4576
4577 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // The inserted lane must come from the splat; an undef scalar is legal but
  // not profitable.
4579 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4580 return SDValue();
4581
4582 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4583 auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
4584 auto OpCode = IsVSlidedown ?
4585 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4586 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4587 if (!VT.isFloatingPoint())
4588 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Splat);
4589 auto Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT,
4590 N1: DAG.getUNDEF(VT: ContainerVT),
4591 N2: convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget),
4592 N3: Splat, N4: TrueMask, N5: VL);
4593 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4594}
4595
4596// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4597// to create an interleaved vector of <[vscale x] n*2 x ty>.
4598// This requires that the size of ty is less than the subtarget's maximum ELEN.
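// Illustrative example (register names are arbitrary): for EvenV = [a, b] and
// OddV = [x, y] with i16 elements, the result [a, x, b, y] is materialized
// (absent Zvbb) roughly as:
//   vwaddu.vv  v10, v8, v9    ; zext(Even) + zext(Odd) into i32 lanes
//   vwmaccu.vx v10, a0, v9    ; += 0xffff * zext(Odd), with a0 = -1
// so each i32 lane holds (Odd << 16) + Even, which reinterpreted as i16 lanes
// is the interleaved result.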
4599static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4600 const SDLoc &DL, SelectionDAG &DAG,
4601 const RISCVSubtarget &Subtarget) {
4602 MVT VecVT = EvenV.getSimpleValueType();
4603 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4604 // Convert fixed vectors to scalable if needed
4605 if (VecContainerVT.isFixedLengthVector()) {
4606 VecContainerVT = getContainerForFixedLengthVector(DAG, VT: VecVT, Subtarget);
4607 EvenV = convertToScalableVector(VT: VecContainerVT, V: EvenV, DAG, Subtarget);
4608 OddV = convertToScalableVector(VT: VecContainerVT, V: OddV, DAG, Subtarget);
4609 }
4610
4611 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4612
4613 // We're working with a vector of the same size as the resulting
4614 // interleaved vector, but with half the number of elements and
4615 // twice the SEW (Hence the restriction on not using the maximum
4616 // ELEN)
4617 MVT WideVT =
4618 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VecVT.getScalarSizeInBits() * 2),
4619 EC: VecVT.getVectorElementCount());
4620 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4621 if (WideContainerVT.isFixedLengthVector())
4622 WideContainerVT = getContainerForFixedLengthVector(DAG, VT: WideVT, Subtarget);
4623
4624 // Bitcast the input vectors to integers in case they are FP
4625 VecContainerVT = VecContainerVT.changeTypeToInteger();
4626 EvenV = DAG.getBitcast(VT: VecContainerVT, V: EvenV);
4627 OddV = DAG.getBitcast(VT: VecContainerVT, V: OddV);
4628
4629 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT: VecContainerVT, DL, DAG, Subtarget);
4630 SDValue Passthru = DAG.getUNDEF(VT: WideContainerVT);
4631
4632 SDValue Interleaved;
4633 if (OddV.isUndef()) {
4634 // If OddV is undef, this is a zero extend.
4635 // FIXME: Not only does this optimize the code, it fixes some correctness
4636 // issues because MIR does not have freeze.
4637 Interleaved =
4638 DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: EvenV, N2: Mask, N3: VL);
4639 } else if (Subtarget.hasStdExtZvbb()) {
4640 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4641 SDValue OffsetVec =
4642 DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: VecContainerVT);
4643 Interleaved = DAG.getNode(Opcode: RISCVISD::VWSLL_VL, DL, VT: WideContainerVT, N1: OddV,
4644 N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL);
4645 if (!EvenV.isUndef())
4646 Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_W_VL, DL, VT: WideContainerVT,
4647 N1: Interleaved, N2: EvenV, N3: Passthru, N4: Mask, N5: VL);
4648 } else if (EvenV.isUndef()) {
4649 Interleaved =
4650 DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: OddV, N2: Mask, N3: VL);
4651
4652 SDValue OffsetVec =
4653 DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: WideContainerVT);
4654 Interleaved = DAG.getNode(Opcode: RISCVISD::SHL_VL, DL, VT: WideContainerVT,
4655 N1: Interleaved, N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL);
4656 } else {
4657 // FIXME: We should freeze the odd vector here. We already handled the case
4658 // of provably undef/poison above.
4659
4660 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4661 // vwaddu.vv
4662 Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_VL, DL, VT: WideContainerVT, N1: EvenV,
4663 N2: OddV, N3: Passthru, N4: Mask, N5: VL);
4664
    // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. multiply
    // OddV by the all-ones value.
4666 SDValue AllOnesVec = DAG.getSplatVector(
4667 VT: VecContainerVT, DL, Op: DAG.getAllOnesConstant(DL, VT: Subtarget.getXLenVT()));
4668 SDValue OddsMul = DAG.getNode(Opcode: RISCVISD::VWMULU_VL, DL, VT: WideContainerVT,
4669 N1: OddV, N2: AllOnesVec, N3: Passthru, N4: Mask, N5: VL);
4670
4671 // Add the two together so we get
4672 // (OddV * 0xff...ff) + (OddV + EvenV)
4673 // = (OddV * 0x100...00) + EvenV
4674 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4676 Interleaved = DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: WideContainerVT,
4677 N1: Interleaved, N2: OddsMul, N3: Passthru, N4: Mask, N5: VL);
4678 }
4679
  // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
4681 MVT ResultContainerVT = MVT::getVectorVT(
4682 VT: VecVT.getVectorElementType(), // Make sure to use original type
4683 EC: VecContainerVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
4684 Interleaved = DAG.getBitcast(VT: ResultContainerVT, V: Interleaved);
4685
4686 // Convert back to a fixed vector if needed
4687 MVT ResultVT =
4688 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
4689 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
4690 if (ResultVT.isFixedLengthVector())
4691 Interleaved =
4692 convertFromScalableVector(VT: ResultVT, V: Interleaved, DAG, Subtarget);
4693
4694 return Interleaved;
4695}
4696
4697// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4698// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
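// A sketch of the non-exact case (illustrative): reversing a v4i1 mask goes
// through an i8 element, so the 4 mask bits end up in the high nibble after
// the bitreverse and are then shifted right by 8 - 4 = 4 to land back in the
// low nibble before being bitcast back to v4i1.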
4699static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4700 SelectionDAG &DAG,
4701 const RISCVSubtarget &Subtarget) {
4702 SDLoc DL(SVN);
4703 MVT VT = SVN->getSimpleValueType(ResNo: 0);
4704 SDValue V = SVN->getOperand(Num: 0);
4705 unsigned NumElts = VT.getVectorNumElements();
4706
4707 assert(VT.getVectorElementType() == MVT::i1);
4708
4709 if (!ShuffleVectorInst::isReverseMask(Mask: SVN->getMask(),
4710 NumSrcElts: SVN->getMask().size()) ||
4711 !SVN->getOperand(Num: 1).isUndef())
4712 return SDValue();
4713
4714 unsigned ViaEltSize = std::max(a: (uint64_t)8, b: PowerOf2Ceil(A: NumElts));
4715 EVT ViaVT = EVT::getVectorVT(
4716 Context&: *DAG.getContext(), VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ViaEltSize), NumElements: 1);
4717 EVT ViaBitVT =
4718 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4719
  // If we don't have Zvbb or the larger element type is wider than ELEN, the
  // operation will be illegal.
4722 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(Op: ISD::BITREVERSE,
4723 VT: ViaVT) ||
4724 !Subtarget.getTargetLowering()->isTypeLegal(VT: ViaBitVT))
4725 return SDValue();
4726
  // If the bit vector doesn't fit exactly into the larger element type, we
  // need to insert it into the larger vector and then shift the reversed bits
  // down afterwards to get rid of the gap introduced.
4730 if (ViaEltSize > NumElts)
4731 V = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ViaBitVT, N1: DAG.getUNDEF(VT: ViaBitVT),
4732 N2: V, N3: DAG.getVectorIdxConstant(Val: 0, DL));
4733
4734 SDValue Res =
4735 DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ViaVT, Operand: DAG.getBitcast(VT: ViaVT, V));
4736
  // Shift the reversed bits down if the vector didn't exactly fit into the
  // larger element type.
4739 if (ViaEltSize > NumElts)
4740 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: ViaVT, N1: Res,
4741 N2: DAG.getConstant(Val: ViaEltSize - NumElts, DL, VT: ViaVT));
4742
4743 Res = DAG.getBitcast(VT: ViaBitVT, V: Res);
4744
4745 if (ViaEltSize > NumElts)
4746 Res = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Res,
4747 N2: DAG.getVectorIdxConstant(Val: 0, DL));
4748 return Res;
4749}
4750
4751static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4752 SelectionDAG &DAG,
4753 const RISCVSubtarget &Subtarget,
4754 MVT &RotateVT, unsigned &RotateAmt) {
4755 SDLoc DL(SVN);
4756
4757 EVT VT = SVN->getValueType(ResNo: 0);
4758 unsigned NumElts = VT.getVectorNumElements();
4759 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4760 unsigned NumSubElts;
4761 if (!ShuffleVectorInst::isBitRotateMask(Mask: SVN->getMask(), EltSizeInBits, MinSubElts: 2,
4762 MaxSubElts: NumElts, NumSubElts, RotateAmt))
4763 return false;
4764 RotateVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSizeInBits * NumSubElts),
4765 NumElements: NumElts / NumSubElts);
4766
4767 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4768 return Subtarget.getTargetLowering()->isTypeLegal(VT: RotateVT);
4769}
4770
4771// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4772// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4773// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4774static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4775 SelectionDAG &DAG,
4776 const RISCVSubtarget &Subtarget) {
4777 SDLoc DL(SVN);
4778
4779 EVT VT = SVN->getValueType(ResNo: 0);
4780 unsigned RotateAmt;
4781 MVT RotateVT;
4782 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4783 return SDValue();
4784
4785 SDValue Op = DAG.getBitcast(VT: RotateVT, V: SVN->getOperand(Num: 0));
4786
4787 SDValue Rotate;
4788 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4789 // so canonicalize to vrev8.
4790 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4791 Rotate = DAG.getNode(Opcode: ISD::BSWAP, DL, VT: RotateVT, Operand: Op);
4792 else
4793 Rotate = DAG.getNode(Opcode: ISD::ROTL, DL, VT: RotateVT, N1: Op,
4794 N2: DAG.getConstant(Val: RotateAmt, DL, VT: RotateVT));
4795
4796 return DAG.getBitcast(VT, V: Rotate);
4797}
4798
4799// If compiling with an exactly known VLEN, see if we can split a
4800// shuffle on m2 or larger into a small number of m1 sized shuffles
// which write each destination register exactly once.
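// Illustrative example, assuming VLEN is known to be 128: a v8i64 shuffle
// (LMUL=4) whose mask only moves whole 128-bit chunks, e.g.
//   <2, 3, 0, 1, 4, 5, 6, 7>
// can be emitted as four independent m1 shuffles (here, whole-register moves
// that swap the first two registers of the group), instead of one large
// vrgather.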
4802static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4803 SelectionDAG &DAG,
4804 const RISCVSubtarget &Subtarget) {
4805 SDLoc DL(SVN);
4806 MVT VT = SVN->getSimpleValueType(ResNo: 0);
4807 SDValue V1 = SVN->getOperand(Num: 0);
4808 SDValue V2 = SVN->getOperand(Num: 1);
4809 ArrayRef<int> Mask = SVN->getMask();
4810 unsigned NumElts = VT.getVectorNumElements();
4811
4812 // If we don't know exact data layout, not much we can do. If this
4813 // is already m1 or smaller, no point in splitting further.
4814 const auto VLen = Subtarget.getRealVLen();
4815 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4816 return SDValue();
4817
4818 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4819 // expansion for.
4820 unsigned RotateAmt;
4821 MVT RotateVT;
4822 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4823 return SDValue();
4824
4825 MVT ElemVT = VT.getVectorElementType();
4826 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4827 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4828
4829 SmallVector<std::pair<int, SmallVector<int>>>
4830 OutMasks(VRegsPerSrc, {-1, {}});
4831
4832 // Check if our mask can be done as a 1-to-1 mapping from source
4833 // to destination registers in the group without needing to
4834 // write each destination more than once.
4835 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4836 int DstVecIdx = DstIdx / ElemsPerVReg;
4837 int DstSubIdx = DstIdx % ElemsPerVReg;
4838 int SrcIdx = Mask[DstIdx];
4839 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4840 continue;
4841 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4842 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4843 if (OutMasks[DstVecIdx].first == -1)
4844 OutMasks[DstVecIdx].first = SrcVecIdx;
4845 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4846 // Note: This case could easily be handled by keeping track of a chain
4847 // of source values and generating two element shuffles below. This is
4848 // less an implementation question, and more a profitability one.
4849 return SDValue();
4850
4851 OutMasks[DstVecIdx].second.resize(N: ElemsPerVReg, NV: -1);
4852 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4853 }
4854
4855 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4856 MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg);
4857 MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget);
4858 assert(M1VT == getLMUL1VT(M1VT));
4859 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4860 SDValue Vec = DAG.getUNDEF(VT: ContainerVT);
4861 // The following semantically builds up a fixed length concat_vector
4862 // of the component shuffle_vectors. We eagerly lower to scalable here
4863 // to avoid DAG combining it back to a large shuffle_vector again.
4864 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
4865 V2 = convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget);
4866 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4867 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4868 if (SrcVecIdx == -1)
4869 continue;
4870 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4871 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4872 SDValue SubVec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: SrcVec,
4873 N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL));
4874 SubVec = convertFromScalableVector(VT: OneRegVT, V: SubVec, DAG, Subtarget);
4875 SubVec = DAG.getVectorShuffle(VT: OneRegVT, dl: DL, N1: SubVec, N2: SubVec, Mask: SrcSubMask);
4876 SubVec = convertToScalableVector(VT: M1VT, V: SubVec, DAG, Subtarget);
4877 unsigned InsertIdx = DstVecIdx * NumOpElts;
4878 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubVec,
4879 N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL));
4880 }
4881 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4882}
4883
4884static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4885 const RISCVSubtarget &Subtarget) {
4886 SDValue V1 = Op.getOperand(i: 0);
4887 SDValue V2 = Op.getOperand(i: 1);
4888 SDLoc DL(Op);
4889 MVT XLenVT = Subtarget.getXLenVT();
4890 MVT VT = Op.getSimpleValueType();
4891 unsigned NumElts = VT.getVectorNumElements();
4892 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op.getNode());
4893
4894 if (VT.getVectorElementType() == MVT::i1) {
4895 // Lower to a vror.vi of a larger element type if possible before we promote
4896 // i1s to i8s.
4897 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4898 return V;
4899 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4900 return V;
4901
4902 // Promote i1 shuffle to i8 shuffle.
4903 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4904 V1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V1);
4905 V2 = V2.isUndef() ? DAG.getUNDEF(VT: WidenVT)
4906 : DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V2);
4907 SDValue Shuffled = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: V1, N2: V2, Mask: SVN->getMask());
4908 return DAG.getSetCC(DL, VT, LHS: Shuffled, RHS: DAG.getConstant(Val: 0, DL, VT: WidenVT),
4909 Cond: ISD::SETNE);
4910 }
4911
4912 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4913
4914 auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
4915
4916 if (SVN->isSplat()) {
4917 const int Lane = SVN->getSplatIndex();
4918 if (Lane >= 0) {
4919 MVT SVT = VT.getVectorElementType();
4920
4921 // Turn splatted vector load into a strided load with an X0 stride.
4922 SDValue V = V1;
4923 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4924 // with undef.
4925 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4926 int Offset = Lane;
4927 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4928 int OpElements =
4929 V.getOperand(i: 0).getSimpleValueType().getVectorNumElements();
4930 V = V.getOperand(i: Offset / OpElements);
4931 Offset %= OpElements;
4932 }
4933
4934 // We need to ensure the load isn't atomic or volatile.
4935 if (ISD::isNormalLoad(N: V.getNode()) && cast<LoadSDNode>(Val&: V)->isSimple()) {
4936 auto *Ld = cast<LoadSDNode>(Val&: V);
4937 Offset *= SVT.getStoreSize();
4938 SDValue NewAddr = DAG.getMemBasePlusOffset(
4939 Base: Ld->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: Offset), DL);
4940
4941 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4942 if (SVT.isInteger() && SVT.bitsGT(VT: XLenVT)) {
4943 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4944 SDValue IntID =
4945 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4946 SDValue Ops[] = {Ld->getChain(),
4947 IntID,
4948 DAG.getUNDEF(ContainerVT),
4949 NewAddr,
4950 DAG.getRegister(RISCV::X0, XLenVT),
4951 VL};
4952 SDValue NewLoad = DAG.getMemIntrinsicNode(
4953 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4954 DAG.getMachineFunction().getMachineMemOperand(
4955 MMO: Ld->getMemOperand(), Offset, Size: SVT.getStoreSize()));
4956 DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: NewLoad);
4957 return convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
4958 }
4959
4960 // Otherwise use a scalar load and splat. This will give the best
4961 // opportunity to fold a splat into the operation. ISel can turn it into
4962 // the x0 strided load if we aren't able to fold away the select.
4963 if (SVT.isFloatingPoint())
4964 V = DAG.getLoad(VT: SVT, dl: DL, Chain: Ld->getChain(), Ptr: NewAddr,
4965 PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset),
4966 Alignment: Ld->getOriginalAlign(),
4967 MMOFlags: Ld->getMemOperand()->getFlags());
4968 else
4969 V = DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl: DL, VT: XLenVT, Chain: Ld->getChain(), Ptr: NewAddr,
4970 PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset), MemVT: SVT,
4971 Alignment: Ld->getOriginalAlign(),
4972 MMOFlags: Ld->getMemOperand()->getFlags());
4973 DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: V);
4974
4975 unsigned Opc =
4976 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4977 SDValue Splat =
4978 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: V, N3: VL);
4979 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
4980 }
4981
4982 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
4983 assert(Lane < (int)NumElts && "Unexpected lane!");
4984 SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT,
4985 N1: V1, N2: DAG.getConstant(Val: Lane, DL, VT: XLenVT),
4986 N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL);
4987 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
4988 }
4989 }
4990
4991 // For exact VLEN m2 or greater, try to split to m1 operations if we
4992 // can split cleanly.
4993 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4994 return V;
4995
4996 ArrayRef<int> Mask = SVN->getMask();
4997
4998 if (SDValue V =
4999 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5000 return V;
5001
5002 if (SDValue V =
5003 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5004 return V;
5005
5006 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5007 // available.
5008 if (Subtarget.hasStdExtZvkb())
5009 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5010 return V;
5011
5012 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5013 // be undef which can be handled with a single SLIDEDOWN/UP.
5014 int LoSrc, HiSrc;
5015 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5016 if (Rotation > 0) {
5017 SDValue LoV, HiV;
5018 if (LoSrc >= 0) {
5019 LoV = LoSrc == 0 ? V1 : V2;
5020 LoV = convertToScalableVector(VT: ContainerVT, V: LoV, DAG, Subtarget);
5021 }
5022 if (HiSrc >= 0) {
5023 HiV = HiSrc == 0 ? V1 : V2;
5024 HiV = convertToScalableVector(VT: ContainerVT, V: HiV, DAG, Subtarget);
5025 }
5026
5027 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5028 // to slide LoV up by (NumElts - Rotation).
5029 unsigned InvRotate = NumElts - Rotation;
5030
5031 SDValue Res = DAG.getUNDEF(VT: ContainerVT);
5032 if (HiV) {
      // Even though we could use a smaller VL, don't do so, to avoid a
      // vsetivli toggle.
5035 Res = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: HiV,
5036 Offset: DAG.getConstant(Val: Rotation, DL, VT: XLenVT), Mask: TrueMask, VL);
5037 }
5038 if (LoV)
5039 Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: LoV,
5040 Offset: DAG.getConstant(Val: InvRotate, DL, VT: XLenVT), Mask: TrueMask, VL,
5041 Policy: RISCVII::TAIL_AGNOSTIC);
5042
5043 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
5044 }
5045
5046 // If this is a deinterleave and we can widen the vector, then we can use
5047 // vnsrl to deinterleave.
5048 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5049 return getDeinterleaveViaVNSRL(DL, VT, Src: V1.getOperand(i: 0), EvenElts: Mask[0] == 0,
5050 Subtarget, DAG);
5051 }
5052
5053 if (SDValue V =
5054 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5055 return V;
5056
5057 // Detect an interleave shuffle and lower to
  // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5059 int EvenSrc, OddSrc;
5060 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5061 // Extract the halves of the vectors.
5062 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5063
5064 int Size = Mask.size();
5065 SDValue EvenV, OddV;
5066 assert(EvenSrc >= 0 && "Undef source?");
5067 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5068 EvenV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: EvenV,
5069 N2: DAG.getVectorIdxConstant(Val: EvenSrc % Size, DL));
5070
5071 assert(OddSrc >= 0 && "Undef source?");
5072 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5073 OddV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: OddV,
5074 N2: DAG.getVectorIdxConstant(Val: OddSrc % Size, DL));
5075
5076 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5077 }
5078
  // Handle any remaining single-source shuffles.
5081 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5082 if (V2.isUndef()) {
5083 // We might be able to express the shuffle as a bitrotate. But even if we
5084 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5085 // shifts and a vor will have a higher throughput than a vrgather.
5086 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5087 return V;
5088
5089 if (VT.getScalarSizeInBits() == 8 &&
5090 any_of(Range&: Mask, P: [&](const auto &Idx) { return Idx > 255; })) {
5091 // On such a vector we're unable to use i8 as the index type.
5092 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5093 // may involve vector splitting if we're already at LMUL=8, or our
5094 // user-supplied maximum fixed-length LMUL.
5095 return SDValue();
5096 }
5097
5098 // Base case for the two operand recursion below - handle the worst case
5099 // single source shuffle.
5100 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5101 MVT IndexVT = VT.changeTypeToInteger();
5102 // Since we can't introduce illegal index types at this stage, use i16 and
5103 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5104 // than XLenVT.
5105 if (IndexVT.getScalarType().bitsGT(VT: XLenVT)) {
5106 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5107 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5108 }
5109
5110 // If the mask allows, we can do all the index computation in 16 bits. This
5111 // requires less work and less register pressure at high LMUL, and creates
5112 // smaller constants which may be cheaper to materialize.
5113 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5114 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5115 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5116 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5117 }
5118
5119 MVT IndexContainerVT =
5120 ContainerVT.changeVectorElementType(EltVT: IndexVT.getScalarType());
5121
5122 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
5123 SmallVector<SDValue> GatherIndicesLHS;
5124 for (int MaskIndex : Mask) {
5125 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5126 GatherIndicesLHS.push_back(Elt: IsLHSIndex
5127 ? DAG.getConstant(Val: MaskIndex, DL, VT: XLenVT)
5128 : DAG.getUNDEF(VT: XLenVT));
5129 }
5130 SDValue LHSIndices = DAG.getBuildVector(VT: IndexVT, DL, Ops: GatherIndicesLHS);
5131 LHSIndices = convertToScalableVector(VT: IndexContainerVT, V: LHSIndices, DAG,
5132 Subtarget);
5133 SDValue Gather = DAG.getNode(Opcode: GatherVVOpc, DL, VT: ContainerVT, N1: V1, N2: LHSIndices,
5134 N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL);
5135 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
5136 }
5137
5138 // By default we preserve the original operand order, and use a mask to
5139 // select LHS as true and RHS as false. However, since RVV vector selects may
5140 // feature splats but only on the LHS, we may choose to invert our mask and
5141 // instead select between RHS and LHS.
5142 bool SwapOps = DAG.isSplatValue(V: V2) && !DAG.isSplatValue(V: V1);
5143
5144 // Detect shuffles which can be re-expressed as vector selects; these are
5145 // shuffles in which each element in the destination is taken from an element
5146 // at the corresponding index in either source vectors.
5147 bool IsSelect = all_of(Range: enumerate(First&: Mask), P: [&](const auto &MaskIdx) {
5148 int MaskIndex = MaskIdx.value();
5149 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5150 });
5151 if (IsSelect) {
5152 // Now construct the mask that will be used by the vselect operation.
5153 SmallVector<SDValue> MaskVals;
5154 for (int MaskIndex : Mask) {
5155 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5156 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
5157 }
5158
5159 if (SwapOps)
5160 std::swap(a&: V1, b&: V2);
5161
5162 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5163 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5164 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
5165 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V1, N3: V2);
5166 }
5167
5168 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5169 // merged with a second vrgather.
5170 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5171 SmallVector<SDValue> MaskVals;
5172
5173 // Now construct the mask that will be used by the blended vrgather operation.
  // Also construct the appropriate indices into each vector.
5175 for (int MaskIndex : Mask) {
5176 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5177 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
5178 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5179 ShuffleMaskLHS.push_back(Elt: IsLHSOrUndefIndex && MaskIndex >= 0
5180 ? MaskIndex : -1);
5181 ShuffleMaskRHS.push_back(Elt: IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5182 }
5183
5184 if (SwapOps) {
5185 std::swap(a&: V1, b&: V2);
5186 std::swap(LHS&: ShuffleMaskLHS, RHS&: ShuffleMaskRHS);
5187 }
5188
5189 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5190 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5191 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
5192
5193 // Recursively invoke lowering for each operand if we had two
5194 // independent single source shuffles, and then combine the result via a
5195 // vselect. Note that the vselect will likely be folded back into the
5196 // second permute (vrgather, or other) by the post-isel combine.
5197 V1 = DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskLHS);
5198 V2 = DAG.getVectorShuffle(VT, dl: DL, N1: V2, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskRHS);
5199 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V2, N3: V1);
5200}
5201
5202bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5203 // Support splats for any type. These should type legalize well.
5204 if (ShuffleVectorSDNode::isSplatMask(Mask: M.data(), VT))
5205 return true;
5206
5207 // Only support legal VTs for other shuffles for now.
5208 if (!isTypeLegal(VT))
5209 return false;
5210
5211 MVT SVT = VT.getSimpleVT();
5212
5213 // Not for i1 vectors.
5214 if (SVT.getScalarType() == MVT::i1)
5215 return false;
5216
5217 int Dummy1, Dummy2;
5218 return (isElementRotate(LoSrc&: Dummy1, HiSrc&: Dummy2, Mask: M) > 0) ||
5219 isInterleaveShuffle(Mask: M, VT: SVT, EvenSrc&: Dummy1, OddSrc&: Dummy2, Subtarget);
5220}
5221
5222// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5223// the exponent.
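// For example (a scalar sketch of the idea): for ctlz_zero_undef(i32 16), the
// value 16 converts to a float whose biased exponent field is B + 4, where B
// is the exponent bias; subtracting that from Adjust = B + (EltSize - 1) =
// B + 31 gives 27, the number of leading zeros. For cttz_zero_undef the input
// is first reduced to its lowest set bit with x & -x, and only the bias is
// subtracted from the extracted exponent.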
5224SDValue
5225RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5226 SelectionDAG &DAG) const {
5227 MVT VT = Op.getSimpleValueType();
5228 unsigned EltSize = VT.getScalarSizeInBits();
5229 SDValue Src = Op.getOperand(i: 0);
5230 SDLoc DL(Op);
5231 MVT ContainerVT = VT;
5232
5233 SDValue Mask, VL;
5234 if (Op->isVPOpcode()) {
5235 Mask = Op.getOperand(i: 1);
5236 if (VT.isFixedLengthVector())
5237 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
5238 Subtarget);
5239 VL = Op.getOperand(i: 2);
5240 }
5241
  // We choose an FP type that can represent the value if possible. Otherwise,
  // we use a round-towards-zero conversion so the exponent of the result is
  // correct.
5244 // TODO: Use f16 for i8 when possible?
5245 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5246 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5247 FloatEltVT = MVT::f32;
5248 MVT FloatVT = MVT::getVectorVT(VT: FloatEltVT, EC: VT.getVectorElementCount());
5249
5250 // Legal types should have been checked in the RISCVTargetLowering
5251 // constructor.
5252 // TODO: Splitting may make sense in some cases.
5253 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5254 "Expected legal float type!");
5255
5256 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5257 // The trailing zero count is equal to log2 of this single bit value.
5258 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5259 SDValue Neg = DAG.getNegative(Val: Src, DL, VT);
5260 Src = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: Neg);
5261 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5262 SDValue Neg = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
5263 N2: Src, N3: Mask, N4: VL);
5264 Src = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Src, N2: Neg, N3: Mask, N4: VL);
5265 }
5266
5267 // We have a legal FP type, convert to it.
5268 SDValue FloatVal;
5269 if (FloatVT.bitsGT(VT)) {
5270 if (Op->isVPOpcode())
5271 FloatVal = DAG.getNode(Opcode: ISD::VP_UINT_TO_FP, DL, VT: FloatVT, N1: Src, N2: Mask, N3: VL);
5272 else
5273 FloatVal = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVT, Operand: Src);
5274 } else {
5275 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5276 if (VT.isFixedLengthVector()) {
5277 ContainerVT = getContainerForFixedLengthVector(VT);
5278 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
5279 }
5280 if (!Op->isVPOpcode())
5281 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
5282 SDValue RTZRM =
5283 DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT());
5284 MVT ContainerFloatVT =
5285 MVT::getVectorVT(VT: FloatEltVT, EC: ContainerVT.getVectorElementCount());
5286 FloatVal = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_F_XU_VL, DL, VT: ContainerFloatVT,
5287 N1: Src, N2: Mask, N3: RTZRM, N4: VL);
5288 if (VT.isFixedLengthVector())
5289 FloatVal = convertFromScalableVector(VT: FloatVT, V: FloatVal, DAG, Subtarget);
5290 }
5291 // Bitcast to integer and shift the exponent to the LSB.
5292 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5293 SDValue Bitcast = DAG.getBitcast(VT: IntVT, V: FloatVal);
5294 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5295
5296 SDValue Exp;
5297 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5298 if (Op->isVPOpcode()) {
5299 Exp = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT: IntVT, N1: Bitcast,
5300 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT), N3: Mask, N4: VL);
5301 Exp = DAG.getVPZExtOrTrunc(DL, VT, Op: Exp, Mask, EVL: VL);
5302 } else {
5303 Exp = DAG.getNode(Opcode: ISD::SRL, DL, VT: IntVT, N1: Bitcast,
5304 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT));
5305 if (IntVT.bitsLT(VT))
5306 Exp = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Exp);
5307 else if (IntVT.bitsGT(VT))
5308 Exp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Exp);
5309 }
5310
5311 // The exponent contains log2 of the value in biased form.
5312 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5313 // For trailing zeros, we just need to subtract the bias.
5314 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5315 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Exp,
5316 N2: DAG.getConstant(Val: ExponentBias, DL, VT));
5317 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5318 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Exp,
5319 N2: DAG.getConstant(Val: ExponentBias, DL, VT), N3: Mask, N4: VL);
5320
5321 // For leading zeros, we need to remove the bias and convert from log2 to
5322 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5323 unsigned Adjust = ExponentBias + (EltSize - 1);
5324 SDValue Res;
5325 if (Op->isVPOpcode())
5326 Res = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp,
5327 N3: Mask, N4: VL);
5328 else
5329 Res = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp);
5330
  // For a zero input, the result above equals Adjust, which is greater than
  // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5333 if (Op.getOpcode() == ISD::CTLZ)
5334 Res = DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: Res, N2: DAG.getConstant(Val: EltSize, DL, VT));
5335 else if (Op.getOpcode() == ISD::VP_CTLZ)
5336 Res = DAG.getNode(Opcode: ISD::VP_UMIN, DL, VT, N1: Res,
5337 N2: DAG.getConstant(Val: EltSize, DL, VT), N3: Mask, N4: VL);
5338 return Res;
5339}
5340
5341// While RVV has alignment restrictions, we should always be able to load as a
5342// legal equivalently-sized byte-typed vector instead. This method is
5343// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5344// the load is already correctly-aligned, it returns SDValue().
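// For example (illustrative): a v8i16 load with only 1-byte alignment is
// re-expressed as a v16i8 load of the same address and size, and the result
// is bitcast back to v8i16.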
5345SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5346 SelectionDAG &DAG) const {
5347 auto *Load = cast<LoadSDNode>(Val&: Op);
5348 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5349
5350 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
5351 VT: Load->getMemoryVT(),
5352 MMO: *Load->getMemOperand()))
5353 return SDValue();
5354
5355 SDLoc DL(Op);
5356 MVT VT = Op.getSimpleValueType();
5357 unsigned EltSizeBits = VT.getScalarSizeInBits();
5358 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5359 "Unexpected unaligned RVV load type");
5360 MVT NewVT =
5361 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5362 assert(NewVT.isValid() &&
5363 "Expecting equally-sized RVV vector types to be legal");
5364 SDValue L = DAG.getLoad(VT: NewVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(),
5365 PtrInfo: Load->getPointerInfo(), Alignment: Load->getOriginalAlign(),
5366 MMOFlags: Load->getMemOperand()->getFlags());
5367 return DAG.getMergeValues(Ops: {DAG.getBitcast(VT, V: L), L.getValue(R: 1)}, dl: DL);
5368}
5369
5370// While RVV has alignment restrictions, we should always be able to store as a
5371// legal equivalently-sized byte-typed vector instead. This method is
5372// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5373// returns SDValue() if the store is already correctly aligned.
5374SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5375 SelectionDAG &DAG) const {
5376 auto *Store = cast<StoreSDNode>(Val&: Op);
5377 assert(Store && Store->getValue().getValueType().isVector() &&
5378 "Expected vector store");
5379
5380 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
5381 VT: Store->getMemoryVT(),
5382 MMO: *Store->getMemOperand()))
5383 return SDValue();
5384
5385 SDLoc DL(Op);
5386 SDValue StoredVal = Store->getValue();
5387 MVT VT = StoredVal.getSimpleValueType();
5388 unsigned EltSizeBits = VT.getScalarSizeInBits();
5389 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5390 "Unexpected unaligned RVV store type");
5391 MVT NewVT =
5392 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5393 assert(NewVT.isValid() &&
5394 "Expecting equally-sized RVV vector types to be legal");
5395 StoredVal = DAG.getBitcast(VT: NewVT, V: StoredVal);
5396 return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: StoredVal, Ptr: Store->getBasePtr(),
5397 PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(),
5398 MMOFlags: Store->getMemOperand()->getFlags());
5399}
5400
5401static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5402 const RISCVSubtarget &Subtarget) {
5403 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5404
5405 int64_t Imm = cast<ConstantSDNode>(Val&: Op)->getSExtValue();
5406
5407 // All simm32 constants should be handled by isel.
5408 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5409 // this check redundant, but small immediates are common so this check
5410 // should have better compile time.
5411 if (isInt<32>(x: Imm))
5412 return Op;
5413
  // We only need to cost the immediate if constant pool lowering is enabled.
5415 if (!Subtarget.useConstantPoolForLargeInts())
5416 return Op;
5417
5418 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5419 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5420 return Op;
5421
5422 // Optimizations below are disabled for opt size. If we're optimizing for
5423 // size, use a constant pool.
5424 if (DAG.shouldOptForSize())
5425 return SDValue();
5426
  // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool. It will require an extra
  // temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
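  // For example (illustrative): with Zba, an immediate such as
  // 0xdeadbeefdeadbeef (identical halves, bits 31 and 63 set) can be built by
  // materializing the sign-extended low half X = 0xffffffffdeadbeef and then
  // computing ADD_UW X, (SLLI X, 32), since add.uw adds the zero-extended low
  // word of X onto the shifted copy.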
5432 unsigned ShiftAmt, AddOpc;
5433 RISCVMatInt::InstSeq SeqLo =
5434 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5435 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5436 return Op;
5437
5438 return SDValue();
5439}
5440
5441static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5442 const RISCVSubtarget &Subtarget) {
5443 SDLoc dl(Op);
5444 AtomicOrdering FenceOrdering =
5445 static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1));
5446 SyncScope::ID FenceSSID =
5447 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
5448
5449 if (Subtarget.hasStdExtZtso()) {
5450 // The only fence that needs an instruction is a sequentially-consistent
5451 // cross-thread fence.
5452 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5453 FenceSSID == SyncScope::System)
5454 return Op;
5455
5456 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5457 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5458 }
5459
5460 // singlethread fences only synchronize with signal handlers on the same
5461 // thread and thus only need to preserve instruction order, not actually
5462 // enforce memory ordering.
5463 if (FenceSSID == SyncScope::SingleThread)
5464 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5465 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5466
5467 return Op;
5468}
5469
5470static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5471 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5472 "Unexpected custom legalisation");
5473
5474 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
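 // For example (illustrative): saddsat i32 0x7fffffff, 1 widens to an i64 add
 // producing 0x80000000, which the smin against INT32_MAX clamps back to
 // 0x7fffffff before the truncate.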
5475 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5476 SDLoc DL(Op);
5477 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5478 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5479 SDValue Result =
5480 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5481
5482 APInt MinVal = APInt::getSignedMinValue(numBits: 32).sext(width: 64);
5483 APInt MaxVal = APInt::getSignedMaxValue(numBits: 32).sext(width: 64);
5484 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5485 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5486 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5487 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5488 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5489}
5490
5491static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5492 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5493 "Unexpected custom legalisation");
5494
5495 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5496 // sign extend allows overflow of the lower 32 bits to be detected in
5497 // the promoted type.
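 // For example (illustrative): uaddsat i32 0xffffffff, 1 becomes a 64-bit
 // uaddsat of 0xffffffffffffffff and 1, which saturates to all-ones; the
 // truncate then yields the expected 0xffffffff.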
5498 SDLoc DL(Op);
5499 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5500 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5501 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5502 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5503}
5504
5505// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
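// For example (illustrative): for saddo i32 0x7fffffff, 1 the 64-bit sum is
// 0x80000000, but sign-extending it from bit 31 gives 0xffffffff80000000, so
// the setne on the two values reports overflow.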
5506static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5507 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5508 "Unexpected custom legalisation");
5509 if (isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
5510 return SDValue();
5511
5512 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5513 SDLoc DL(Op);
5514 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5515 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5516 SDValue WideOp =
5517 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5518 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5519 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5520 DAG.getValueType(MVT::i32));
5521 SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: WideOp, RHS: SExt,
5522 Cond: ISD::SETNE);
5523 return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL);
5524}
5525
5526// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5527static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5528 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5529 "Unexpected custom legalisation");
5530 SDLoc DL(Op);
5531 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5532 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5533 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5534 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5535 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5536 DAG.getValueType(MVT::i32));
5537 SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: Mul, RHS: SExt,
5538 Cond: ISD::SETNE);
5539 return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL);
5540}
5541
5542SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5543 SelectionDAG &DAG) const {
5544 SDLoc DL(Op);
5545 MVT VT = Op.getSimpleValueType();
5546 MVT XLenVT = Subtarget.getXLenVT();
5547 unsigned Check = Op.getConstantOperandVal(i: 1);
5548 unsigned TDCMask = 0;
5549 if (Check & fcSNan)
5550 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5551 if (Check & fcQNan)
5552 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5553 if (Check & fcPosInf)
5554 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5555 if (Check & fcNegInf)
5556 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5557 if (Check & fcPosNormal)
5558 TDCMask |= RISCV::FPMASK_Positive_Normal;
5559 if (Check & fcNegNormal)
5560 TDCMask |= RISCV::FPMASK_Negative_Normal;
5561 if (Check & fcPosSubnormal)
5562 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5563 if (Check & fcNegSubnormal)
5564 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5565 if (Check & fcPosZero)
5566 TDCMask |= RISCV::FPMASK_Positive_Zero;
5567 if (Check & fcNegZero)
5568 TDCMask |= RISCV::FPMASK_Negative_Zero;
5569
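 // At this point TDCMask mirrors the 10-bit class mask produced by the
 // fclass/vfclass.v instructions (bit 0 = negative infinity, ..., bit 9 =
 // quiet NaN), so the class test reduces to an AND (or, for a single class,
 // an equality check) against that mask.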
5570 bool IsOneBitMask = isPowerOf2_32(Value: TDCMask);
5571
5572 SDValue TDCMaskV = DAG.getConstant(Val: TDCMask, DL, VT: XLenVT);
5573
5574 if (VT.isVector()) {
5575 SDValue Op0 = Op.getOperand(i: 0);
5576 MVT VT0 = Op.getOperand(i: 0).getSimpleValueType();
5577
5578 if (VT.isScalableVector()) {
5579 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5580 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT0, DL, DAG, Subtarget);
5581 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5582 Mask = Op.getOperand(i: 2);
5583 VL = Op.getOperand(i: 3);
5584 }
5585 SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: DstVT, N1: Op0, N2: Mask,
5586 N3: VL, Flags: Op->getFlags());
5587 if (IsOneBitMask)
5588 return DAG.getSetCC(DL, VT, LHS: FPCLASS,
5589 RHS: DAG.getConstant(Val: TDCMask, DL, VT: DstVT),
5590 Cond: ISD::CondCode::SETEQ);
5591 SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: DstVT, N1: FPCLASS,
5592 N2: DAG.getConstant(Val: TDCMask, DL, VT: DstVT));
5593 return DAG.getSetCC(DL, VT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: DstVT),
5594 Cond: ISD::SETNE);
5595 }
5596
5597 MVT ContainerVT0 = getContainerForFixedLengthVector(VT: VT0);
5598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5599 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5600 auto [Mask, VL] = getDefaultVLOps(VecVT: VT0, ContainerVT: ContainerVT0, DL, DAG, Subtarget);
5601 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5602 Mask = Op.getOperand(i: 2);
5603 MVT MaskContainerVT =
5604 getContainerForFixedLengthVector(VT: Mask.getSimpleValueType());
5605 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
5606 VL = Op.getOperand(i: 3);
5607 }
5608 Op0 = convertToScalableVector(VT: ContainerVT0, V: Op0, DAG, Subtarget);
5609
5610 SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: ContainerDstVT, N1: Op0,
5611 N2: Mask, N3: VL, Flags: Op->getFlags());
5612
5613 TDCMaskV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT,
5614 N1: DAG.getUNDEF(VT: ContainerDstVT), N2: TDCMaskV, N3: VL);
5615 if (IsOneBitMask) {
5616 SDValue VMSEQ =
5617 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
5618 Ops: {FPCLASS, TDCMaskV, DAG.getCondCode(Cond: ISD::SETEQ),
5619 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5620 return convertFromScalableVector(VT, V: VMSEQ, DAG, Subtarget);
5621 }
5622 SDValue AND = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerDstVT, N1: FPCLASS,
5623 N2: TDCMaskV, N3: DAG.getUNDEF(VT: ContainerDstVT), N4: Mask, N5: VL);
5624
5625 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
5626 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT,
5627 N1: DAG.getUNDEF(VT: ContainerDstVT), N2: SplatZero, N3: VL);
5628
5629 SDValue VMSNE = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
5630 Ops: {AND, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
5631 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5632 return convertFromScalableVector(VT, V: VMSNE, DAG, Subtarget);
5633 }
5634
5635 SDValue FCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS, DL, VT: XLenVT, Operand: Op.getOperand(i: 0));
5636 SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: FCLASS, N2: TDCMaskV);
5637 SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT),
5638 Cond: ISD::CondCode::SETNE);
5639 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res);
5640}
5641
5642// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5643// operations propagate nans.
5644static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5645 const RISCVSubtarget &Subtarget) {
5646 SDLoc DL(Op);
5647 MVT VT = Op.getSimpleValueType();
5648
5649 SDValue X = Op.getOperand(i: 0);
5650 SDValue Y = Op.getOperand(i: 1);
5651
5652 if (!VT.isVector()) {
5653 MVT XLenVT = Subtarget.getXLenVT();
5654
5655 // If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
5656 // ensures that when one input is a NaN, the other will also be a NaN,
5657 // allowing the NaN to propagate. If both inputs are NaN, this will swap the
5658 // inputs, which is harmless.
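 // For example (illustrative): for fmaximum(NaN, 1.0), X fails the ordered
 // self-compare, so NewY becomes X (NaN); Y is ordered, so NewX remains X
 // (also NaN), and fmax then returns the canonical NaN that fmaximum requires.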
5659
5660 SDValue NewY = Y;
5661 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: X)) {
5662 SDValue XIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: X, RHS: X, Cond: ISD::SETOEQ);
5663 NewY = DAG.getSelect(DL, VT, Cond: XIsNonNan, LHS: Y, RHS: X);
5664 }
5665
5666 SDValue NewX = X;
5667 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: Y)) {
5668 SDValue YIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: Y, RHS: Y, Cond: ISD::SETOEQ);
5669 NewX = DAG.getSelect(DL, VT, Cond: YIsNonNan, LHS: X, RHS: Y);
5670 }
5671
5672 unsigned Opc =
5673 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5674 return DAG.getNode(Opcode: Opc, DL, VT, N1: NewX, N2: NewY);
5675 }
5676
5677 // Check for known-non-NaN inputs before converting the fixed-length vectors to scalable ones.
5678 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: X);
5679 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: Y);
5680
5681 MVT ContainerVT = VT;
5682 if (VT.isFixedLengthVector()) {
5683 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5684 X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget);
5685 Y = convertToScalableVector(VT: ContainerVT, V: Y, DAG, Subtarget);
5686 }
5687
5688 SDValue Mask, VL;
5689 if (Op->isVPOpcode()) {
5690 Mask = Op.getOperand(i: 2);
5691 if (VT.isFixedLengthVector())
5692 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
5693 Subtarget);
5694 VL = Op.getOperand(i: 3);
5695 } else {
5696 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
5697 }
5698
5699 SDValue NewY = Y;
5700 if (!XIsNeverNan) {
5701 SDValue XIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
5702 Ops: {X, X, DAG.getCondCode(Cond: ISD::SETOEQ),
5703 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5704 NewY = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: XIsNonNan, N2: Y, N3: X,
5705 N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
5706 }
5707
5708 SDValue NewX = X;
5709 if (!YIsNeverNan) {
5710 SDValue YIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
5711 Ops: {Y, Y, DAG.getCondCode(Cond: ISD::SETOEQ),
5712 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5713 NewX = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: YIsNonNan, N2: X, N3: Y,
5714 N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
5715 }
5716
5717 unsigned Opc =
5718 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5719 ? RISCVISD::VFMAX_VL
5720 : RISCVISD::VFMIN_VL;
5721 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: NewX, N2: NewY,
5722 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
5723 if (VT.isFixedLengthVector())
5724 Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
5725 return Res;
5726}
5727
5728/// Get the RISC-V target-specific VL op for a given SDNode.
5729static unsigned getRISCVVLOp(SDValue Op) {
5730#define OP_CASE(NODE) \
5731 case ISD::NODE: \
5732 return RISCVISD::NODE##_VL;
5733#define VP_CASE(NODE) \
5734 case ISD::VP_##NODE: \
5735 return RISCVISD::NODE##_VL;
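// For example, OP_CASE(ADD) expands to "case ISD::ADD: return
// RISCVISD::ADD_VL;" and VP_CASE(ADD) expands to "case ISD::VP_ADD: return
// RISCVISD::ADD_VL;".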
5736 // clang-format off
5737 switch (Op.getOpcode()) {
5738 default:
5739 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5740 OP_CASE(ADD)
5741 OP_CASE(SUB)
5742 OP_CASE(MUL)
5743 OP_CASE(MULHS)
5744 OP_CASE(MULHU)
5745 OP_CASE(SDIV)
5746 OP_CASE(SREM)
5747 OP_CASE(UDIV)
5748 OP_CASE(UREM)
5749 OP_CASE(SHL)
5750 OP_CASE(SRA)
5751 OP_CASE(SRL)
5752 OP_CASE(ROTL)
5753 OP_CASE(ROTR)
5754 OP_CASE(BSWAP)
5755 OP_CASE(CTTZ)
5756 OP_CASE(CTLZ)
5757 OP_CASE(CTPOP)
5758 OP_CASE(BITREVERSE)
5759 OP_CASE(SADDSAT)
5760 OP_CASE(UADDSAT)
5761 OP_CASE(SSUBSAT)
5762 OP_CASE(USUBSAT)
5763 OP_CASE(AVGFLOORU)
5764 OP_CASE(AVGCEILU)
5765 OP_CASE(FADD)
5766 OP_CASE(FSUB)
5767 OP_CASE(FMUL)
5768 OP_CASE(FDIV)
5769 OP_CASE(FNEG)
5770 OP_CASE(FABS)
5771 OP_CASE(FSQRT)
5772 OP_CASE(SMIN)
5773 OP_CASE(SMAX)
5774 OP_CASE(UMIN)
5775 OP_CASE(UMAX)
5776 OP_CASE(STRICT_FADD)
5777 OP_CASE(STRICT_FSUB)
5778 OP_CASE(STRICT_FMUL)
5779 OP_CASE(STRICT_FDIV)
5780 OP_CASE(STRICT_FSQRT)
5781 VP_CASE(ADD) // VP_ADD
5782 VP_CASE(SUB) // VP_SUB
5783 VP_CASE(MUL) // VP_MUL
5784 VP_CASE(SDIV) // VP_SDIV
5785 VP_CASE(SREM) // VP_SREM
5786 VP_CASE(UDIV) // VP_UDIV
5787 VP_CASE(UREM) // VP_UREM
5788 VP_CASE(SHL) // VP_SHL
5789 VP_CASE(FADD) // VP_FADD
5790 VP_CASE(FSUB) // VP_FSUB
5791 VP_CASE(FMUL) // VP_FMUL
5792 VP_CASE(FDIV) // VP_FDIV
5793 VP_CASE(FNEG) // VP_FNEG
5794 VP_CASE(FABS) // VP_FABS
5795 VP_CASE(SMIN) // VP_SMIN
5796 VP_CASE(SMAX) // VP_SMAX
5797 VP_CASE(UMIN) // VP_UMIN
5798 VP_CASE(UMAX) // VP_UMAX
5799 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5800 VP_CASE(SETCC) // VP_SETCC
5801 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5802 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5803 VP_CASE(BITREVERSE) // VP_BITREVERSE
5804 VP_CASE(SADDSAT) // VP_SADDSAT
5805 VP_CASE(UADDSAT) // VP_UADDSAT
5806 VP_CASE(SSUBSAT) // VP_SSUBSAT
5807 VP_CASE(USUBSAT) // VP_USUBSAT
5808 VP_CASE(BSWAP) // VP_BSWAP
5809 VP_CASE(CTLZ) // VP_CTLZ
5810 VP_CASE(CTTZ) // VP_CTTZ
5811 VP_CASE(CTPOP) // VP_CTPOP
5812 case ISD::CTLZ_ZERO_UNDEF:
5813 case ISD::VP_CTLZ_ZERO_UNDEF:
5814 return RISCVISD::CTLZ_VL;
5815 case ISD::CTTZ_ZERO_UNDEF:
5816 case ISD::VP_CTTZ_ZERO_UNDEF:
5817 return RISCVISD::CTTZ_VL;
5818 case ISD::FMA:
5819 case ISD::VP_FMA:
5820 return RISCVISD::VFMADD_VL;
5821 case ISD::STRICT_FMA:
5822 return RISCVISD::STRICT_VFMADD_VL;
5823 case ISD::AND:
5824 case ISD::VP_AND:
5825 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5826 return RISCVISD::VMAND_VL;
5827 return RISCVISD::AND_VL;
5828 case ISD::OR:
5829 case ISD::VP_OR:
5830 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5831 return RISCVISD::VMOR_VL;
5832 return RISCVISD::OR_VL;
5833 case ISD::XOR:
5834 case ISD::VP_XOR:
5835 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5836 return RISCVISD::VMXOR_VL;
5837 return RISCVISD::XOR_VL;
5838 case ISD::VP_SELECT:
5839 case ISD::VP_MERGE:
5840 return RISCVISD::VMERGE_VL;
5841 case ISD::VP_ASHR:
5842 return RISCVISD::SRA_VL;
5843 case ISD::VP_LSHR:
5844 return RISCVISD::SRL_VL;
5845 case ISD::VP_SQRT:
5846 return RISCVISD::FSQRT_VL;
5847 case ISD::VP_SIGN_EXTEND:
5848 return RISCVISD::VSEXT_VL;
5849 case ISD::VP_ZERO_EXTEND:
5850 return RISCVISD::VZEXT_VL;
5851 case ISD::VP_FP_TO_SINT:
5852 return RISCVISD::VFCVT_RTZ_X_F_VL;
5853 case ISD::VP_FP_TO_UINT:
5854 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5855 case ISD::FMINNUM:
5856 case ISD::VP_FMINNUM:
5857 return RISCVISD::VFMIN_VL;
5858 case ISD::FMAXNUM:
5859 case ISD::VP_FMAXNUM:
5860 return RISCVISD::VFMAX_VL;
5861 case ISD::LRINT:
5862 case ISD::VP_LRINT:
5863 case ISD::LLRINT:
5864 case ISD::VP_LLRINT:
5865 return RISCVISD::VFCVT_X_F_VL;
5866 }
5867 // clang-format on
5868#undef OP_CASE
5869#undef VP_CASE
5870}
5871
5872/// Return true if a RISC-V target-specific op has a merge operand.
5873static bool hasMergeOp(unsigned Opcode) {
5874 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5875 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5876 "not a RISC-V target specific op");
5877 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5878 126 &&
5879 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5880 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5881 21 &&
5882 "adding target specific op should update this function");
5883 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5884 return true;
5885 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5886 return true;
5887 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5888 return true;
5889 if (Opcode == RISCVISD::SETCC_VL)
5890 return true;
5891 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5892 return true;
5893 if (Opcode == RISCVISD::VMERGE_VL)
5894 return true;
5895 return false;
5896}
5897
5898/// Return true if a RISC-V target-specific op has a mask operand.
5899static bool hasMaskOp(unsigned Opcode) {
5900 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5901 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5902 "not a RISC-V target specific op");
5903 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5904 126 &&
5905 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5906 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5907 21 &&
5908 "adding target specific op should update this function");
5909 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5910 return true;
5911 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5912 return true;
5913 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5914 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5915 return true;
5916 return false;
5917}
5918
5919static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5920 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType());
5921 SDLoc DL(Op);
5922
5923 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5924 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5925
5926 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5927 if (!Op.getOperand(i: j).getValueType().isVector()) {
5928 LoOperands[j] = Op.getOperand(i: j);
5929 HiOperands[j] = Op.getOperand(i: j);
5930 continue;
5931 }
5932 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
5933 DAG.SplitVector(N: Op.getOperand(i: j), DL);
5934 }
5935
5936 SDValue LoRes =
5937 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags());
5938 SDValue HiRes =
5939 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags());
5940
5941 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes);
5942}
5943
5944static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5945 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5946 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType());
5947 SDLoc DL(Op);
5948
5949 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5950 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5951
5952 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5953 if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == j) {
5954 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
5955 DAG.SplitEVL(N: Op.getOperand(i: j), VecVT: Op.getValueType(), DL);
5956 continue;
5957 }
5958 if (!Op.getOperand(i: j).getValueType().isVector()) {
5959 LoOperands[j] = Op.getOperand(i: j);
5960 HiOperands[j] = Op.getOperand(i: j);
5961 continue;
5962 }
5963 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
5964 DAG.SplitVector(N: Op.getOperand(i: j), DL);
5965 }
5966
5967 SDValue LoRes =
5968 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags());
5969 SDValue HiRes =
5970 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags());
5971
5972 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes);
5973}
5974
5975static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5976 SDLoc DL(Op);
5977
5978 auto [Lo, Hi] = DAG.SplitVector(N: Op.getOperand(i: 1), DL);
5979 auto [MaskLo, MaskHi] = DAG.SplitVector(N: Op.getOperand(i: 2), DL);
5980 auto [EVLLo, EVLHi] =
5981 DAG.SplitEVL(N: Op.getOperand(i: 3), VecVT: Op.getOperand(i: 1).getValueType(), DL);
5982
5983 SDValue ResLo =
5984 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
5985 Ops: {Op.getOperand(i: 0), Lo, MaskLo, EVLLo}, Flags: Op->getFlags());
5986 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
5987 Ops: {ResLo, Hi, MaskHi, EVLHi}, Flags: Op->getFlags());
5988}
5989
5990static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
5991
5992 assert(Op->isStrictFPOpcode());
5993
5994 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op->getValueType(ResNo: 0));
5995
5996 SDVTList LoVTs = DAG.getVTList(VT1: LoVT, VT2: Op->getValueType(ResNo: 1));
5997 SDVTList HiVTs = DAG.getVTList(VT1: HiVT, VT2: Op->getValueType(ResNo: 1));
5998
5999 SDLoc DL(Op);
6000
6001 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6002 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6003
6004 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6005 if (!Op.getOperand(i: j).getValueType().isVector()) {
6006 LoOperands[j] = Op.getOperand(i: j);
6007 HiOperands[j] = Op.getOperand(i: j);
6008 continue;
6009 }
6010 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
6011 DAG.SplitVector(N: Op.getOperand(i: j), DL);
6012 }
6013
6014 SDValue LoRes =
6015 DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: LoVTs, Ops: LoOperands, Flags: Op->getFlags());
6016 HiOperands[0] = LoRes.getValue(R: 1);
6017 SDValue HiRes =
6018 DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: HiVTs, Ops: HiOperands, Flags: Op->getFlags());
6019
6020 SDValue V = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op->getValueType(ResNo: 0),
6021 N1: LoRes.getValue(R: 0), N2: HiRes.getValue(R: 0));
6022 return DAG.getMergeValues(Ops: {V, HiRes.getValue(R: 1)}, dl: DL);
6023}
6024
6025SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6026 SelectionDAG &DAG) const {
6027 switch (Op.getOpcode()) {
6028 default:
6029 report_fatal_error(reason: "unimplemented operand");
6030 case ISD::ATOMIC_FENCE:
6031 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6032 case ISD::GlobalAddress:
6033 return lowerGlobalAddress(Op, DAG);
6034 case ISD::BlockAddress:
6035 return lowerBlockAddress(Op, DAG);
6036 case ISD::ConstantPool:
6037 return lowerConstantPool(Op, DAG);
6038 case ISD::JumpTable:
6039 return lowerJumpTable(Op, DAG);
6040 case ISD::GlobalTLSAddress:
6041 return lowerGlobalTLSAddress(Op, DAG);
6042 case ISD::Constant:
6043 return lowerConstant(Op, DAG, Subtarget);
6044 case ISD::SELECT:
6045 return lowerSELECT(Op, DAG);
6046 case ISD::BRCOND:
6047 return lowerBRCOND(Op, DAG);
6048 case ISD::VASTART:
6049 return lowerVASTART(Op, DAG);
6050 case ISD::FRAMEADDR:
6051 return lowerFRAMEADDR(Op, DAG);
6052 case ISD::RETURNADDR:
6053 return lowerRETURNADDR(Op, DAG);
6054 case ISD::SADDO:
6055 case ISD::SSUBO:
6056 return lowerSADDO_SSUBO(Op, DAG);
6057 case ISD::SMULO:
6058 return lowerSMULO(Op, DAG);
6059 case ISD::SHL_PARTS:
6060 return lowerShiftLeftParts(Op, DAG);
6061 case ISD::SRA_PARTS:
6062 return lowerShiftRightParts(Op, DAG, IsSRA: true);
6063 case ISD::SRL_PARTS:
6064 return lowerShiftRightParts(Op, DAG, IsSRA: false);
6065 case ISD::ROTL:
6066 case ISD::ROTR:
6067 if (Op.getValueType().isFixedLengthVector()) {
6068 assert(Subtarget.hasStdExtZvkb());
6069 return lowerToScalableOp(Op, DAG);
6070 }
6071 assert(Subtarget.hasVendorXTHeadBb() &&
6072 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6073 "Unexpected custom legalization");
6074 // XTHeadBb only supports rotate by constant.
6075 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
6076 return SDValue();
6077 return Op;
6078 case ISD::BITCAST: {
6079 SDLoc DL(Op);
6080 EVT VT = Op.getValueType();
6081 SDValue Op0 = Op.getOperand(i: 0);
6082 EVT Op0VT = Op0.getValueType();
6083 MVT XLenVT = Subtarget.getXLenVT();
6084 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6085 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6086 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0);
6087 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6088 return FPConv;
6089 }
6090 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6091 Subtarget.hasStdExtZfbfmin()) {
6092 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0);
6093 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6094 return FPConv;
6095 }
6096 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6097 Subtarget.hasStdExtFOrZfinx()) {
6098 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6099 SDValue FPConv =
6100 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6101 return FPConv;
6102 }
6103 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6104 SDValue Lo, Hi;
6105 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6106 SDValue RetReg =
6107 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6108 return RetReg;
6109 }
6110
6111 // Consider other scalar<->scalar casts as legal if the types are legal.
6112 // Otherwise expand them.
6113 if (!VT.isVector() && !Op0VT.isVector()) {
6114 if (isTypeLegal(VT) && isTypeLegal(VT: Op0VT))
6115 return Op;
6116 return SDValue();
6117 }
6118
6119 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6120 "Unexpected types");
6121
6122 if (VT.isFixedLengthVector()) {
6123 // We can handle fixed length vector bitcasts with a simple replacement
6124 // in isel.
6125 if (Op0VT.isFixedLengthVector())
6126 return Op;
6127 // When bitcasting from a scalar to a fixed-length vector, insert the
6128 // scalar into a one-element vector of the scalar's type, then bitcast
6129 // that vector to the result type.
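 // For example (illustrative): bitcast i64 %x to v4i16 becomes an
 // insert_vector_elt of %x into an undef v1i64 followed by a v1i64 -> v4i16
 // bitcast.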
6130 if (!Op0VT.isVector()) {
6131 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: Op0VT, NumElements: 1);
6132 if (!isTypeLegal(VT: BVT))
6133 return SDValue();
6134 return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: BVT,
6135 N1: DAG.getUNDEF(VT: BVT), N2: Op0,
6136 N3: DAG.getVectorIdxConstant(Val: 0, DL)));
6137 }
6138 return SDValue();
6139 }
6140 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6141 // thus: bitcast the vector to a one-element vector type whose element type
6142 // is the same as the result type, and extract the first element.
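 // For example (illustrative): bitcast v4i16 %v to i64 becomes a v4i16 ->
 // v1i64 bitcast followed by an extract_vector_elt of element 0.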
6143 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6144 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1);
6145 if (!isTypeLegal(VT: BVT))
6146 return SDValue();
6147 SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0);
6148 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec,
6149 N2: DAG.getVectorIdxConstant(Val: 0, DL));
6150 }
6151 return SDValue();
6152 }
6153 case ISD::INTRINSIC_WO_CHAIN:
6154 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6155 case ISD::INTRINSIC_W_CHAIN:
6156 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6157 case ISD::INTRINSIC_VOID:
6158 return LowerINTRINSIC_VOID(Op, DAG);
6159 case ISD::IS_FPCLASS:
6160 return LowerIS_FPCLASS(Op, DAG);
6161 case ISD::BITREVERSE: {
6162 MVT VT = Op.getSimpleValueType();
6163 if (VT.isFixedLengthVector()) {
6164 assert(Subtarget.hasStdExtZvbb());
6165 return lowerToScalableOp(Op, DAG);
6166 }
6167 SDLoc DL(Op);
6168 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6169 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6170 // Expand bitreverse to a bswap(rev8) followed by brev8.
6171 SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: Op.getOperand(i: 0));
6172 return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: BSwap);
6173 }
6174 case ISD::TRUNCATE:
6175 // Only custom-lower vector truncates
6176 if (!Op.getSimpleValueType().isVector())
6177 return Op;
6178 return lowerVectorTruncLike(Op, DAG);
6179 case ISD::ANY_EXTEND:
6180 case ISD::ZERO_EXTEND:
6181 if (Op.getOperand(0).getValueType().isVector() &&
6182 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6183 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: 1);
6184 return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VZEXT_VL);
6185 case ISD::SIGN_EXTEND:
6186 if (Op.getOperand(0).getValueType().isVector() &&
6187 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6188 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: -1);
6189 return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VSEXT_VL);
6190 case ISD::SPLAT_VECTOR_PARTS:
6191 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6192 case ISD::INSERT_VECTOR_ELT:
6193 return lowerINSERT_VECTOR_ELT(Op, DAG);
6194 case ISD::EXTRACT_VECTOR_ELT:
6195 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6196 case ISD::SCALAR_TO_VECTOR: {
6197 MVT VT = Op.getSimpleValueType();
6198 SDLoc DL(Op);
6199 SDValue Scalar = Op.getOperand(i: 0);
6200 if (VT.getVectorElementType() == MVT::i1) {
6201 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6202 SDValue V = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: WideVT, Operand: Scalar);
6203 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: V);
6204 }
6205 MVT ContainerVT = VT;
6206 if (VT.isFixedLengthVector())
6207 ContainerVT = getContainerForFixedLengthVector(VT);
6208 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
6209 Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Scalar);
6210 SDValue V = DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: ContainerVT,
6211 N1: DAG.getUNDEF(VT: ContainerVT), N2: Scalar, N3: VL);
6212 if (VT.isFixedLengthVector())
6213 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6214 return V;
6215 }
6216 case ISD::VSCALE: {
6217 MVT XLenVT = Subtarget.getXLenVT();
6218 MVT VT = Op.getSimpleValueType();
6219 SDLoc DL(Op);
6220 SDValue Res = DAG.getNode(Opcode: RISCVISD::READ_VLENB, DL, VT: XLenVT);
6221 // We define our scalable vector types for lmul=1 to use a 64-bit known
6222 // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
6223 // vscale as VLENB / 8.
6224 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6225 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6226 report_fatal_error(reason: "Support for VLEN==32 is incomplete.");
6227 // We assume VLENB is a multiple of 8. We manually choose the best shift
6228 // here because SimplifyDemandedBits isn't always able to simplify it.
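 // For example (illustrative): a multiply by vscale x 4 needs only
 // srli(VLENB, 1), since (VLENB / 8) * 4 == VLENB >> 1, while vscale x 24
 // takes the multiple-of-8 path below and becomes VLENB * 3 with no shift.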
6229 uint64_t Val = Op.getConstantOperandVal(i: 0);
6230 if (isPowerOf2_64(Value: Val)) {
6231 uint64_t Log2 = Log2_64(Value: Val);
6232 if (Log2 < 3)
6233 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res,
6234 N2: DAG.getConstant(Val: 3 - Log2, DL, VT));
6235 else if (Log2 > 3)
6236 Res = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: Res,
6237 N2: DAG.getConstant(Val: Log2 - 3, DL, VT: XLenVT));
6238 } else if ((Val % 8) == 0) {
6239 // If the multiplier is a multiple of 8, scale it down to avoid needing
6240 // to shift the VLENB value.
6241 Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: Res,
6242 N2: DAG.getConstant(Val: Val / 8, DL, VT: XLenVT));
6243 } else {
6244 SDValue VScale = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res,
6245 N2: DAG.getConstant(Val: 3, DL, VT: XLenVT));
6246 Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: VScale,
6247 N2: DAG.getConstant(Val, DL, VT: XLenVT));
6248 }
6249 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res);
6250 }
6251 case ISD::FPOWI: {
6252 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6253 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6254 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6255 Op.getOperand(1).getValueType() == MVT::i32) {
6256 SDLoc DL(Op);
6257 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6258 SDValue Powi =
6259 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6260 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6261 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6262 }
6263 return SDValue();
6264 }
6265 case ISD::FMAXIMUM:
6266 case ISD::FMINIMUM:
6267 if (Op.getValueType() == MVT::nxv32f16 &&
6268 (Subtarget.hasVInstructionsF16Minimal() &&
6269 !Subtarget.hasVInstructionsF16()))
6270 return SplitVectorOp(Op, DAG);
6271 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6272 case ISD::FP_EXTEND: {
6273 SDLoc DL(Op);
6274 EVT VT = Op.getValueType();
6275 SDValue Op0 = Op.getOperand(i: 0);
6276 EVT Op0VT = Op0.getValueType();
6277 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6278 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6279 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6280 SDValue FloatVal =
6281 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6282 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6283 }
6284
6285 if (!Op.getValueType().isVector())
6286 return Op;
6287 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6288 }
6289 case ISD::FP_ROUND: {
6290 SDLoc DL(Op);
6291 EVT VT = Op.getValueType();
6292 SDValue Op0 = Op.getOperand(i: 0);
6293 EVT Op0VT = Op0.getValueType();
6294 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6295 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6296 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6297 Subtarget.hasStdExtDOrZdinx()) {
6298 SDValue FloatVal =
6299 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6300 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6301 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6302 }
6303
6304 if (!Op.getValueType().isVector())
6305 return Op;
6306 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6307 }
6308 case ISD::STRICT_FP_ROUND:
6309 case ISD::STRICT_FP_EXTEND:
6310 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6311 case ISD::SINT_TO_FP:
6312 case ISD::UINT_TO_FP:
6313 if (Op.getValueType().isVector() &&
6314 Op.getValueType().getScalarType() == MVT::f16 &&
6315 (Subtarget.hasVInstructionsF16Minimal() &&
6316 !Subtarget.hasVInstructionsF16())) {
6317 if (Op.getValueType() == MVT::nxv32f16)
6318 return SplitVectorOp(Op, DAG);
6319 // int -> f32
6320 SDLoc DL(Op);
6321 MVT NVT =
6322 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6323 SDValue NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops());
6324 // f32 -> f16
6325 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC,
6326 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
6327 }
6328 [[fallthrough]];
6329 case ISD::FP_TO_SINT:
6330 case ISD::FP_TO_UINT:
6331 if (SDValue Op1 = Op.getOperand(i: 0);
6332 Op1.getValueType().isVector() &&
6333 Op1.getValueType().getScalarType() == MVT::f16 &&
6334 (Subtarget.hasVInstructionsF16Minimal() &&
6335 !Subtarget.hasVInstructionsF16())) {
6336 if (Op1.getValueType() == MVT::nxv32f16)
6337 return SplitVectorOp(Op, DAG);
6338 // f16 -> f32
6339 SDLoc DL(Op);
6340 MVT NVT = MVT::getVectorVT(MVT::f32,
6341 Op1.getValueType().getVectorElementCount());
6342 SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1);
6343 // f32 -> int
6344 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), Operand: WidenVec);
6345 }
6346 [[fallthrough]];
6347 case ISD::STRICT_FP_TO_SINT:
6348 case ISD::STRICT_FP_TO_UINT:
6349 case ISD::STRICT_SINT_TO_FP:
6350 case ISD::STRICT_UINT_TO_FP: {
6351 // RVV can only do fp<->int conversions to types half/double the size of
6352 // the source. We custom-lower any conversion that would require two hops
6353 // into a two-step sequence.
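 // For example (illustrative): an nxv2f16 -> nxv2i64 FP_TO_SINT is emitted as
 // an fp_extend to nxv2f32 followed by a single-hop f32 -> i64 conversion.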
6354 MVT VT = Op.getSimpleValueType();
6355 if (!VT.isVector())
6356 return Op;
6357 SDLoc DL(Op);
6358 bool IsStrict = Op->isStrictFPOpcode();
6359 SDValue Src = Op.getOperand(i: 0 + IsStrict);
6360 MVT EltVT = VT.getVectorElementType();
6361 MVT SrcVT = Src.getSimpleValueType();
6362 MVT SrcEltVT = SrcVT.getVectorElementType();
6363 unsigned EltSize = EltVT.getSizeInBits();
6364 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6365 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6366 "Unexpected vector element types");
6367
6368 bool IsInt2FP = SrcEltVT.isInteger();
6369 // Widening conversions
6370 if (EltSize > (2 * SrcEltSize)) {
6371 if (IsInt2FP) {
6372 // Do a regular integer sign/zero extension then convert to float.
6373 MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSize / 2),
6374 EC: VT.getVectorElementCount());
6375 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6376 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6377 ? ISD::ZERO_EXTEND
6378 : ISD::SIGN_EXTEND;
6379 SDValue Ext = DAG.getNode(Opcode: ExtOpcode, DL, VT: IVecVT, Operand: Src);
6380 if (IsStrict)
6381 return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(),
6382 N1: Op.getOperand(i: 0), N2: Ext);
6383 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: Ext);
6384 }
6385 // FP2Int
6386 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6387 // Do one doubling fp_extend then complete the operation by converting
6388 // to int.
6389 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6390 if (IsStrict) {
6391 auto [FExt, Chain] =
6392 DAG.getStrictFPExtendOrRound(Op: Src, Chain: Op.getOperand(i: 0), DL, VT: InterimFVT);
6393 return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(), N1: Chain, N2: FExt);
6394 }
6395 SDValue FExt = DAG.getFPExtendOrRound(Op: Src, DL, VT: InterimFVT);
6396 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: FExt);
6397 }
6398
6399 // Narrowing conversions
6400 if (SrcEltSize > (2 * EltSize)) {
6401 if (IsInt2FP) {
6402 // One narrowing int_to_fp, then an fp_round.
6403 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6404 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6405 if (IsStrict) {
6406 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6407 DAG.getVTList(InterimFVT, MVT::Other),
6408 Op.getOperand(0), Src);
6409 SDValue Chain = Int2FP.getValue(R: 1);
6410 return DAG.getStrictFPExtendOrRound(Op: Int2FP, Chain, DL, VT).first;
6411 }
6412 SDValue Int2FP = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: InterimFVT, Operand: Src);
6413 return DAG.getFPExtendOrRound(Op: Int2FP, DL, VT);
6414 }
6415 // FP2Int
6416 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6417 // representable by the integer, the result is poison.
6418 MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
6419 EC: VT.getVectorElementCount());
6420 if (IsStrict) {
6421 SDValue FP2Int =
6422 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6423 Op.getOperand(0), Src);
6424 SDValue Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int);
6425 return DAG.getMergeValues(Ops: {Res, FP2Int.getValue(R: 1)}, dl: DL);
6426 }
6427 SDValue FP2Int = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: IVecVT, Operand: Src);
6428 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int);
6429 }
6430
6431 // Scalable vectors can exit here: isel patterns handle equally-sized
6432 // conversions as well as halving/doubling ones.
6433 if (!VT.isFixedLengthVector())
6434 return Op;
6435
6436 // For fixed-length vectors we lower to a custom "VL" node.
6437 unsigned RVVOpc = 0;
6438 switch (Op.getOpcode()) {
6439 default:
6440 llvm_unreachable("Impossible opcode");
6441 case ISD::FP_TO_SINT:
6442 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6443 break;
6444 case ISD::FP_TO_UINT:
6445 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6446 break;
6447 case ISD::SINT_TO_FP:
6448 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6449 break;
6450 case ISD::UINT_TO_FP:
6451 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6452 break;
6453 case ISD::STRICT_FP_TO_SINT:
6454 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6455 break;
6456 case ISD::STRICT_FP_TO_UINT:
6457 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6458 break;
6459 case ISD::STRICT_SINT_TO_FP:
6460 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6461 break;
6462 case ISD::STRICT_UINT_TO_FP:
6463 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6464 break;
6465 }
6466
6467 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6468 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
6469 assert(ContainerVT.getVectorElementCount() ==
6470 SrcContainerVT.getVectorElementCount() && "Expected same element count");
6471
6472 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
6473
6474 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
6475 if (IsStrict) {
6476 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6477 Op.getOperand(0), Src, Mask, VL);
6478 SDValue SubVec = convertFromScalableVector(VT, V: Src, DAG, Subtarget);
6479 return DAG.getMergeValues(Ops: {SubVec, Src.getValue(R: 1)}, dl: DL);
6480 }
6481 Src = DAG.getNode(Opcode: RVVOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
6482 return convertFromScalableVector(VT, V: Src, DAG, Subtarget);
6483 }
6484 case ISD::FP_TO_SINT_SAT:
6485 case ISD::FP_TO_UINT_SAT:
6486 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6487 case ISD::FP_TO_BF16: {
6488 // Custom lower to ensure the libcall return is passed in an FPR on hard
6489 // float ABIs.
6490 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6491 SDLoc DL(Op);
6492 MakeLibCallOptions CallOptions;
6493 RTLIB::Libcall LC =
6494 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6495 SDValue Res =
6496 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6497 if (Subtarget.is64Bit() && !RV64LegalI32)
6498 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6499 return DAG.getBitcast(MVT::i32, Res);
6500 }
6501 case ISD::BF16_TO_FP: {
6502 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6503 MVT VT = Op.getSimpleValueType();
6504 SDLoc DL(Op);
6505 Op = DAG.getNode(
6506 Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0),
6507 N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL));
6508 SDValue Res = Subtarget.is64Bit()
6509 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6510 : DAG.getBitcast(MVT::f32, Op);
6511 // fp_extend if the target VT is bigger than f32.
6512 if (VT != MVT::f32)
6513 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
6514 return Res;
6515 }
6516 case ISD::FP_TO_FP16: {
6517 // Custom lower to ensure the libcall return is passed in an FPR on hard
6518 // float ABIs.
6519 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6520 SDLoc DL(Op);
6521 MakeLibCallOptions CallOptions;
6522 RTLIB::Libcall LC =
6523 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6524 SDValue Res =
6525 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6526 if (Subtarget.is64Bit() && !RV64LegalI32)
6527 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6528 return DAG.getBitcast(MVT::i32, Res);
6529 }
6530 case ISD::FP16_TO_FP: {
6531 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6532 // float ABIs.
6533 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6534 SDLoc DL(Op);
6535 MakeLibCallOptions CallOptions;
6536 SDValue Arg = Subtarget.is64Bit()
6537 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6538 Op.getOperand(0))
6539 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6540 SDValue Res =
6541 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6542 .first;
6543 return Res;
6544 }
6545 case ISD::FTRUNC:
6546 case ISD::FCEIL:
6547 case ISD::FFLOOR:
6548 case ISD::FNEARBYINT:
6549 case ISD::FRINT:
6550 case ISD::FROUND:
6551 case ISD::FROUNDEVEN:
6552 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6553 case ISD::LRINT:
6554 case ISD::LLRINT:
6555 return lowerVectorXRINT(Op, DAG, Subtarget);
6556 case ISD::VECREDUCE_ADD:
6557 case ISD::VECREDUCE_UMAX:
6558 case ISD::VECREDUCE_SMAX:
6559 case ISD::VECREDUCE_UMIN:
6560 case ISD::VECREDUCE_SMIN:
6561 return lowerVECREDUCE(Op, DAG);
6562 case ISD::VECREDUCE_AND:
6563 case ISD::VECREDUCE_OR:
6564 case ISD::VECREDUCE_XOR:
6565 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6566 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6567 return lowerVECREDUCE(Op, DAG);
6568 case ISD::VECREDUCE_FADD:
6569 case ISD::VECREDUCE_SEQ_FADD:
6570 case ISD::VECREDUCE_FMIN:
6571 case ISD::VECREDUCE_FMAX:
6572 case ISD::VECREDUCE_FMAXIMUM:
6573 case ISD::VECREDUCE_FMINIMUM:
6574 return lowerFPVECREDUCE(Op, DAG);
6575 case ISD::VP_REDUCE_ADD:
6576 case ISD::VP_REDUCE_UMAX:
6577 case ISD::VP_REDUCE_SMAX:
6578 case ISD::VP_REDUCE_UMIN:
6579 case ISD::VP_REDUCE_SMIN:
6580 case ISD::VP_REDUCE_FADD:
6581 case ISD::VP_REDUCE_SEQ_FADD:
6582 case ISD::VP_REDUCE_FMIN:
6583 case ISD::VP_REDUCE_FMAX:
6584 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6585 (Subtarget.hasVInstructionsF16Minimal() &&
6586 !Subtarget.hasVInstructionsF16()))
6587 return SplitVectorReductionOp(Op, DAG);
6588 return lowerVPREDUCE(Op, DAG);
6589 case ISD::VP_REDUCE_AND:
6590 case ISD::VP_REDUCE_OR:
6591 case ISD::VP_REDUCE_XOR:
6592 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6593 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6594 return lowerVPREDUCE(Op, DAG);
6595 case ISD::UNDEF: {
6596 MVT ContainerVT = getContainerForFixedLengthVector(VT: Op.getSimpleValueType());
6597 return convertFromScalableVector(VT: Op.getSimpleValueType(),
6598 V: DAG.getUNDEF(VT: ContainerVT), DAG, Subtarget);
6599 }
6600 case ISD::INSERT_SUBVECTOR:
6601 return lowerINSERT_SUBVECTOR(Op, DAG);
6602 case ISD::EXTRACT_SUBVECTOR:
6603 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6604 case ISD::VECTOR_DEINTERLEAVE:
6605 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6606 case ISD::VECTOR_INTERLEAVE:
6607 return lowerVECTOR_INTERLEAVE(Op, DAG);
6608 case ISD::STEP_VECTOR:
6609 return lowerSTEP_VECTOR(Op, DAG);
6610 case ISD::VECTOR_REVERSE:
6611 return lowerVECTOR_REVERSE(Op, DAG);
6612 case ISD::VECTOR_SPLICE:
6613 return lowerVECTOR_SPLICE(Op, DAG);
6614 case ISD::BUILD_VECTOR:
6615 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6616 case ISD::SPLAT_VECTOR:
6617 if (Op.getValueType().getScalarType() == MVT::f16 &&
6618 (Subtarget.hasVInstructionsF16Minimal() &&
6619 !Subtarget.hasVInstructionsF16())) {
6620 if (Op.getValueType() == MVT::nxv32f16)
6621 return SplitVectorOp(Op, DAG);
6622 SDLoc DL(Op);
6623 SDValue NewScalar =
6624 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6625 SDValue NewSplat = DAG.getNode(
6626 ISD::SPLAT_VECTOR, DL,
6627 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6628 NewScalar);
6629 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NewSplat,
6630 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
6631 }
6632 if (Op.getValueType().getVectorElementType() == MVT::i1)
6633 return lowerVectorMaskSplat(Op, DAG);
6634 return SDValue();
6635 case ISD::VECTOR_SHUFFLE:
6636 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6637 case ISD::CONCAT_VECTORS: {
6638 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6639 // better than going through the stack, as the default expansion does.
6640 SDLoc DL(Op);
6641 MVT VT = Op.getSimpleValueType();
6642 MVT ContainerVT = VT;
6643 if (VT.isFixedLengthVector())
6644 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6645
6646 // Recursively split concat_vectors with more than 2 operands:
6647 //
6648 // concat_vector op1, op2, op3, op4
6649 // ->
6650 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6651 //
6652 // This reduces the length of the chain of vslideups and allows us to
6653 // perform the vslideups at a smaller LMUL, limited to MF2.
6654 if (Op.getNumOperands() > 2 &&
6655 ContainerVT.bitsGE(VT: getLMUL1VT(VT: ContainerVT))) {
6656 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6657 assert(isPowerOf2_32(Op.getNumOperands()));
6658 size_t HalfNumOps = Op.getNumOperands() / 2;
6659 SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
6660 Ops: Op->ops().take_front(N: HalfNumOps));
6661 SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
6662 Ops: Op->ops().drop_front(N: HalfNumOps));
6663 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi);
6664 }
6665
6666 unsigned NumOpElts =
6667 Op.getOperand(i: 0).getSimpleValueType().getVectorMinNumElements();
6668 SDValue Vec = DAG.getUNDEF(VT);
6669 for (const auto &OpIdx : enumerate(First: Op->ops())) {
6670 SDValue SubVec = OpIdx.value();
6671 // Don't insert undef subvectors.
6672 if (SubVec.isUndef())
6673 continue;
6674 Vec =
6675 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Vec, N2: SubVec,
6676 N3: DAG.getVectorIdxConstant(Val: OpIdx.index() * NumOpElts, DL));
6677 }
6678 return Vec;
6679 }
6680 case ISD::LOAD:
6681 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6682 return V;
6683 if (Op.getValueType().isFixedLengthVector())
6684 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6685 return Op;
6686 case ISD::STORE:
6687 if (auto V = expandUnalignedRVVStore(Op, DAG))
6688 return V;
6689 if (Op.getOperand(i: 1).getValueType().isFixedLengthVector())
6690 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6691 return Op;
6692 case ISD::MLOAD:
6693 case ISD::VP_LOAD:
6694 return lowerMaskedLoad(Op, DAG);
6695 case ISD::MSTORE:
6696 case ISD::VP_STORE:
6697 return lowerMaskedStore(Op, DAG);
6698 case ISD::SELECT_CC: {
6699 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6700 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6701 // it into a separate SETCC+SELECT, just as LegalizeDAG would.
6702 SDValue Tmp1 = Op.getOperand(i: 0);
6703 SDValue Tmp2 = Op.getOperand(i: 1);
6704 SDValue True = Op.getOperand(i: 2);
6705 SDValue False = Op.getOperand(i: 3);
6706 EVT VT = Op.getValueType();
6707 SDValue CC = Op.getOperand(i: 4);
6708 EVT CmpVT = Tmp1.getValueType();
6709 EVT CCVT =
6710 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: CmpVT);
6711 SDLoc DL(Op);
6712 SDValue Cond =
6713 DAG.getNode(Opcode: ISD::SETCC, DL, VT: CCVT, N1: Tmp1, N2: Tmp2, N3: CC, Flags: Op->getFlags());
6714 return DAG.getSelect(DL, VT, Cond, LHS: True, RHS: False);
6715 }
6716 case ISD::SETCC: {
6717 MVT OpVT = Op.getOperand(i: 0).getSimpleValueType();
6718 if (OpVT.isScalarInteger()) {
6719 MVT VT = Op.getSimpleValueType();
6720 SDValue LHS = Op.getOperand(i: 0);
6721 SDValue RHS = Op.getOperand(i: 1);
6722 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
6723 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6724 "Unexpected CondCode");
6725
6726 SDLoc DL(Op);
6727
6728 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6729 // convert this to the equivalent of (set(u)ge X, C+1) by using
6730 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6731 // in a register.
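 // For example (illustrative): (setugt X, 5) becomes (xori (sltiu X, 6), 1),
 // i.e. "X >= 6 unsigned", with the 6 folded into the sltiu immediate.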
6732 if (isa<ConstantSDNode>(Val: RHS)) {
6733 int64_t Imm = cast<ConstantSDNode>(Val&: RHS)->getSExtValue();
6734 if (Imm != 0 && isInt<12>(x: (uint64_t)Imm + 1)) {
6735 // If this is an unsigned compare and the constant is -1, incrementing
6736 // the constant would change behavior. The result should be false.
6737 if (CCVal == ISD::SETUGT && Imm == -1)
6738 return DAG.getConstant(Val: 0, DL, VT);
6739 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6740 CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal);
6741 SDValue SetCC = DAG.getSetCC(
6742 DL, VT, LHS, RHS: DAG.getConstant(Val: Imm + 1, DL, VT: OpVT), Cond: CCVal);
6743 return DAG.getLogicalNOT(DL, Val: SetCC, VT);
6744 }
6745 }
6746
6747 // Not a constant we could handle, swap the operands and condition code to
6748 // SETLT/SETULT.
6749 CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal);
6750 return DAG.getSetCC(DL, VT, LHS: RHS, RHS: LHS, Cond: CCVal);
6751 }
6752
6753 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6754 (Subtarget.hasVInstructionsF16Minimal() &&
6755 !Subtarget.hasVInstructionsF16()))
6756 return SplitVectorOp(Op, DAG);
6757
6758 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6759 }
6760 case ISD::ADD:
6761 case ISD::SUB:
6762 case ISD::MUL:
6763 case ISD::MULHS:
6764 case ISD::MULHU:
6765 case ISD::AND:
6766 case ISD::OR:
6767 case ISD::XOR:
6768 case ISD::SDIV:
6769 case ISD::SREM:
6770 case ISD::UDIV:
6771 case ISD::UREM:
6772 case ISD::BSWAP:
6773 case ISD::CTPOP:
6774 return lowerToScalableOp(Op, DAG);
6775 case ISD::SHL:
6776 case ISD::SRA:
6777 case ISD::SRL:
6778 if (Op.getSimpleValueType().isFixedLengthVector())
6779 return lowerToScalableOp(Op, DAG);
6780 // This can be called for an i32 shift amount that needs to be promoted.
6781 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6782 "Unexpected custom legalisation");
6783 return SDValue();
6784 case ISD::FADD:
6785 case ISD::FSUB:
6786 case ISD::FMUL:
6787 case ISD::FDIV:
6788 case ISD::FNEG:
6789 case ISD::FABS:
6790 case ISD::FSQRT:
6791 case ISD::FMA:
6792 case ISD::FMINNUM:
6793 case ISD::FMAXNUM:
6794 if (Op.getValueType() == MVT::nxv32f16 &&
6795 (Subtarget.hasVInstructionsF16Minimal() &&
6796 !Subtarget.hasVInstructionsF16()))
6797 return SplitVectorOp(Op, DAG);
6798 [[fallthrough]];
6799 case ISD::AVGFLOORU:
6800 case ISD::AVGCEILU:
6801 case ISD::SMIN:
6802 case ISD::SMAX:
6803 case ISD::UMIN:
6804 case ISD::UMAX:
6805 return lowerToScalableOp(Op, DAG);
6806 case ISD::UADDSAT:
6807 case ISD::USUBSAT:
6808 if (!Op.getValueType().isVector())
6809 return lowerUADDSAT_USUBSAT(Op, DAG);
6810 return lowerToScalableOp(Op, DAG);
6811 case ISD::SADDSAT:
6812 case ISD::SSUBSAT:
6813 if (!Op.getValueType().isVector())
6814 return lowerSADDSAT_SSUBSAT(Op, DAG);
6815 return lowerToScalableOp(Op, DAG);
6816 case ISD::ABDS:
6817 case ISD::ABDU: {
6818 SDLoc dl(Op);
6819 EVT VT = Op->getValueType(ResNo: 0);
6820 SDValue LHS = DAG.getFreeze(V: Op->getOperand(Num: 0));
6821 SDValue RHS = DAG.getFreeze(V: Op->getOperand(Num: 1));
6822 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6823
6824 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6825 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6826 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6827 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6828 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
6829 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
6830 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
6831 }
6832 case ISD::ABS:
6833 case ISD::VP_ABS:
6834 return lowerABS(Op, DAG);
6835 case ISD::CTLZ:
6836 case ISD::CTLZ_ZERO_UNDEF:
6837 case ISD::CTTZ:
6838 case ISD::CTTZ_ZERO_UNDEF:
6839 if (Subtarget.hasStdExtZvbb())
6840 return lowerToScalableOp(Op, DAG);
6841 assert(Op.getOpcode() != ISD::CTTZ);
6842 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6843 case ISD::VSELECT:
6844 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6845 case ISD::FCOPYSIGN:
6846 if (Op.getValueType() == MVT::nxv32f16 &&
6847 (Subtarget.hasVInstructionsF16Minimal() &&
6848 !Subtarget.hasVInstructionsF16()))
6849 return SplitVectorOp(Op, DAG);
6850 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6851 case ISD::STRICT_FADD:
6852 case ISD::STRICT_FSUB:
6853 case ISD::STRICT_FMUL:
6854 case ISD::STRICT_FDIV:
6855 case ISD::STRICT_FSQRT:
6856 case ISD::STRICT_FMA:
6857 if (Op.getValueType() == MVT::nxv32f16 &&
6858 (Subtarget.hasVInstructionsF16Minimal() &&
6859 !Subtarget.hasVInstructionsF16()))
6860 return SplitStrictFPVectorOp(Op, DAG);
6861 return lowerToScalableOp(Op, DAG);
6862 case ISD::STRICT_FSETCC:
6863 case ISD::STRICT_FSETCCS:
6864 return lowerVectorStrictFSetcc(Op, DAG);
6865 case ISD::STRICT_FCEIL:
6866 case ISD::STRICT_FRINT:
6867 case ISD::STRICT_FFLOOR:
6868 case ISD::STRICT_FTRUNC:
6869 case ISD::STRICT_FNEARBYINT:
6870 case ISD::STRICT_FROUND:
6871 case ISD::STRICT_FROUNDEVEN:
6872 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6873 case ISD::MGATHER:
6874 case ISD::VP_GATHER:
6875 return lowerMaskedGather(Op, DAG);
6876 case ISD::MSCATTER:
6877 case ISD::VP_SCATTER:
6878 return lowerMaskedScatter(Op, DAG);
6879 case ISD::GET_ROUNDING:
6880 return lowerGET_ROUNDING(Op, DAG);
6881 case ISD::SET_ROUNDING:
6882 return lowerSET_ROUNDING(Op, DAG);
6883 case ISD::EH_DWARF_CFA:
6884 return lowerEH_DWARF_CFA(Op, DAG);
6885 case ISD::VP_SELECT:
6886 case ISD::VP_MERGE:
6887 case ISD::VP_ADD:
6888 case ISD::VP_SUB:
6889 case ISD::VP_MUL:
6890 case ISD::VP_SDIV:
6891 case ISD::VP_UDIV:
6892 case ISD::VP_SREM:
6893 case ISD::VP_UREM:
6894 case ISD::VP_UADDSAT:
6895 case ISD::VP_USUBSAT:
6896 case ISD::VP_SADDSAT:
6897 case ISD::VP_SSUBSAT:
6898 case ISD::VP_LRINT:
6899 case ISD::VP_LLRINT:
6900 return lowerVPOp(Op, DAG);
6901 case ISD::VP_AND:
6902 case ISD::VP_OR:
6903 case ISD::VP_XOR:
6904 return lowerLogicVPOp(Op, DAG);
6905 case ISD::VP_FADD:
6906 case ISD::VP_FSUB:
6907 case ISD::VP_FMUL:
6908 case ISD::VP_FDIV:
6909 case ISD::VP_FNEG:
6910 case ISD::VP_FABS:
6911 case ISD::VP_SQRT:
6912 case ISD::VP_FMA:
6913 case ISD::VP_FMINNUM:
6914 case ISD::VP_FMAXNUM:
6915 case ISD::VP_FCOPYSIGN:
6916 if (Op.getValueType() == MVT::nxv32f16 &&
6917 (Subtarget.hasVInstructionsF16Minimal() &&
6918 !Subtarget.hasVInstructionsF16()))
6919 return SplitVPOp(Op, DAG);
6920 [[fallthrough]];
6921 case ISD::VP_ASHR:
6922 case ISD::VP_LSHR:
6923 case ISD::VP_SHL:
6924 return lowerVPOp(Op, DAG);
6925 case ISD::VP_IS_FPCLASS:
6926 return LowerIS_FPCLASS(Op, DAG);
6927 case ISD::VP_SIGN_EXTEND:
6928 case ISD::VP_ZERO_EXTEND:
6929 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6930 return lowerVPExtMaskOp(Op, DAG);
6931 return lowerVPOp(Op, DAG);
6932 case ISD::VP_TRUNCATE:
6933 return lowerVectorTruncLike(Op, DAG);
6934 case ISD::VP_FP_EXTEND:
6935 case ISD::VP_FP_ROUND:
6936 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6937 case ISD::VP_SINT_TO_FP:
6938 case ISD::VP_UINT_TO_FP:
6939 if (Op.getValueType().isVector() &&
6940 Op.getValueType().getScalarType() == MVT::f16 &&
6941 (Subtarget.hasVInstructionsF16Minimal() &&
6942 !Subtarget.hasVInstructionsF16())) {
6943 if (Op.getValueType() == MVT::nxv32f16)
6944 return SplitVPOp(Op, DAG);
6945 // int -> f32
6946 SDLoc DL(Op);
6947 MVT NVT =
6948 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6949 auto NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops());
6950 // f32 -> f16
6951 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC,
6952 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
6953 }
6954 [[fallthrough]];
6955 case ISD::VP_FP_TO_SINT:
6956 case ISD::VP_FP_TO_UINT:
6957 if (SDValue Op1 = Op.getOperand(i: 0);
6958 Op1.getValueType().isVector() &&
6959 Op1.getValueType().getScalarType() == MVT::f16 &&
6960 (Subtarget.hasVInstructionsF16Minimal() &&
6961 !Subtarget.hasVInstructionsF16())) {
6962 if (Op1.getValueType() == MVT::nxv32f16)
6963 return SplitVPOp(Op, DAG);
6964 // f16 -> f32
6965 SDLoc DL(Op);
6966 MVT NVT = MVT::getVectorVT(MVT::f32,
6967 Op1.getValueType().getVectorElementCount());
6968 SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1);
6969 // f32 -> int
6970 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
6971 Ops: {WidenVec, Op.getOperand(i: 1), Op.getOperand(i: 2)});
6972 }
6973 return lowerVPFPIntConvOp(Op, DAG);
6974 case ISD::VP_SETCC:
6975 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6976 (Subtarget.hasVInstructionsF16Minimal() &&
6977 !Subtarget.hasVInstructionsF16()))
6978 return SplitVPOp(Op, DAG);
6979 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6980 return lowerVPSetCCMaskOp(Op, DAG);
6981 [[fallthrough]];
6982 case ISD::VP_SMIN:
6983 case ISD::VP_SMAX:
6984 case ISD::VP_UMIN:
6985 case ISD::VP_UMAX:
6986 case ISD::VP_BITREVERSE:
6987 case ISD::VP_BSWAP:
6988 return lowerVPOp(Op, DAG);
6989 case ISD::VP_CTLZ:
6990 case ISD::VP_CTLZ_ZERO_UNDEF:
6991 if (Subtarget.hasStdExtZvbb())
6992 return lowerVPOp(Op, DAG);
6993 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6994 case ISD::VP_CTTZ:
6995 case ISD::VP_CTTZ_ZERO_UNDEF:
6996 if (Subtarget.hasStdExtZvbb())
6997 return lowerVPOp(Op, DAG);
6998 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6999 case ISD::VP_CTPOP:
7000 return lowerVPOp(Op, DAG);
7001 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7002 return lowerVPStridedLoad(Op, DAG);
7003 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7004 return lowerVPStridedStore(Op, DAG);
7005 case ISD::VP_FCEIL:
7006 case ISD::VP_FFLOOR:
7007 case ISD::VP_FRINT:
7008 case ISD::VP_FNEARBYINT:
7009 case ISD::VP_FROUND:
7010 case ISD::VP_FROUNDEVEN:
7011 case ISD::VP_FROUNDTOZERO:
7012 if (Op.getValueType() == MVT::nxv32f16 &&
7013 (Subtarget.hasVInstructionsF16Minimal() &&
7014 !Subtarget.hasVInstructionsF16()))
7015 return SplitVPOp(Op, DAG);
7016 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7017 case ISD::VP_FMAXIMUM:
7018 case ISD::VP_FMINIMUM:
7019 if (Op.getValueType() == MVT::nxv32f16 &&
7020 (Subtarget.hasVInstructionsF16Minimal() &&
7021 !Subtarget.hasVInstructionsF16()))
7022 return SplitVPOp(Op, DAG);
7023 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7024 case ISD::EXPERIMENTAL_VP_SPLICE:
7025 return lowerVPSpliceExperimental(Op, DAG);
7026 case ISD::EXPERIMENTAL_VP_REVERSE:
7027 return lowerVPReverseExperimental(Op, DAG);
7028 }
7029}
7030
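// Helper overloads for getAddr below: wrap each kind of symbol-bearing node
// (global address, block address, constant pool, jump table) in its
// target-specific equivalent, attaching the requested relocation flags
// (e.g. RISCVII::MO_HI/MO_LO).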
7031static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7032 SelectionDAG &DAG, unsigned Flags) {
7033 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
7034}
7035
7036static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7037 SelectionDAG &DAG, unsigned Flags) {
7038 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
7039 TargetFlags: Flags);
7040}
7041
7042static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7043 SelectionDAG &DAG, unsigned Flags) {
7044 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
7045 Offset: N->getOffset(), TargetFlags: Flags);
7046}
7047
7048static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7049 SelectionDAG &DAG, unsigned Flags) {
7050 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
7051}
7052
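// Materialize the address of a symbol node N. Depending on position
// independence, HWASAN-tagged globals, the code model and the symbol's
// linkage, this emits a PC-relative LLA, a GOT-indirect LGA load, or an
// absolute lui/addi pair.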
7053template <class NodeTy>
7054SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7055 bool IsLocal, bool IsExternWeak) const {
7056 SDLoc DL(N);
7057 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7058
7059 // When HWASAN is used and tagging of global variables is enabled, the
7060 // globals should be accessed via the GOT, since the tagged address of a
7061 // global is incompatible with existing code models. This also applies to
7062 // non-PIC mode.
7063 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7064 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7065 if (IsLocal && !Subtarget.allowTaggedGlobals())
7066 // Use PC-relative addressing to access the symbol. This generates the
7067 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7068 // %pcrel_lo(auipc)).
7069 return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr);
7070
7071 // Use PC-relative addressing to access the GOT for this symbol, then load
7072 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7073 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7074 SDValue Load =
7075 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7076 MachineFunction &MF = DAG.getMachineFunction();
7077 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7078 PtrInfo: MachinePointerInfo::getGOT(MF),
7079 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7080 MachineMemOperand::MOInvariant,
7081 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
7082 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
7083 return Load;
7084 }
7085
7086 switch (getTargetMachine().getCodeModel()) {
7087 default:
7088 report_fatal_error(reason: "Unsupported code model for lowering");
7089 case CodeModel::Small: {
7090 // Generate a sequence for accessing addresses within the first 2 GiB of
7091 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7092 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7093 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7094 SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi);
7095 return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNHi, N2: AddrLo);
7096 }
7097 case CodeModel::Medium: {
7098 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7099 if (IsExternWeak) {
7100 // An extern weak symbol may be undefined, i.e. have value 0, which may
7101 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7102 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7103 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7104 SDValue Load =
7105 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7106 MachineFunction &MF = DAG.getMachineFunction();
7107 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7108 PtrInfo: MachinePointerInfo::getGOT(MF),
7109 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7110 MachineMemOperand::MOInvariant,
7111 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
7112 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
7113 return Load;
7114 }
7115
7116 // Generate a sequence for accessing addresses within any 2GiB range within
7117 // the address space. This generates the pattern (PseudoLLA sym), which
7118 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7119 return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr);
7120 }
7121 }
7122}
7123
7124SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7125 SelectionDAG &DAG) const {
7126 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
7127 assert(N->getOffset() == 0 && "unexpected offset in global node");
7128 const GlobalValue *GV = N->getGlobal();
7129 return getAddr(N, DAG, IsLocal: GV->isDSOLocal(), IsExternWeak: GV->hasExternalWeakLinkage());
7130}
7131
7132SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7133 SelectionDAG &DAG) const {
7134 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Val&: Op);
7135
7136 return getAddr(N, DAG);
7137}
7138
7139SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7140 SelectionDAG &DAG) const {
7141 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Val&: Op);
7142
7143 return getAddr(N, DAG);
7144}
7145
7146SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7147 SelectionDAG &DAG) const {
7148 JumpTableSDNode *N = cast<JumpTableSDNode>(Val&: Op);
7149
7150 return getAddr(N, DAG);
7151}
7152
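// Lower a TLS address for the local-exec model (UseGOT == false), which
// materializes the tp-relative offset with %tprel_hi/%tprel_add/%tprel_lo, or
// the initial-exec model (UseGOT == true), which loads the offset from the
// GOT; both then add the thread pointer (x4).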
7153SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7154 SelectionDAG &DAG,
7155 bool UseGOT) const {
7156 SDLoc DL(N);
7157 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7158 const GlobalValue *GV = N->getGlobal();
7159 MVT XLenVT = Subtarget.getXLenVT();
7160
7161 if (UseGOT) {
7162 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7163 // load the address from the GOT and add the thread pointer. This generates
7164 // the pattern (PseudoLA_TLS_IE sym), which expands to
7165 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7166 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
7167 SDValue Load =
7168 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7169 MachineFunction &MF = DAG.getMachineFunction();
7170 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7171 PtrInfo: MachinePointerInfo::getGOT(MF),
7172 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7173 MachineMemOperand::MOInvariant,
7174 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
7175 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
7176
7177 // Add the thread pointer.
7178 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7179 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Load, N2: TPReg);
7180 }
7181
7182 // Generate a sequence for accessing the address relative to the thread
7183 // pointer, with the appropriate adjustment for the thread pointer offset.
7184 // This generates the pattern
7185 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7186 SDValue AddrHi =
7187 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_HI);
7188 SDValue AddrAdd =
7189 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_ADD);
7190 SDValue AddrLo =
7191 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_LO);
7192
7193 SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi);
7194 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7195 SDValue MNAdd =
7196 DAG.getNode(Opcode: RISCVISD::ADD_TPREL, DL, VT: Ty, N1: MNHi, N2: TPReg, N3: AddrAdd);
7197 return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNAdd, N2: AddrLo);
7198}
7199
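// Lower a TLS address under the general-dynamic model: compute the address of
// the symbol's GOT entry with PseudoLA_TLS_GD and pass it to a call to
// __tls_get_addr.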
7200SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7201 SelectionDAG &DAG) const {
7202 SDLoc DL(N);
7203 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7204 IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());
7205 const GlobalValue *GV = N->getGlobal();
7206
7207 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7208 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7209 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7210 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
7211 SDValue Load =
7212 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7213
7214 // Prepare argument list to generate call.
7215 ArgListTy Args;
7216 ArgListEntry Entry;
7217 Entry.Node = Load;
7218 Entry.Ty = CallTy;
7219 Args.push_back(x: Entry);
7220
7221 // Set up the call to __tls_get_addr.
7222 TargetLowering::CallLoweringInfo CLI(DAG);
7223 CLI.setDebugLoc(DL)
7224 .setChain(DAG.getEntryNode())
7225 .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
7226 Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
7227 ArgsList: std::move(Args));
7228
7229 return LowerCallTo(CLI).first;
7230}
7231
7232SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7233 SelectionDAG &DAG) const {
7234 SDLoc DL(N);
7235 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7236 const GlobalValue *GV = N->getGlobal();
7237
7238 // Use a PC-relative addressing mode to access this symbol's TLS descriptor.
7239 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7240 //
7241 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7242 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7243 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7244 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7245 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
7246 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7247}
7248
7249SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7250 SelectionDAG &DAG) const {
7251 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
7252 assert(N->getOffset() == 0 && "unexpected offset in global node");
7253
7254 if (DAG.getTarget().useEmulatedTLS())
7255 return LowerToTLSEmulatedModel(GA: N, DAG);
7256
7257 TLSModel::Model Model = getTargetMachine().getTLSModel(GV: N->getGlobal());
7258
7259 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7260 CallingConv::GHC)
7261 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
7262
7263 SDValue Addr;
7264 switch (Model) {
7265 case TLSModel::LocalExec:
7266 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7267 break;
7268 case TLSModel::InitialExec:
7269 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7270 break;
7271 case TLSModel::LocalDynamic:
7272 case TLSModel::GeneralDynamic:
7273 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7274 : getDynamicTLSAddr(N, DAG);
7275 break;
7276 }
7277
7278 return Addr;
7279}
7280
7281// Return true if Val is equal to (setcc LHS, RHS, CC).
7282// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7283// Otherwise, return std::nullopt.
7284static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7285 ISD::CondCode CC, SDValue Val) {
7286 assert(Val->getOpcode() == ISD::SETCC);
7287 SDValue LHS2 = Val.getOperand(i: 0);
7288 SDValue RHS2 = Val.getOperand(i: 1);
7289 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get();
7290
7291 if (LHS == LHS2 && RHS == RHS2) {
7292 if (CC == CC2)
7293 return true;
7294 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
7295 return false;
7296 } else if (LHS == RHS2 && RHS == LHS2) {
7297 CC2 = ISD::getSetCCSwappedOperands(Operation: CC2);
7298 if (CC == CC2)
7299 return true;
7300 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
7301 return false;
7302 }
7303
7304 return std::nullopt;
7305}
7306
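// Try to replace an integer select with cheap bitwise arithmetic: when one arm
// is the constant 0 or -1, when the two arms are bitwise complements of each
// other, or when both arms are setccs over the same operands as the condition.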
7307static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7308 const RISCVSubtarget &Subtarget) {
7309 SDValue CondV = N->getOperand(Num: 0);
7310 SDValue TrueV = N->getOperand(Num: 1);
7311 SDValue FalseV = N->getOperand(Num: 2);
7312 MVT VT = N->getSimpleValueType(ResNo: 0);
7313 SDLoc DL(N);
7314
7315 if (!Subtarget.hasConditionalMoveFusion()) {
7316 // (select c, -1, y) -> -c | y
7317 if (isAllOnesConstant(V: TrueV)) {
7318 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
7319 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
7320 }
7321 // (select c, y, -1) -> (c-1) | y
7322 if (isAllOnesConstant(V: FalseV)) {
7323 SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV,
7324 N2: DAG.getAllOnesConstant(DL, VT));
7325 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
7326 }
7327
7328 // (select c, 0, y) -> (c-1) & y
7329 if (isNullConstant(V: TrueV)) {
7330 SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV,
7331 N2: DAG.getAllOnesConstant(DL, VT));
7332 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
7333 }
7334 // (select c, y, 0) -> -c & y
7335 if (isNullConstant(V: FalseV)) {
7336 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
7337 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
7338 }
7339 }
7340
7341 // select c, ~x, x --> xor -c, x
7342 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
7343 const APInt &TrueVal = TrueV->getAsAPIntVal();
7344 const APInt &FalseVal = FalseV->getAsAPIntVal();
7345 if (~TrueVal == FalseVal) {
7346 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
7347 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV);
7348 }
7349 }
7350
7351 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7352 // when both truev and falsev are also setcc.
7353 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7354 FalseV.getOpcode() == ISD::SETCC) {
7355 SDValue LHS = CondV.getOperand(i: 0);
7356 SDValue RHS = CondV.getOperand(i: 1);
7357 ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
7358
7359 // (select x, x, y) -> x | y
7360 // (select !x, x, y) -> x & y
7361 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) {
7362 return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV,
7363 N2: DAG.getFreeze(V: FalseV));
7364 }
7365 // (select x, y, x) -> x & y
7366 // (select !x, y, x) -> x | y
7367 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) {
7368 return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT,
7369 N1: DAG.getFreeze(V: TrueV), N2: FalseV);
7370 }
7371 }
7372
7373 return SDValue();
7374}
7375
7376// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7377// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7378// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7379// being `0` or `-1`. In such cases we can replace `select` with `and`.
7380// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7381// than `c0`?
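// For example, with c0 == -1, c1 == 1 and an ADD:
//   (add (select cond, x, -1), 1) -> (select cond, (add x, 1), 0)
// and the resulting select of 0 can then be lowered to a simple AND.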
7382static SDValue
7383foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7384 const RISCVSubtarget &Subtarget) {
7385 if (Subtarget.hasShortForwardBranchOpt())
7386 return SDValue();
7387
7388 unsigned SelOpNo = 0;
7389 SDValue Sel = BO->getOperand(Num: 0);
7390 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7391 SelOpNo = 1;
7392 Sel = BO->getOperand(Num: 1);
7393 }
7394
7395 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7396 return SDValue();
7397
7398 unsigned ConstSelOpNo = 1;
7399 unsigned OtherSelOpNo = 2;
7400 if (!isa<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) {
7401 ConstSelOpNo = 2;
7402 OtherSelOpNo = 1;
7403 }
7404 SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo);
7405 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp);
7406 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7407 return SDValue();
7408
7409 SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1);
7410 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp);
7411 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7412 return SDValue();
7413
7414 SDLoc DL(Sel);
7415 EVT VT = BO->getValueType(ResNo: 0);
7416
7417 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7418 if (SelOpNo == 1)
7419 std::swap(a&: NewConstOps[0], b&: NewConstOps[1]);
7420
7421 SDValue NewConstOp =
7422 DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps);
7423 if (!NewConstOp)
7424 return SDValue();
7425
7426 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7427 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7428 return SDValue();
7429
7430 SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo);
7431 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7432 if (SelOpNo == 1)
7433 std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]);
7434 SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps);
7435
7436 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7437 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7438 return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF);
7439}
7440
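// Lower SELECT: vector selects become VSELECT by splatting the condition,
// scalar integer selects prefer Zicond/XVentanaCondOps conditional-zero
// sequences when available, and everything else falls back to a
// RISCVISD::SELECT_CC (compare-and-branch) node.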
7441SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7442 SDValue CondV = Op.getOperand(i: 0);
7443 SDValue TrueV = Op.getOperand(i: 1);
7444 SDValue FalseV = Op.getOperand(i: 2);
7445 SDLoc DL(Op);
7446 MVT VT = Op.getSimpleValueType();
7447 MVT XLenVT = Subtarget.getXLenVT();
7448
7449 // Lower vector SELECTs to VSELECTs by splatting the condition.
7450 if (VT.isVector()) {
7451 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7452 SDValue CondSplat = DAG.getSplat(VT: SplatCondVT, DL, Op: CondV);
7453 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: CondSplat, N2: TrueV, N3: FalseV);
7454 }
7455
7456 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7457 // nodes to implement the SELECT. Performing the lowering here allows for
7458 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7459 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7460 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7461 VT.isScalarInteger()) {
7462 // (select c, t, 0) -> (czero_eqz t, c)
7463 if (isNullConstant(V: FalseV))
7464 return DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV);
7465 // (select c, 0, f) -> (czero_nez f, c)
7466 if (isNullConstant(V: TrueV))
7467 return DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV);
7468
7469 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7470 if (TrueV.getOpcode() == ISD::AND &&
7471 (TrueV.getOperand(i: 0) == FalseV || TrueV.getOperand(i: 1) == FalseV))
7472 return DAG.getNode(
7473 Opcode: ISD::OR, DL, VT, N1: TrueV,
7474 N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV));
7475 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7476 if (FalseV.getOpcode() == ISD::AND &&
7477 (FalseV.getOperand(i: 0) == TrueV || FalseV.getOperand(i: 1) == TrueV))
7478 return DAG.getNode(
7479 Opcode: ISD::OR, DL, VT, N1: FalseV,
7480 N2: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV));
7481
7482 // Try some other optimizations before falling back to generic lowering.
7483 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
7484 return V;
7485
7486 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7487 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
7488 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
7489 const APInt &TrueVal = TrueV->getAsAPIntVal();
7490 const APInt &FalseVal = FalseV->getAsAPIntVal();
7491 const int TrueValCost = RISCVMatInt::getIntMatCost(
7492 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7493 const int FalseValCost = RISCVMatInt::getIntMatCost(
7494 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7495 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7496 SDValue LHSVal = DAG.getConstant(
7497 Val: IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7498 SDValue RHSVal =
7499 DAG.getConstant(Val: IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7500 SDValue CMOV =
7501 DAG.getNode(Opcode: IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7502 DL, VT, N1: LHSVal, N2: CondV);
7503 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CMOV, N2: RHSVal);
7504 }
7505
7506 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7507 // Unless we have the short forward branch optimization.
7508 if (!Subtarget.hasConditionalMoveFusion())
7509 return DAG.getNode(
7510 Opcode: ISD::OR, DL, VT,
7511 N1: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV),
7512 N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV));
7513 }
7514
7515 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
7516 return V;
7517
7518 if (Op.hasOneUse()) {
7519 unsigned UseOpc = Op->use_begin()->getOpcode();
7520 if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) {
7521 SDNode *BinOp = *Op->use_begin();
7522 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: BinOp,
7523 DAG, Subtarget)) {
7524 DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel);
7525 return lowerSELECT(Op: NewSel, DAG);
7526 }
7527 }
7528 }
7529
7530 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7531 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7532 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(Val&: TrueV);
7533 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(Val&: FalseV);
7534 if (FPTV && FPFV) {
7535 if (FPTV->isExactlyValue(V: 1.0) && FPFV->isExactlyValue(V: 0.0))
7536 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: CondV);
7537 if (FPTV->isExactlyValue(V: 0.0) && FPFV->isExactlyValue(V: 1.0)) {
7538 SDValue XOR = DAG.getNode(Opcode: ISD::XOR, DL, VT: XLenVT, N1: CondV,
7539 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
7540 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: XOR);
7541 }
7542 }
7543
7544 // If the condition is not an integer SETCC which operates on XLenVT, we need
7545 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7546 // (select condv, truev, falsev)
7547 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7548 if (CondV.getOpcode() != ISD::SETCC ||
7549 CondV.getOperand(i: 0).getSimpleValueType() != XLenVT) {
7550 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
7551 SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE);
7552
7553 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7554
7555 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops);
7556 }
7557
7558 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7559 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7560 // advantage of the integer compare+branch instructions. i.e.:
7561 // (select (setcc lhs, rhs, cc), truev, falsev)
7562 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7563 SDValue LHS = CondV.getOperand(i: 0);
7564 SDValue RHS = CondV.getOperand(i: 1);
7565 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
7566
7567 // Special case for a select of 2 constants that have a difference of 1.
7568 // Normally this is done by DAGCombine, but if the select is introduced by
7569 // type legalization or op legalization, we miss it. Restricting to SETLT
7570 // case for now because that is what signed saturating add/sub need.
7571 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7572 // but we would probably want to swap the true/false values if the condition
7573 // is SETGE/SETLE to avoid an XORI.
7574 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
7575 CCVal == ISD::SETLT) {
7576 const APInt &TrueVal = TrueV->getAsAPIntVal();
7577 const APInt &FalseVal = FalseV->getAsAPIntVal();
7578 if (TrueVal - 1 == FalseVal)
7579 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV);
7580 if (TrueVal + 1 == FalseVal)
7581 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV);
7582 }
7583
7584 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
7585 // 1 < x ? x : 1 -> 0 < x ? x : 1
7586 if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7587 RHS == TrueV && LHS == FalseV) {
7588 LHS = DAG.getConstant(Val: 0, DL, VT);
7589 // 0 <u x is the same as x != 0.
7590 if (CCVal == ISD::SETULT) {
7591 std::swap(a&: LHS, b&: RHS);
7592 CCVal = ISD::SETNE;
7593 }
7594 }
7595
7596 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7597 if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7598 RHS == FalseV) {
7599 RHS = DAG.getConstant(Val: 0, DL, VT);
7600 }
7601
7602 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
7603
7604 if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) {
7605 // (select (setcc lhs, rhs, CC), constant, falsev)
7606 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7607 std::swap(a&: TrueV, b&: FalseV);
7608 TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()));
7609 }
7610
7611 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7612 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops);
7613}
7614
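// Lower BRCOND to RISCVISD::BR_CC, folding an integer SETCC condition directly
// into the branch when possible; otherwise compare the condition against zero
// with SETNE.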
7615SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7616 SDValue CondV = Op.getOperand(i: 1);
7617 SDLoc DL(Op);
7618 MVT XLenVT = Subtarget.getXLenVT();
7619
7620 if (CondV.getOpcode() == ISD::SETCC &&
7621 CondV.getOperand(i: 0).getValueType() == XLenVT) {
7622 SDValue LHS = CondV.getOperand(i: 0);
7623 SDValue RHS = CondV.getOperand(i: 1);
7624 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
7625
7626 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
7627
7628 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
7629 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0),
7630 N2: LHS, N3: RHS, N4: TargetCC, N5: Op.getOperand(i: 2));
7631 }
7632
7633 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0),
7634 N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: XLenVT),
7635 N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2));
7636}
7637
7638SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7639 MachineFunction &MF = DAG.getMachineFunction();
7640 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7641
7642 SDLoc DL(Op);
7643 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
7644 VT: getPointerTy(DL: MF.getDataLayout()));
7645
7646 // vastart just stores the address of the VarArgsFrameIndex slot into the
7647 // memory location argument.
7648 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
7649 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
7650 PtrInfo: MachinePointerInfo(SV));
7651}
7652
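// Lower FRAMEADDR by copying the frame register and, for non-zero depths,
// repeatedly loading the caller's saved frame pointer, which this lowering
// locates at offset -2*XLEN bytes from the current frame pointer.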
7653SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7654 SelectionDAG &DAG) const {
7655 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7656 MachineFunction &MF = DAG.getMachineFunction();
7657 MachineFrameInfo &MFI = MF.getFrameInfo();
7658 MFI.setFrameAddressIsTaken(true);
7659 Register FrameReg = RI.getFrameRegister(MF);
7660 int XLenInBytes = Subtarget.getXLen() / 8;
7661
7662 EVT VT = Op.getValueType();
7663 SDLoc DL(Op);
7664 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
7665 unsigned Depth = Op.getConstantOperandVal(i: 0);
7666 while (Depth--) {
7667 int Offset = -(XLenInBytes * 2);
7668 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
7669 N2: DAG.getIntPtrConstant(Val: Offset, DL));
7670 FrameAddr =
7671 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
7672 }
7673 return FrameAddr;
7674}
7675
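// Lower RETURNADDR: depth 0 returns the return address register as an implicit
// live-in; deeper frames load the saved return address at offset -XLEN bytes
// from the frame address computed by lowerFRAMEADDR.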
7676SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7677 SelectionDAG &DAG) const {
7678 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7679 MachineFunction &MF = DAG.getMachineFunction();
7680 MachineFrameInfo &MFI = MF.getFrameInfo();
7681 MFI.setReturnAddressIsTaken(true);
7682 MVT XLenVT = Subtarget.getXLenVT();
7683 int XLenInBytes = Subtarget.getXLen() / 8;
7684
7685 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7686 return SDValue();
7687
7688 EVT VT = Op.getValueType();
7689 SDLoc DL(Op);
7690 unsigned Depth = Op.getConstantOperandVal(i: 0);
7691 if (Depth) {
7692 int Off = -XLenInBytes;
7693 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7694 SDValue Offset = DAG.getConstant(Val: Off, DL, VT);
7695 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
7696 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
7697 PtrInfo: MachinePointerInfo());
7698 }
7699
7700 // Return the value of the return address register, marking it an implicit
7701 // live-in.
7702 Register Reg = MF.addLiveIn(PReg: RI.getRARegister(), RC: getRegClassFor(VT: XLenVT));
7703 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg, VT: XLenVT);
7704}
7705
7706SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7707 SelectionDAG &DAG) const {
7708 SDLoc DL(Op);
7709 SDValue Lo = Op.getOperand(i: 0);
7710 SDValue Hi = Op.getOperand(i: 1);
7711 SDValue Shamt = Op.getOperand(i: 2);
7712 EVT VT = Lo.getValueType();
7713
7714 // if Shamt-XLEN < 0: // Shamt < XLEN
7715 // Lo = Lo << Shamt
7716 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7717 // else:
7718 // Lo = 0
7719 // Hi = Lo << (Shamt-XLEN)
7720
7721 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
7722 SDValue One = DAG.getConstant(Val: 1, DL, VT);
7723 SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT);
7724 SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT);
7725 SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen);
7726 SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt);
7727
7728 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
7729 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
7730 SDValue ShiftRightLo =
7731 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: XLenMinus1Shamt);
7732 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
7733 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
7734 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusXLen);
7735
7736 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT);
7737
7738 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
7739 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
7740
7741 SDValue Parts[2] = {Lo, Hi};
7742 return DAG.getMergeValues(Ops: Parts, dl: DL);
7743}
7744
7745SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7746 bool IsSRA) const {
7747 SDLoc DL(Op);
7748 SDValue Lo = Op.getOperand(i: 0);
7749 SDValue Hi = Op.getOperand(i: 1);
7750 SDValue Shamt = Op.getOperand(i: 2);
7751 EVT VT = Lo.getValueType();
7752
7753 // SRA expansion:
7754 // if Shamt-XLEN < 0: // Shamt < XLEN
7755 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7756 // Hi = Hi >>s Shamt
7757 // else:
7758 // Lo = Hi >>s (Shamt-XLEN);
7759 // Hi = Hi >>s (XLEN-1)
7760 //
7761 // SRL expansion:
7762 // if Shamt-XLEN < 0: // Shamt < XLEN
7763 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7764 // Hi = Hi >>u Shamt
7765 // else:
7766 // Lo = Hi >>u (Shamt-XLEN);
7767 // Hi = 0;
7768
7769 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7770
7771 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
7772 SDValue One = DAG.getConstant(Val: 1, DL, VT);
7773 SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT);
7774 SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT);
7775 SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen);
7776 SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt);
7777
7778 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
7779 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
7780 SDValue ShiftLeftHi =
7781 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: XLenMinus1Shamt);
7782 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
7783 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
7784 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusXLen);
7785 SDValue HiFalse =
7786 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: XLenMinus1) : Zero;
7787
7788 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT);
7789
7790 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
7791 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
7792
7793 SDValue Parts[2] = {Lo, Hi};
7794 return DAG.getMergeValues(Ops: Parts, dl: DL);
7795}
7796
7797// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7798// legal equivalently-sized i8 type, so we can use that as a go-between.
7799SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7800 SelectionDAG &DAG) const {
7801 SDLoc DL(Op);
7802 MVT VT = Op.getSimpleValueType();
7803 SDValue SplatVal = Op.getOperand(i: 0);
7804 // All-zeros or all-ones splats are handled specially.
7805 if (ISD::isConstantSplatVectorAllOnes(N: Op.getNode())) {
7806 SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second;
7807 return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT, Operand: VL);
7808 }
7809 if (ISD::isConstantSplatVectorAllZeros(N: Op.getNode())) {
7810 SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second;
7811 return DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT, Operand: VL);
7812 }
7813 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7814 SplatVal = DAG.getNode(Opcode: ISD::AND, DL, VT: SplatVal.getValueType(), N1: SplatVal,
7815 N2: DAG.getConstant(Val: 1, DL, VT: SplatVal.getValueType()));
7816 SDValue LHS = DAG.getSplatVector(VT: InterVT, DL, Op: SplatVal);
7817 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: InterVT);
7818 return DAG.getSetCC(DL, VT, LHS, RHS: Zero, Cond: ISD::SETNE);
7819}
7820
7821// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7822// illegal (currently only vXi64 RV32).
7823// FIXME: We could also catch non-constant sign-extended i32 values and lower
7824// them to VMV_V_X_VL.
7825SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7826 SelectionDAG &DAG) const {
7827 SDLoc DL(Op);
7828 MVT VecVT = Op.getSimpleValueType();
7829 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7830 "Unexpected SPLAT_VECTOR_PARTS lowering");
7831
7832 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7833 SDValue Lo = Op.getOperand(i: 0);
7834 SDValue Hi = Op.getOperand(i: 1);
7835
7836 MVT ContainerVT = VecVT;
7837 if (VecVT.isFixedLengthVector())
7838 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
7839
7840 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7841
7842 SDValue Res =
7843 splatPartsI64WithVL(DL, VT: ContainerVT, Passthru: SDValue(), Lo, Hi, VL, DAG);
7844
7845 if (VecVT.isFixedLengthVector())
7846 Res = convertFromScalableVector(VT: VecVT, V: Res, DAG, Subtarget);
7847
7848 return Res;
7849}
7850
7851// Custom-lower extensions from mask vectors by using a vselect either with 1
7852// for zero/any-extension or -1 for sign-extension:
7853// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7854// Note that any-extension is lowered identically to zero-extension.
7855SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7856 int64_t ExtTrueVal) const {
7857 SDLoc DL(Op);
7858 MVT VecVT = Op.getSimpleValueType();
7859 SDValue Src = Op.getOperand(i: 0);
7860 // Only custom-lower extensions from mask types
7861 assert(Src.getValueType().isVector() &&
7862 Src.getValueType().getVectorElementType() == MVT::i1);
7863
7864 if (VecVT.isScalableVector()) {
7865 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: VecVT);
7866 SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: VecVT);
7867 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecVT, N1: Src, N2: SplatTrueVal, N3: SplatZero);
7868 }
7869
7870 MVT ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
7871 MVT I1ContainerVT =
7872 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7873
7874 SDValue CC = convertToScalableVector(VT: I1ContainerVT, V: Src, DAG, Subtarget);
7875
7876 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7877
7878 MVT XLenVT = Subtarget.getXLenVT();
7879 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
7880 SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: XLenVT);
7881
7882 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7883 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL);
7884 SplatTrueVal = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7885 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatTrueVal, N3: VL);
7886 SDValue Select =
7887 DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: SplatTrueVal,
7888 N3: SplatZero, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
7889
7890 return convertFromScalableVector(VT: VecVT, V: Select, DAG, Subtarget);
7891}
7892
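// Lower a fixed-length vector extension by converting the source to its
// scalable container type, emitting the VL-predicated extend node ExtendOpc,
// and converting the result back to the original fixed-length type.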
7893SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7894 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7895 MVT ExtVT = Op.getSimpleValueType();
7896 // Only custom-lower extensions from fixed-length vector types.
7897 if (!ExtVT.isFixedLengthVector())
7898 return Op;
7899 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
7900 // Grab the canonical container type for the extended type. Infer the smaller
7901 // type from that to ensure the same number of vector elements, as we know
7902 // the LMUL will be sufficient to hold the smaller type.
7903 MVT ContainerExtVT = getContainerForFixedLengthVector(VT: ExtVT);
7904 // Build the source container type manually to ensure the same number of
7905 // vector elements between source and dest.
7906 MVT ContainerVT = MVT::getVectorVT(VT: VT.getVectorElementType(),
7907 EC: ContainerExtVT.getVectorElementCount());
7908
7909 SDValue Op1 =
7910 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
7911
7912 SDLoc DL(Op);
7913 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
7914
7915 SDValue Ext = DAG.getNode(Opcode: ExtendOpc, DL, VT: ContainerExtVT, N1: Op1, N2: Mask, N3: VL);
7916
7917 return convertFromScalableVector(VT: ExtVT, V: Ext, DAG, Subtarget);
7918}
7919
7920// Custom-lower truncations from vectors to mask vectors by using a mask and a
7921// setcc operation:
7922// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7923SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7924 SelectionDAG &DAG) const {
7925 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7926 SDLoc DL(Op);
7927 EVT MaskVT = Op.getValueType();
7928 // Only expect to custom-lower truncations to mask types
7929 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7930 "Unexpected type for vector mask lowering");
7931 SDValue Src = Op.getOperand(i: 0);
7932 MVT VecVT = Src.getSimpleValueType();
7933 SDValue Mask, VL;
7934 if (IsVPTrunc) {
7935 Mask = Op.getOperand(i: 1);
7936 VL = Op.getOperand(i: 2);
7937 }
7938 // If this is a fixed vector, we need to convert it to a scalable vector.
7939 MVT ContainerVT = VecVT;
7940
7941 if (VecVT.isFixedLengthVector()) {
7942 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
7943 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
7944 if (IsVPTrunc) {
7945 MVT MaskContainerVT =
7946 getContainerForFixedLengthVector(VT: Mask.getSimpleValueType());
7947 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
7948 }
7949 }
7950
7951 if (!IsVPTrunc) {
7952 std::tie(args&: Mask, args&: VL) =
7953 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7954 }
7955
7956 SDValue SplatOne = DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT());
7957 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT());
7958
7959 SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7960 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatOne, N3: VL);
7961 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7962 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL);
7963
7964 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7965 SDValue Trunc = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerVT, N1: Src, N2: SplatOne,
7966 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
7967 Trunc = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskContainerVT,
7968 Ops: {Trunc, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
7969 DAG.getUNDEF(VT: MaskContainerVT), Mask, VL});
7970 if (MaskVT.isFixedLengthVector())
7971 Trunc = convertFromScalableVector(VT: MaskVT, V: Trunc, DAG, Subtarget);
7972 return Trunc;
7973}
7974
7975SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7976 SelectionDAG &DAG) const {
7977 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7978 SDLoc DL(Op);
7979
7980 MVT VT = Op.getSimpleValueType();
7981 // Only custom-lower vector truncates
7982 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7983
7984 // Truncates to mask types are handled differently
7985 if (VT.getVectorElementType() == MVT::i1)
7986 return lowerVectorMaskTruncLike(Op, DAG);
7987
7988 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7989 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7990 // truncate by one power of two at a time.
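// For example, an i64 -> i8 element truncate is emitted as three
// TRUNCATE_VECTOR_VL nodes: i64 -> i32 -> i16 -> i8.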
7991 MVT DstEltVT = VT.getVectorElementType();
7992
7993 SDValue Src = Op.getOperand(i: 0);
7994 MVT SrcVT = Src.getSimpleValueType();
7995 MVT SrcEltVT = SrcVT.getVectorElementType();
7996
7997 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7998 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7999 "Unexpected vector truncate lowering");
8000
8001 MVT ContainerVT = SrcVT;
8002 SDValue Mask, VL;
8003 if (IsVPTrunc) {
8004 Mask = Op.getOperand(i: 1);
8005 VL = Op.getOperand(i: 2);
8006 }
8007 if (SrcVT.isFixedLengthVector()) {
8008 ContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
8009 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
8010 if (IsVPTrunc) {
8011 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
8012 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
8013 }
8014 }
8015
8016 SDValue Result = Src;
8017 if (!IsVPTrunc) {
8018 std::tie(args&: Mask, args&: VL) =
8019 getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
8020 }
8021
8022 LLVMContext &Context = *DAG.getContext();
8023 const ElementCount Count = ContainerVT.getVectorElementCount();
8024 do {
8025 SrcEltVT = MVT::getIntegerVT(BitWidth: SrcEltVT.getSizeInBits() / 2);
8026 EVT ResultVT = EVT::getVectorVT(Context, VT: SrcEltVT, EC: Count);
8027 Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: ResultVT, N1: Result,
8028 N2: Mask, N3: VL);
8029 } while (SrcEltVT != DstEltVT);
8030
8031 if (SrcVT.isFixedLengthVector())
8032 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
8033
8034 return Result;
8035}
8036
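// Lower STRICT_FP_EXTEND and STRICT_FP_ROUND on vectors. Conversions directly
// between f64 and f16 go through an f32 intermediate step, using round-to-odd
// for the narrowing direction to avoid double rounding.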
8037SDValue
8038RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8039 SelectionDAG &DAG) const {
8040 SDLoc DL(Op);
8041 SDValue Chain = Op.getOperand(i: 0);
8042 SDValue Src = Op.getOperand(i: 1);
8043 MVT VT = Op.getSimpleValueType();
8044 MVT SrcVT = Src.getSimpleValueType();
8045 MVT ContainerVT = VT;
8046 if (VT.isFixedLengthVector()) {
8047 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
8048 ContainerVT =
8049 SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType());
8050 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
8051 }
8052
8053 auto [Mask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
8054
8055 // RVV can only widen/truncate fp to types double/half the size of the source.
8056 if ((VT.getVectorElementType() == MVT::f64 &&
8057 SrcVT.getVectorElementType() == MVT::f16) ||
8058 (VT.getVectorElementType() == MVT::f16 &&
8059 SrcVT.getVectorElementType() == MVT::f64)) {
8060 // For double rounding, the intermediate rounding should be round-to-odd.
8061 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8062 ? RISCVISD::STRICT_FP_EXTEND_VL
8063 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8064 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8065 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8066 Chain, Src, Mask, VL);
8067 Chain = Src.getValue(R: 1);
8068 }
8069
8070 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8071 ? RISCVISD::STRICT_FP_EXTEND_VL
8072 : RISCVISD::STRICT_FP_ROUND_VL;
8073 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8074 Chain, Src, Mask, VL);
8075 if (VT.isFixedLengthVector()) {
8076 // StrictFP operations have two result values. Their lowered result should
8077 // have the same number of results.
8078 SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
8079 Res = DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL);
8080 }
8081 return Res;
8082}
8083
8084SDValue
8085RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8086 SelectionDAG &DAG) const {
8087 bool IsVP =
8088 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8089 bool IsExtend =
8090 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8091 // RVV can only truncate fp to types half the size of the source. We
8092 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8093 // conversion instruction.
8094 SDLoc DL(Op);
8095 MVT VT = Op.getSimpleValueType();
8096
8097 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8098
8099 SDValue Src = Op.getOperand(i: 0);
8100 MVT SrcVT = Src.getSimpleValueType();
8101
8102 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8103 SrcVT.getVectorElementType() != MVT::f16);
8104 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8105 SrcVT.getVectorElementType() != MVT::f64);
8106
8107 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8108
8109 // Prepare any fixed-length vector operands.
8110 MVT ContainerVT = VT;
8111 SDValue Mask, VL;
8112 if (IsVP) {
8113 Mask = Op.getOperand(i: 1);
8114 VL = Op.getOperand(i: 2);
8115 }
8116 if (VT.isFixedLengthVector()) {
8117 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
8118 ContainerVT =
8119 SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType());
8120 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
8121 if (IsVP) {
8122 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
8123 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
8124 }
8125 }
8126
8127 if (!IsVP)
8128 std::tie(args&: Mask, args&: VL) =
8129 getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
8130
8131 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8132
8133 if (IsDirectConv) {
8134 Src = DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
8135 if (VT.isFixedLengthVector())
8136 Src = convertFromScalableVector(VT, V: Src, DAG, Subtarget);
8137 return Src;
8138 }
8139
8140 unsigned InterConvOpc =
8141 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8142
8143 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8144 SDValue IntermediateConv =
8145 DAG.getNode(Opcode: InterConvOpc, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL);
8146 SDValue Result =
8147 DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: IntermediateConv, N2: Mask, N3: VL);
8148 if (VT.isFixedLengthVector())
8149 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
8150 return Result;
8151}
8152
8153// Given a scalable vector type and an index into it, returns the type for the
8154// smallest subvector that the index fits in. This can be used to reduce LMUL
8155// for operations like vslidedown.
8156//
8157// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8158static std::optional<MVT>
8159getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8160 const RISCVSubtarget &Subtarget) {
8161 assert(VecVT.isScalableVector());
8162 const unsigned EltSize = VecVT.getScalarSizeInBits();
8163 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8164 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8165 MVT SmallerVT;
8166 if (MaxIdx < MinVLMAX)
8167 SmallerVT = getLMUL1VT(VT: VecVT);
8168 else if (MaxIdx < MinVLMAX * 2)
8169 SmallerVT = getLMUL1VT(VT: VecVT).getDoubleNumVectorElementsVT();
8170 else if (MaxIdx < MinVLMAX * 4)
8171 SmallerVT = getLMUL1VT(VT: VecVT)
8172 .getDoubleNumVectorElementsVT()
8173 .getDoubleNumVectorElementsVT();
8174 if (!SmallerVT.isValid() || !VecVT.bitsGT(VT: SmallerVT))
8175 return std::nullopt;
8176 return SmallerVT;
8177}
8178
8179// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8180// first position of a vector, and that vector is slid up to the insert index.
8181// By limiting the active vector length to index+1 and merging with the
8182// original vector (with an undisturbed tail policy for elements >= VL), we
8183// achieve the desired result of leaving all elements untouched except the one
8184// at VL-1, which is replaced with the desired value.
8185SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8186 SelectionDAG &DAG) const {
8187 SDLoc DL(Op);
8188 MVT VecVT = Op.getSimpleValueType();
8189 SDValue Vec = Op.getOperand(i: 0);
8190 SDValue Val = Op.getOperand(i: 1);
8191 SDValue Idx = Op.getOperand(i: 2);
8192
8193 if (VecVT.getVectorElementType() == MVT::i1) {
8194 // FIXME: For now we just promote to an i8 vector and insert into that,
8195 // but this is probably not optimal.
8196 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8197 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec);
8198 Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: WideVT, N1: Vec, N2: Val, N3: Idx);
8199 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Vec);
8200 }
8201
8202 MVT ContainerVT = VecVT;
8203 // If the operand is a fixed-length vector, convert to a scalable one.
8204 if (VecVT.isFixedLengthVector()) {
8205 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
8206 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
8207 }
8208
8209 // If we know the index we're going to insert at, we can shrink Vec so that
8210 // we're performing the scalar inserts and slideup on a smaller LMUL.
8211 MVT OrigContainerVT = ContainerVT;
8212 SDValue OrigVec = Vec;
8213 SDValue AlignedIdx;
8214 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx)) {
8215 const unsigned OrigIdx = IdxC->getZExtValue();
8216 // Do we know an upper bound on LMUL?
8217 if (auto ShrunkVT = getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: OrigIdx,
8218 DL, DAG, Subtarget)) {
8219 ContainerVT = *ShrunkVT;
8220 AlignedIdx = DAG.getVectorIdxConstant(Val: 0, DL);
8221 }
8222
8223 // If we're compiling for an exact VLEN value, we can always perform
8224 // the insert in m1 as we can determine the register corresponding to
8225 // the index in the register group.
8226 const MVT M1VT = getLMUL1VT(VT: ContainerVT);
8227 if (auto VLEN = Subtarget.getRealVLen();
8228 VLEN && ContainerVT.bitsGT(VT: M1VT)) {
8229 EVT ElemVT = VecVT.getVectorElementType();
8230 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8231 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8232 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8233 unsigned ExtractIdx =
8234 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8235 AlignedIdx = DAG.getVectorIdxConstant(Val: ExtractIdx, DL);
8236 Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL);
8237 ContainerVT = M1VT;
8238 }
8239
8240 if (AlignedIdx)
8241 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec,
8242 N2: AlignedIdx);
8243 }
8244
8245 MVT XLenVT = Subtarget.getXLenVT();
8246
8247 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8248 // Even i64-element vectors on RV32 can be lowered without scalar
8249 // legalization if the most-significant 32 bits of the value are not affected
8250 // by the sign-extension of the lower 32 bits.
8251 // TODO: We could also catch sign extensions of a 32-bit value.
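// For example, an i64 constant of -1 or 0x7fffffff passes the isInt<32> check
// below: its lower 32 bits sign-extend back to the full value, so a 32-bit
// materialization plus the SEW>XLEN sign-extension is enough. A constant of
// 0x80000000 does not pass and must take the split lo/hi path instead.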
8252 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8253 const auto *CVal = cast<ConstantSDNode>(Val);
8254 if (isInt<32>(x: CVal->getSExtValue())) {
8255 IsLegalInsert = true;
8256 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8257 }
8258 }
8259
8260 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8261
8262 SDValue ValInVec;
8263
8264 if (IsLegalInsert) {
8265 unsigned Opc =
8266 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8267 if (isNullConstant(V: Idx)) {
8268 if (!VecVT.isFloatingPoint())
8269 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Val);
8270 Vec = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: Vec, N2: Val, N3: VL);
8271
8272 if (AlignedIdx)
8273 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec,
8274 N2: Vec, N3: AlignedIdx);
8275 if (!VecVT.isFixedLengthVector())
8276 return Vec;
8277 return convertFromScalableVector(VT: VecVT, V: Vec, DAG, Subtarget);
8278 }
8279 ValInVec = lowerScalarInsert(Scalar: Val, VL, VT: ContainerVT, DL, DAG, Subtarget);
8280 } else {
8281 // On RV32, i64-element vectors must be specially handled to place the
8282 // value at element 0, by using two vslide1down instructions in sequence on
8283 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8284 // this.
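// Concretely, with the VL of 2 used below, the first vslide1down writes Lo
// into i32 element 1, and the second shifts it down to element 0 while
// writing Hi into element 1; the (Lo, Hi) pair then reads back as the
// desired i64 in element 0 after the bitcast to the i64 container type.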
8285 SDValue ValLo, ValHi;
8286 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8287 MVT I32ContainerVT =
8288 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8289 SDValue I32Mask =
8290 getDefaultScalableVLOps(VecVT: I32ContainerVT, DL, DAG, Subtarget).first;
8291 // Limit the active VL to two.
8292 SDValue InsertI64VL = DAG.getConstant(Val: 2, DL, VT: XLenVT);
8293 // If the Idx is 0 we can insert directly into the vector.
8294 if (isNullConstant(V: Idx)) {
// First slide in the lo value, then slide the hi value in above it. We use
// slide1down to avoid the register group overlap constraint of vslide1up.
8297 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8298 N1: Vec, N2: Vec, N3: ValLo, N4: I32Mask, N5: InsertI64VL);
// If the source vector is undef, don't pass along the tail elements from
// the previous slide1down.
8301 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8302 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8303 N1: Tail, N2: ValInVec, N3: ValHi, N4: I32Mask, N5: InsertI64VL);
8304 // Bitcast back to the right container type.
8305 ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec);
8306
8307 if (AlignedIdx)
8308 ValInVec =
8309 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec,
8310 N2: ValInVec, N3: AlignedIdx);
8311 if (!VecVT.isFixedLengthVector())
8312 return ValInVec;
8313 return convertFromScalableVector(VT: VecVT, V: ValInVec, DAG, Subtarget);
8314 }
8315
// First slide in the lo value, then slide the hi value in above it. We use
// slide1down to avoid the register group overlap constraint of vslide1up.
8318 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8319 N1: DAG.getUNDEF(VT: I32ContainerVT),
8320 N2: DAG.getUNDEF(VT: I32ContainerVT), N3: ValLo,
8321 N4: I32Mask, N5: InsertI64VL);
8322 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8323 N1: DAG.getUNDEF(VT: I32ContainerVT), N2: ValInVec, N3: ValHi,
8324 N4: I32Mask, N5: InsertI64VL);
8325 // Bitcast back to the right container type.
8326 ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec);
8327 }
8328
8329 // Now that the value is in a vector, slide it into position.
8330 SDValue InsertVL =
8331 DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: Idx, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
8332
8333 // Use tail agnostic policy if Idx is the last index of Vec.
8334 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8335 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Val: Idx) &&
8336 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8337 Policy = RISCVII::TAIL_AGNOSTIC;
8338 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: ValInVec,
8339 Offset: Idx, Mask, VL: InsertVL, Policy);
8340
8341 if (AlignedIdx)
8342 Slideup = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec,
8343 N2: Slideup, N3: AlignedIdx);
8344 if (!VecVT.isFixedLengthVector())
8345 return Slideup;
8346 return convertFromScalableVector(VT: VecVT, V: Slideup, DAG, Subtarget);
8347}
8348
8349// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8350// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8351// types this is done using VMV_X_S to allow us to glean information about the
8352// sign bits of the result.
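//
// Roughly, (extractelement <4 x i32> %v, i32 2) becomes something like the
// following sketch (the VL of 1 used below keeps the slide from touching
// more elements than needed):
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vi v8, v8, 2
//   vmv.x.s a0, v8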
8353SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8354 SelectionDAG &DAG) const {
8355 SDLoc DL(Op);
8356 SDValue Idx = Op.getOperand(i: 1);
8357 SDValue Vec = Op.getOperand(i: 0);
8358 EVT EltVT = Op.getValueType();
8359 MVT VecVT = Vec.getSimpleValueType();
8360 MVT XLenVT = Subtarget.getXLenVT();
8361
8362 if (VecVT.getVectorElementType() == MVT::i1) {
8363 // Use vfirst.m to extract the first bit.
8364 if (isNullConstant(V: Idx)) {
8365 MVT ContainerVT = VecVT;
8366 if (VecVT.isFixedLengthVector()) {
8367 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
8368 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
8369 }
8370 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8371 SDValue Vfirst =
8372 DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
8373 SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: Vfirst,
8374 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ);
8375 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res);
8376 }
8377 if (VecVT.isFixedLengthVector()) {
8378 unsigned NumElts = VecVT.getVectorNumElements();
8379 if (NumElts >= 8) {
8380 MVT WideEltVT;
8381 unsigned WidenVecLen;
8382 SDValue ExtractElementIdx;
8383 SDValue ExtractBitIdx;
8384 unsigned MaxEEW = Subtarget.getELen();
8385 MVT LargestEltVT = MVT::getIntegerVT(
8386 BitWidth: std::min(a: MaxEEW, b: unsigned(XLenVT.getSizeInBits())));
8387 if (NumElts <= LargestEltVT.getSizeInBits()) {
8388 assert(isPowerOf2_32(NumElts) &&
8389 "the number of elements should be power of 2");
8390 WideEltVT = MVT::getIntegerVT(BitWidth: NumElts);
8391 WidenVecLen = 1;
8392 ExtractElementIdx = DAG.getConstant(Val: 0, DL, VT: XLenVT);
8393 ExtractBitIdx = Idx;
8394 } else {
8395 WideEltVT = LargestEltVT;
8396 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8397 // extract element index = index / element width
8398 ExtractElementIdx = DAG.getNode(
8399 Opcode: ISD::SRL, DL, VT: XLenVT, N1: Idx,
8400 N2: DAG.getConstant(Val: Log2_64(Value: WideEltVT.getSizeInBits()), DL, VT: XLenVT));
8401 // mask bit index = index % element width
8402 ExtractBitIdx = DAG.getNode(
8403 Opcode: ISD::AND, DL, VT: XLenVT, N1: Idx,
8404 N2: DAG.getConstant(Val: WideEltVT.getSizeInBits() - 1, DL, VT: XLenVT));
8405 }
8406 MVT WideVT = MVT::getVectorVT(VT: WideEltVT, NumElements: WidenVecLen);
8407 Vec = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Vec);
8408 SDValue ExtractElt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: XLenVT,
8409 N1: Vec, N2: ExtractElementIdx);
8410 // Extract the bit from GPR.
8411 SDValue ShiftRight =
8412 DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: ExtractElt, N2: ExtractBitIdx);
8413 SDValue Res = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: ShiftRight,
8414 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
8415 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res);
8416 }
8417 }
8418 // Otherwise, promote to an i8 vector and extract from that.
8419 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8420 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec);
8421 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec, N2: Idx);
8422 }
8423
8424 // If this is a fixed vector, we need to convert it to a scalable vector.
8425 MVT ContainerVT = VecVT;
8426 if (VecVT.isFixedLengthVector()) {
8427 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
8428 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
8429 }
8430
8431 // If we're compiling for an exact VLEN value and we have a known
8432 // constant index, we can always perform the extract in m1 (or
8433 // smaller) as we can determine the register corresponding to
8434 // the index in the register group.
8435 const auto VLen = Subtarget.getRealVLen();
8436 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx);
8437 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8438 MVT M1VT = getLMUL1VT(VT: ContainerVT);
8439 unsigned OrigIdx = IdxC->getZExtValue();
8440 EVT ElemVT = VecVT.getVectorElementType();
8441 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8442 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8443 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8444 unsigned ExtractIdx =
8445 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8446 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec,
8447 N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL));
8448 Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL);
8449 ContainerVT = M1VT;
8450 }
8451
8452 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8453 // contains our index.
8454 std::optional<uint64_t> MaxIdx;
8455 if (VecVT.isFixedLengthVector())
8456 MaxIdx = VecVT.getVectorNumElements() - 1;
8457 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx))
8458 MaxIdx = IdxC->getZExtValue();
8459 if (MaxIdx) {
8460 if (auto SmallerVT =
8461 getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: *MaxIdx, DL, DAG, Subtarget)) {
8462 ContainerVT = *SmallerVT;
8463 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec,
8464 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
8465 }
8466 }
8467
// If, after narrowing, the required slide is still greater than LMUL2,
// fall back to generic expansion and go through the stack. This is done
8470 // for a subtle reason: extracting *all* elements out of a vector is
8471 // widely expected to be linear in vector size, but because vslidedown
8472 // is linear in LMUL, performing N extracts using vslidedown becomes
8473 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8474 // seems to have the same problem (the store is linear in LMUL), but the
8475 // generic expansion *memoizes* the store, and thus for many extracts of
8476 // the same vector we end up with one store and a bunch of loads.
8477 // TODO: We don't have the same code for insert_vector_elt because we
8478 // have BUILD_VECTOR and handle the degenerate case there. Should we
8479 // consider adding an inverse BUILD_VECTOR node?
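// To make the cost concern above concrete: extracting all 16 elements of an
// LMUL8 vector with vslidedown issues 16 slides that each pay the LMUL8
// cost, whereas the stack path pays for one LMUL8 store plus 16 cheap scalar
// loads.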
8480 MVT LMUL2VT = getLMUL1VT(VT: ContainerVT).getDoubleNumVectorElementsVT();
8481 if (ContainerVT.bitsGT(VT: LMUL2VT) && VecVT.isFixedLengthVector())
8482 return SDValue();
8483
8484 // If the index is 0, the vector is already in the right position.
8485 if (!isNullConstant(V: Idx)) {
8486 // Use a VL of 1 to avoid processing more elements than we need.
8487 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget);
8488 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
8489 Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL);
8490 }
8491
8492 if (!EltVT.isInteger()) {
8493 // Floating-point extracts are handled in TableGen.
8494 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec,
8495 N2: DAG.getVectorIdxConstant(Val: 0, DL));
8496 }
8497
8498 SDValue Elt0 = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
8499 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Elt0);
8500}
8501
8502// Some RVV intrinsics may claim that they want an integer operand to be
8503// promoted or expanded.
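// For example, on RV64 an i8 or i32 scalar operand of a .vx intrinsic is
// promoted to i64 (XLenVT) here, while on RV32 an i64 scalar operand of a
// SEW=64 operation is either truncated when it is already sign-extended,
// split into two SEW=32 slide1up/slide1down steps, or turned into a splat
// via splatSplitI64WithVL.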
8504static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8505 const RISCVSubtarget &Subtarget) {
8506 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8507 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8508 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8509 "Unexpected opcode");
8510
8511 if (!Subtarget.hasVInstructions())
8512 return SDValue();
8513
8514 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8515 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8516 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
8517
8518 SDLoc DL(Op);
8519
8520 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8521 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8522 if (!II || !II->hasScalarOperand())
8523 return SDValue();
8524
8525 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8526 assert(SplatOp < Op.getNumOperands());
8527
8528 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8529 SDValue &ScalarOp = Operands[SplatOp];
8530 MVT OpVT = ScalarOp.getSimpleValueType();
8531 MVT XLenVT = Subtarget.getXLenVT();
8532
8533 // If this isn't a scalar, or its type is XLenVT we're done.
8534 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8535 return SDValue();
8536
8537 // Simplest case is that the operand needs to be promoted to XLenVT.
8538 if (OpVT.bitsLT(VT: XLenVT)) {
8539 // If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
8542 // FIXME: Should we ignore the upper bits in isel instead?
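// E.g. a vXi8 intrinsic with the constant operand 0xFF: sign-extending gives
// -1, which fits simm5 and can select a .vi form, while a zero extension
// would produce 255 and force the constant into a register.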
8543 unsigned ExtOpc =
8544 isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8545 ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp);
8546 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
8547 }
8548
8549 // Use the previous operand to get the vXi64 VT. The result might be a mask
8550 // VT for compares. Using the previous operand assumes that the previous
8551 // operand will never have a smaller element size than a scalar operand and
8552 // that a widening operation never uses SEW=64.
8553 // NOTE: If this fails the below assert, we can probably just find the
8554 // element count from any operand or result and use it to construct the VT.
8555 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8556 MVT VT = Op.getOperand(i: SplatOp - 1).getSimpleValueType();
8557
8558 // The more complex case is when the scalar is larger than XLenVT.
8559 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8560 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8561
8562 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8563 // instruction to sign-extend since SEW>XLEN.
8564 if (DAG.ComputeNumSignBits(Op: ScalarOp) > 32) {
8565 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8566 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
8567 }
8568
8569 switch (IntNo) {
8570 case Intrinsic::riscv_vslide1up:
8571 case Intrinsic::riscv_vslide1down:
8572 case Intrinsic::riscv_vslide1up_mask:
8573 case Intrinsic::riscv_vslide1down_mask: {
8574 // We need to special case these when the scalar is larger than XLen.
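// The idea: reinterpret the SEW=64 source as a SEW=32 vector with twice the
// element count and twice the VL, then perform two SEW=32 slide1up (hi, then
// lo) or slide1down (lo, then hi) steps so the two halves of the scalar land
// as a single adjacent i64 element.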
8575 unsigned NumOps = Op.getNumOperands();
8576 bool IsMasked = NumOps == 7;
8577
8578 // Convert the vector source to the equivalent nxvXi32 vector.
8579 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8580 SDValue Vec = DAG.getBitcast(VT: I32VT, V: Operands[2]);
8581 SDValue ScalarLo, ScalarHi;
8582 std::tie(ScalarLo, ScalarHi) =
8583 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8584
8585 // Double the VL since we halved SEW.
8586 SDValue AVL = getVLOperand(Op);
8587 SDValue I32VL;
8588
8589 // Optimize for constant AVL
8590 if (isa<ConstantSDNode>(Val: AVL)) {
8591 const auto [MinVLMAX, MaxVLMAX] =
8592 RISCVTargetLowering::computeVLMAXBounds(VecVT: VT, Subtarget);
8593
8594 uint64_t AVLInt = AVL->getAsZExtVal();
8595 if (AVLInt <= MinVLMAX) {
8596 I32VL = DAG.getConstant(Val: 2 * AVLInt, DL, VT: XLenVT);
8597 } else if (AVLInt >= 2 * MaxVLMAX) {
8598 // Just set vl to VLMAX in this situation
8599 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT: I32VT);
8600 SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT);
8601 unsigned Sew = RISCVVType::encodeSEW(SEW: I32VT.getScalarSizeInBits());
8602 SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT);
8603 SDValue SETVLMAX = DAG.getTargetConstant(
8604 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8605 I32VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVLMAX, N2: SEW,
8606 N3: LMUL);
8607 } else {
// For AVL in (MinVLMAX, 2 * MaxVLMAX), the actual working VL depends on
// the hardware implementation, so let the non-constant path below compute
// it with a vsetvli.
8611 }
8612 }
8613 if (!I32VL) {
8614 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8615 SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT);
8616 unsigned Sew = RISCVVType::encodeSEW(SEW: VT.getScalarSizeInBits());
8617 SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT);
8618 SDValue SETVL =
8619 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
// Use a vsetvli to query the length that will actually be used, which
// depends on the hardware implementation.
8622 SDValue VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVL, N2: AVL,
8623 N3: SEW, N4: LMUL);
8624 I32VL =
8625 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: VL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
8626 }
8627
8628 SDValue I32Mask = getAllOnesMask(VecVT: I32VT, VL: I32VL, DL, DAG);
8629
8630 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8631 // instructions.
8632 SDValue Passthru;
8633 if (IsMasked)
8634 Passthru = DAG.getUNDEF(VT: I32VT);
8635 else
8636 Passthru = DAG.getBitcast(VT: I32VT, V: Operands[1]);
8637
8638 if (IntNo == Intrinsic::riscv_vslide1up ||
8639 IntNo == Intrinsic::riscv_vslide1up_mask) {
8640 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8641 N3: ScalarHi, N4: I32Mask, N5: I32VL);
8642 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8643 N3: ScalarLo, N4: I32Mask, N5: I32VL);
8644 } else {
8645 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8646 N3: ScalarLo, N4: I32Mask, N5: I32VL);
8647 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8648 N3: ScalarHi, N4: I32Mask, N5: I32VL);
8649 }
8650
8651 // Convert back to nxvXi64.
8652 Vec = DAG.getBitcast(VT, V: Vec);
8653
8654 if (!IsMasked)
8655 return Vec;
8656 // Apply mask after the operation.
8657 SDValue Mask = Operands[NumOps - 3];
8658 SDValue MaskedOff = Operands[1];
8659 // Assume Policy operand is the last operand.
8660 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8661 // We don't need to select maskedoff if it's undef.
8662 if (MaskedOff.isUndef())
8663 return Vec;
// TAMU: tail agnostic, mask undisturbed.
8665 if (Policy == RISCVII::TAIL_AGNOSTIC)
8666 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff,
8667 N4: DAG.getUNDEF(VT), N5: AVL);
// TUMA or TUMU: currently we always emit a TUMU policy regardless of TUMA.
// That is fine because vmerge does not care about the mask policy.
8670 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff,
8671 N4: MaskedOff, N5: AVL);
8672 }
8673 }
8674
8675 // We need to convert the scalar to a splat vector.
8676 SDValue VL = getVLOperand(Op);
8677 assert(VL.getValueType() == XLenVT);
8678 ScalarOp = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar: ScalarOp, VL, DAG);
8679 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
8680}
8681
8682// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8683// scalable vector llvm.get.vector.length for now.
8684//
8685// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8686// (vscale * VF). The vscale and VF are independent of element width. We use
8687// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8690// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8691// SEW and LMUL are better for the surrounding vector instructions.
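//
// For example, with RVVBitsPerBlock = 64, a request for VF = 4 maps to SEW=8
// with LMUL = 1/2: VLMax is then (VLEN/8)/2 = (VLEN/64) * 4 = vscale * 4, as
// required.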
8692static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8693 const RISCVSubtarget &Subtarget) {
8694 MVT XLenVT = Subtarget.getXLenVT();
8695
8696 // The smallest LMUL is only valid for the smallest element width.
8697 const unsigned ElementWidth = 8;
8698
8699 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8700 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8701 // We don't support VF==1 with ELEN==32.
8702 [[maybe_unused]] unsigned MinVF =
8703 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8704
8705 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(Num: 2);
8706 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8707 "Unexpected VF");
8708
8709 bool Fractional = VF < LMul1VF;
8710 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8711 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMUL: LMulVal, Fractional);
8712 unsigned VSEW = RISCVVType::encodeSEW(SEW: ElementWidth);
8713
8714 SDLoc DL(N);
8715
8716 SDValue LMul = DAG.getTargetConstant(Val: VLMUL, DL, VT: XLenVT);
8717 SDValue Sew = DAG.getTargetConstant(Val: VSEW, DL, VT: XLenVT);
8718
8719 SDValue AVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 1));
8720
8721 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8722 SDValue Res =
8723 DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: ID, N2: AVL, N3: Sew, N4: LMul);
8724 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res);
8725}
8726
8727static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8728 const RISCVSubtarget &Subtarget) {
8729 SDValue Op0 = N->getOperand(Num: 1);
8730 MVT OpVT = Op0.getSimpleValueType();
8731 MVT ContainerVT = OpVT;
8732 if (OpVT.isFixedLengthVector()) {
8733 ContainerVT = getContainerForFixedLengthVector(DAG, VT: OpVT, Subtarget);
8734 Op0 = convertToScalableVector(VT: ContainerVT, V: Op0, DAG, Subtarget);
8735 }
8736 MVT XLenVT = Subtarget.getXLenVT();
8737 SDLoc DL(N);
8738 auto [Mask, VL] = getDefaultVLOps(VecVT: OpVT, ContainerVT, DL, DAG, Subtarget);
8739 SDValue Res = DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Op0, N2: Mask, N3: VL);
8740 if (isOneConstant(V: N->getOperand(Num: 2)))
8741 return Res;
8742
8743 // Convert -1 to VL.
8744 SDValue Setcc =
8745 DAG.getSetCC(DL, VT: XLenVT, LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETLT);
8746 VL = DAG.getElementCount(DL, VT: XLenVT, EC: OpVT.getVectorElementCount());
8747 return DAG.getSelect(DL, VT: XLenVT, Cond: Setcc, LHS: VL, RHS: Res);
8748}
8749
8750static inline void promoteVCIXScalar(const SDValue &Op,
8751 SmallVectorImpl<SDValue> &Operands,
8752 SelectionDAG &DAG) {
8753 const RISCVSubtarget &Subtarget =
8754 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8755
8756 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8757 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8758 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
8759 SDLoc DL(Op);
8760
8761 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8762 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8763 if (!II || !II->hasScalarOperand())
8764 return;
8765
8766 unsigned SplatOp = II->ScalarOperand + 1;
8767 assert(SplatOp < Op.getNumOperands());
8768
8769 SDValue &ScalarOp = Operands[SplatOp];
8770 MVT OpVT = ScalarOp.getSimpleValueType();
8771 MVT XLenVT = Subtarget.getXLenVT();
8772
8773 // The code below is partially copied from lowerVectorIntrinsicScalars.
8774 // If this isn't a scalar, or its type is XLenVT we're done.
8775 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8776 return;
8777
// Manually emit the promotion of the scalar operand.
8779 if (OpVT.bitsLT(VT: XLenVT)) {
8780 unsigned ExtOpc =
8781 isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8782 ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp);
8783 }
8784
8785 return;
8786}
8787
8788static void processVCIXOperands(SDValue &OrigOp,
8789 SmallVectorImpl<SDValue> &Operands,
8790 SelectionDAG &DAG) {
8791 promoteVCIXScalar(Op: OrigOp, Operands, DAG);
8792 const RISCVSubtarget &Subtarget =
8793 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8794 for (SDValue &V : Operands) {
8795 EVT ValType = V.getValueType();
8796 if (ValType.isVector() && ValType.isFloatingPoint()) {
8797 MVT InterimIVT =
8798 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ValType.getScalarSizeInBits()),
8799 EC: ValType.getVectorElementCount());
8800 V = DAG.getBitcast(VT: InterimIVT, V);
8801 }
8802 if (ValType.isFixedLengthVector()) {
8803 MVT OpContainerVT = getContainerForFixedLengthVector(
8804 DAG, VT: V.getSimpleValueType(), Subtarget);
8805 V = convertToScalableVector(VT: OpContainerVT, V, DAG, Subtarget);
8806 }
8807 }
8808}
8809
8810// LMUL * VLEN should be greater than or equal to EGS * SEW
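// E.g. with VLEN = 128 and SEW = 32, an LMUL1 type such as nxv2i32 gives
// LMUL * VLEN = 128 >= 4 * 32, so an EGS=4 element group fits, whereas the
// fractional nxv1i32 (LMUL = 1/2) provides only 64 bits and is rejected.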
8811static inline bool isValidEGW(int EGS, EVT VT,
8812 const RISCVSubtarget &Subtarget) {
8813 return (Subtarget.getRealMinVLen() *
8814 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8815 EGS * VT.getScalarSizeInBits();
8816}
8817
8818SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8819 SelectionDAG &DAG) const {
8820 unsigned IntNo = Op.getConstantOperandVal(i: 0);
8821 SDLoc DL(Op);
8822 MVT XLenVT = Subtarget.getXLenVT();
8823
8824 switch (IntNo) {
8825 default:
8826 break; // Don't custom lower most intrinsics.
8827 case Intrinsic::thread_pointer: {
8828 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8829 return DAG.getRegister(RISCV::X4, PtrVT);
8830 }
8831 case Intrinsic::riscv_orc_b:
8832 case Intrinsic::riscv_brev8:
8833 case Intrinsic::riscv_sha256sig0:
8834 case Intrinsic::riscv_sha256sig1:
8835 case Intrinsic::riscv_sha256sum0:
8836 case Intrinsic::riscv_sha256sum1:
8837 case Intrinsic::riscv_sm3p0:
8838 case Intrinsic::riscv_sm3p1: {
8839 unsigned Opc;
8840 switch (IntNo) {
8841 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8842 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8843 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8844 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8845 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8846 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8847 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8848 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8849 }
8850
8851 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8852 SDValue NewOp =
8853 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8854 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8855 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8856 }
8857
8858 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
8859 }
8860 case Intrinsic::riscv_sm4ks:
8861 case Intrinsic::riscv_sm4ed: {
8862 unsigned Opc =
8863 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8864
8865 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8866 SDValue NewOp0 =
8867 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8868 SDValue NewOp1 =
8869 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8870 SDValue Res =
8871 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8873 }
8874
8875 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
8876 N3: Op.getOperand(i: 3));
8877 }
8878 case Intrinsic::riscv_zip:
8879 case Intrinsic::riscv_unzip: {
8880 unsigned Opc =
8881 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8882 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
8883 }
8884 case Intrinsic::riscv_mopr: {
8885 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8886 SDValue NewOp =
8887 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8888 SDValue Res = DAG.getNode(
8889 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8890 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8891 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8892 }
8893 return DAG.getNode(Opcode: RISCVISD::MOPR, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
8894 N2: Op.getOperand(i: 2));
8895 }
8896
8897 case Intrinsic::riscv_moprr: {
8898 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8899 SDValue NewOp0 =
8900 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8901 SDValue NewOp1 =
8902 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8903 SDValue Res = DAG.getNode(
8904 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8905 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8906 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8907 }
8908 return DAG.getNode(Opcode: RISCVISD::MOPRR, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
8909 N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
8910 }
8911 case Intrinsic::riscv_clmul:
8912 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8913 SDValue NewOp0 =
8914 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8915 SDValue NewOp1 =
8916 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8917 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8918 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8919 }
8920 return DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
8921 N2: Op.getOperand(i: 2));
8922 case Intrinsic::riscv_clmulh:
8923 case Intrinsic::riscv_clmulr: {
8924 unsigned Opc =
8925 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8926 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8927 SDValue NewOp0 =
8928 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8929 SDValue NewOp1 =
8930 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8931 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8932 DAG.getConstant(32, DL, MVT::i64));
8933 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8934 DAG.getConstant(32, DL, MVT::i64));
8935 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8936 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8937 DAG.getConstant(32, DL, MVT::i64));
8938 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8939 }
8940
8941 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
8942 }
8943 case Intrinsic::experimental_get_vector_length:
8944 return lowerGetVectorLength(N: Op.getNode(), DAG, Subtarget);
8945 case Intrinsic::experimental_cttz_elts:
8946 return lowerCttzElts(N: Op.getNode(), DAG, Subtarget);
8947 case Intrinsic::riscv_vmv_x_s: {
8948 SDValue Res = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
8949 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: Res);
8950 }
8951 case Intrinsic::riscv_vfmv_f_s:
8952 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Op.getValueType(),
8953 N1: Op.getOperand(i: 1), N2: DAG.getVectorIdxConstant(Val: 0, DL));
8954 case Intrinsic::riscv_vmv_v_x:
8955 return lowerScalarSplat(Passthru: Op.getOperand(i: 1), Scalar: Op.getOperand(i: 2),
8956 VL: Op.getOperand(i: 3), VT: Op.getSimpleValueType(), DL, DAG,
8957 Subtarget);
8958 case Intrinsic::riscv_vfmv_v_f:
8959 return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: Op.getValueType(),
8960 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
8961 case Intrinsic::riscv_vmv_s_x: {
8962 SDValue Scalar = Op.getOperand(i: 2);
8963
8964 if (Scalar.getValueType().bitsLE(VT: XLenVT)) {
8965 Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Scalar);
8966 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: Op.getValueType(),
8967 N1: Op.getOperand(i: 1), N2: Scalar, N3: Op.getOperand(i: 3));
8968 }
8969
8970 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8971
8972 // This is an i64 value that lives in two scalar registers. We have to
8973 // insert this in a convoluted way. First we build vXi64 splat containing
8974 // the two values that we assemble using some bit math. Next we'll use
8975 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8976 // to merge element 0 from our splat into the source vector.
8977 // FIXME: This is probably not the best way to do this, but it is
8978 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8979 // point.
8980 // sw lo, (a0)
8981 // sw hi, 4(a0)
8982 // vlse vX, (a0)
8983 //
8984 // vid.v vVid
8985 // vmseq.vx mMask, vVid, 0
8986 // vmerge.vvm vDest, vSrc, vVal, mMask
8987 MVT VT = Op.getSimpleValueType();
8988 SDValue Vec = Op.getOperand(i: 1);
8989 SDValue VL = getVLOperand(Op);
8990
8991 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar, VL, DAG);
8992 if (Op.getOperand(i: 1).isUndef())
8993 return SplattedVal;
8994 SDValue SplattedIdx =
8995 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8996 DAG.getConstant(0, DL, MVT::i32), VL);
8997
8998 MVT MaskVT = getMaskTypeFor(VecVT: VT);
8999 SDValue Mask = getAllOnesMask(VecVT: VT, VL, DL, DAG);
9000 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL);
9001 SDValue SelectCond =
9002 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
9003 Ops: {VID, SplattedIdx, DAG.getCondCode(Cond: ISD::SETEQ),
9004 DAG.getUNDEF(VT: MaskVT), Mask, VL});
9005 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: SelectCond, N2: SplattedVal,
9006 N3: Vec, N4: DAG.getUNDEF(VT), N5: VL);
9007 }
9008 case Intrinsic::riscv_vfmv_s_f:
9009 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT: Op.getSimpleValueType(),
9010 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
9011 // EGS * EEW >= 128 bits
9012 case Intrinsic::riscv_vaesdf_vv:
9013 case Intrinsic::riscv_vaesdf_vs:
9014 case Intrinsic::riscv_vaesdm_vv:
9015 case Intrinsic::riscv_vaesdm_vs:
9016 case Intrinsic::riscv_vaesef_vv:
9017 case Intrinsic::riscv_vaesef_vs:
9018 case Intrinsic::riscv_vaesem_vv:
9019 case Intrinsic::riscv_vaesem_vs:
9020 case Intrinsic::riscv_vaeskf1:
9021 case Intrinsic::riscv_vaeskf2:
9022 case Intrinsic::riscv_vaesz_vs:
9023 case Intrinsic::riscv_vsm4k:
9024 case Intrinsic::riscv_vsm4r_vv:
9025 case Intrinsic::riscv_vsm4r_vs: {
9026 if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) ||
9027 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) ||
9028 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget))
9029 report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW.");
9030 return Op;
9031 }
9032 // EGS * EEW >= 256 bits
9033 case Intrinsic::riscv_vsm3c:
9034 case Intrinsic::riscv_vsm3me: {
9035 if (!isValidEGW(EGS: 8, VT: Op.getSimpleValueType(), Subtarget) ||
9036 !isValidEGW(EGS: 8, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget))
9037 report_fatal_error(reason: "EGW should be greater than or equal to 8 * SEW.");
9038 return Op;
9039 }
9040 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9041 case Intrinsic::riscv_vsha2ch:
9042 case Intrinsic::riscv_vsha2cl:
9043 case Intrinsic::riscv_vsha2ms: {
9044 if (Op->getSimpleValueType(ResNo: 0).getScalarSizeInBits() == 64 &&
9045 !Subtarget.hasStdExtZvknhb())
9046 report_fatal_error(reason: "SEW=64 needs Zvknhb to be enabled.");
9047 if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) ||
9048 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) ||
9049 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget))
9050 report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW.");
9051 return Op;
9052 }
9053 case Intrinsic::riscv_sf_vc_v_x:
9054 case Intrinsic::riscv_sf_vc_v_i:
9055 case Intrinsic::riscv_sf_vc_v_xv:
9056 case Intrinsic::riscv_sf_vc_v_iv:
9057 case Intrinsic::riscv_sf_vc_v_vv:
9058 case Intrinsic::riscv_sf_vc_v_fv:
9059 case Intrinsic::riscv_sf_vc_v_xvv:
9060 case Intrinsic::riscv_sf_vc_v_ivv:
9061 case Intrinsic::riscv_sf_vc_v_vvv:
9062 case Intrinsic::riscv_sf_vc_v_fvv:
9063 case Intrinsic::riscv_sf_vc_v_xvw:
9064 case Intrinsic::riscv_sf_vc_v_ivw:
9065 case Intrinsic::riscv_sf_vc_v_vvw:
9066 case Intrinsic::riscv_sf_vc_v_fvw: {
9067 MVT VT = Op.getSimpleValueType();
9068
9069 SmallVector<SDValue> Operands{Op->op_values()};
9070 processVCIXOperands(OrigOp&: Op, Operands, DAG);
9071
9072 MVT RetVT = VT;
9073 if (VT.isFixedLengthVector())
9074 RetVT = getContainerForFixedLengthVector(VT);
9075 else if (VT.isFloatingPoint())
9076 RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()),
9077 EC: VT.getVectorElementCount());
9078
9079 SDValue NewNode = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: RetVT, Ops: Operands);
9080
9081 if (VT.isFixedLengthVector())
9082 NewNode = convertFromScalableVector(VT, V: NewNode, DAG, Subtarget);
9083 else if (VT.isFloatingPoint())
9084 NewNode = DAG.getBitcast(VT, V: NewNode);
9085
9086 if (Op == NewNode)
9087 break;
9088
9089 return NewNode;
9090 }
9091 }
9092
9093 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9094}
9095
9096static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9097 unsigned Type) {
9098 SDLoc DL(Op);
9099 SmallVector<SDValue> Operands{Op->op_values()};
9100 Operands.erase(CI: Operands.begin() + 1);
9101
9102 const RISCVSubtarget &Subtarget =
9103 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9104 MVT VT = Op.getSimpleValueType();
9105 MVT RetVT = VT;
9106 MVT FloatVT = VT;
9107
9108 if (VT.isFloatingPoint()) {
9109 RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()),
9110 EC: VT.getVectorElementCount());
9111 FloatVT = RetVT;
9112 }
9113 if (VT.isFixedLengthVector())
9114 RetVT = getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT: RetVT,
9115 Subtarget);
9116
9117 processVCIXOperands(OrigOp&: Op, Operands, DAG);
9118
9119 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9120 SDValue NewNode = DAG.getNode(Opcode: Type, DL, VTList: VTs, Ops: Operands);
9121 SDValue Chain = NewNode.getValue(R: 1);
9122
9123 if (VT.isFixedLengthVector())
9124 NewNode = convertFromScalableVector(VT: FloatVT, V: NewNode, DAG, Subtarget);
9125 if (VT.isFloatingPoint())
9126 NewNode = DAG.getBitcast(VT, V: NewNode);
9127
9128 NewNode = DAG.getMergeValues(Ops: {NewNode, Chain}, dl: DL);
9129
9130 return NewNode;
9131}
9132
9133static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9134 unsigned Type) {
9135 SmallVector<SDValue> Operands{Op->op_values()};
9136 Operands.erase(CI: Operands.begin() + 1);
9137 processVCIXOperands(OrigOp&: Op, Operands, DAG);
9138
9139 return DAG.getNode(Opcode: Type, DL: SDLoc(Op), VT: Op.getValueType(), Ops: Operands);
9140}
9141
9142SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9143 SelectionDAG &DAG) const {
9144 unsigned IntNo = Op.getConstantOperandVal(i: 1);
9145 switch (IntNo) {
9146 default:
9147 break;
9148 case Intrinsic::riscv_masked_strided_load: {
9149 SDLoc DL(Op);
9150 MVT XLenVT = Subtarget.getXLenVT();
9151
9152 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9153 // the selection of the masked intrinsics doesn't do this for us.
9154 SDValue Mask = Op.getOperand(i: 5);
9155 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
9156
9157 MVT VT = Op->getSimpleValueType(ResNo: 0);
9158 MVT ContainerVT = VT;
9159 if (VT.isFixedLengthVector())
9160 ContainerVT = getContainerForFixedLengthVector(VT);
9161
9162 SDValue PassThru = Op.getOperand(i: 2);
9163 if (!IsUnmasked) {
9164 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
9165 if (VT.isFixedLengthVector()) {
9166 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
9167 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
9168 }
9169 }
9170
9171 auto *Load = cast<MemIntrinsicSDNode>(Val&: Op);
9172 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
9173 SDValue Ptr = Op.getOperand(i: 3);
9174 SDValue Stride = Op.getOperand(i: 4);
9175 SDValue Result, Chain;
9176
// TODO: We currently restrict this to unmasked loads in consideration of
// the complexity of handling all-false masks.
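// A zero-stride load reads the same address for every element, so when it is
// unmasked we can issue a single scalar load and splat the result: the first
// branch below zero-extends the scalar load and splats it for integer
// element types, while the second loads the scalar directly and uses a
// generic splat.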
9179 MVT ScalarVT = ContainerVT.getVectorElementType();
9180 if (IsUnmasked && isNullConstant(V: Stride) && ContainerVT.isInteger()) {
9181 SDValue ScalarLoad =
9182 DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT: XLenVT, Chain: Load->getChain(), Ptr,
9183 MemVT: ScalarVT, MMO: Load->getMemOperand());
9184 Chain = ScalarLoad.getValue(R: 1);
9185 Result = lowerScalarSplat(Passthru: SDValue(), Scalar: ScalarLoad, VL, VT: ContainerVT, DL, DAG,
9186 Subtarget);
9187 } else if (IsUnmasked && isNullConstant(V: Stride) && isTypeLegal(VT: ScalarVT)) {
9188 SDValue ScalarLoad = DAG.getLoad(VT: ScalarVT, dl: DL, Chain: Load->getChain(), Ptr,
9189 MMO: Load->getMemOperand());
9190 Chain = ScalarLoad.getValue(R: 1);
9191 Result = DAG.getSplat(VT: ContainerVT, DL, Op: ScalarLoad);
9192 } else {
9193 SDValue IntID = DAG.getTargetConstant(
9194 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9195 XLenVT);
9196
9197 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9198 if (IsUnmasked)
9199 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
9200 else
9201 Ops.push_back(Elt: PassThru);
9202 Ops.push_back(Elt: Ptr);
9203 Ops.push_back(Elt: Stride);
9204 if (!IsUnmasked)
9205 Ops.push_back(Elt: Mask);
9206 Ops.push_back(Elt: VL);
9207 if (!IsUnmasked) {
9208 SDValue Policy =
9209 DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT);
9210 Ops.push_back(Elt: Policy);
9211 }
9212
9213 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9214 Result =
9215 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
9216 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
9217 Chain = Result.getValue(R: 1);
9218 }
9219 if (VT.isFixedLengthVector())
9220 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
9221 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
9222 }
9223 case Intrinsic::riscv_seg2_load:
9224 case Intrinsic::riscv_seg3_load:
9225 case Intrinsic::riscv_seg4_load:
9226 case Intrinsic::riscv_seg5_load:
9227 case Intrinsic::riscv_seg6_load:
9228 case Intrinsic::riscv_seg7_load:
9229 case Intrinsic::riscv_seg8_load: {
9230 SDLoc DL(Op);
9231 static const Intrinsic::ID VlsegInts[7] = {
9232 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9233 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9234 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9235 Intrinsic::riscv_vlseg8};
9236 unsigned NF = Op->getNumValues() - 1;
9237 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9238 MVT XLenVT = Subtarget.getXLenVT();
9239 MVT VT = Op->getSimpleValueType(ResNo: 0);
9240 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9241
9242 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG,
9243 Subtarget);
9244 SDValue IntID = DAG.getTargetConstant(Val: VlsegInts[NF - 2], DL, VT: XLenVT);
9245 auto *Load = cast<MemIntrinsicSDNode>(Val&: Op);
9246 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9247 ContainerVTs.push_back(MVT::Other);
9248 SDVTList VTs = DAG.getVTList(VTs: ContainerVTs);
9249 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9250 Ops.insert(I: Ops.end(), NumToInsert: NF, Elt: DAG.getUNDEF(VT: ContainerVT));
9251 Ops.push_back(Elt: Op.getOperand(i: 2));
9252 Ops.push_back(Elt: VL);
9253 SDValue Result =
9254 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
9255 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
9256 SmallVector<SDValue, 9> Results;
9257 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9258 Results.push_back(Elt: convertFromScalableVector(VT, V: Result.getValue(R: RetIdx),
9259 DAG, Subtarget));
9260 Results.push_back(Elt: Result.getValue(R: NF));
9261 return DAG.getMergeValues(Ops: Results, dl: DL);
9262 }
9263 case Intrinsic::riscv_sf_vc_v_x_se:
9264 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_X_SE);
9265 case Intrinsic::riscv_sf_vc_v_i_se:
9266 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_I_SE);
9267 case Intrinsic::riscv_sf_vc_v_xv_se:
9268 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XV_SE);
9269 case Intrinsic::riscv_sf_vc_v_iv_se:
9270 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IV_SE);
9271 case Intrinsic::riscv_sf_vc_v_vv_se:
9272 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VV_SE);
9273 case Intrinsic::riscv_sf_vc_v_fv_se:
9274 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FV_SE);
9275 case Intrinsic::riscv_sf_vc_v_xvv_se:
9276 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVV_SE);
9277 case Intrinsic::riscv_sf_vc_v_ivv_se:
9278 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVV_SE);
9279 case Intrinsic::riscv_sf_vc_v_vvv_se:
9280 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVV_SE);
9281 case Intrinsic::riscv_sf_vc_v_fvv_se:
9282 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVV_SE);
9283 case Intrinsic::riscv_sf_vc_v_xvw_se:
9284 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVW_SE);
9285 case Intrinsic::riscv_sf_vc_v_ivw_se:
9286 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVW_SE);
9287 case Intrinsic::riscv_sf_vc_v_vvw_se:
9288 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVW_SE);
9289 case Intrinsic::riscv_sf_vc_v_fvw_se:
9290 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVW_SE);
9291 }
9292
9293 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9294}
9295
9296SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9297 SelectionDAG &DAG) const {
9298 unsigned IntNo = Op.getConstantOperandVal(i: 1);
9299 switch (IntNo) {
9300 default:
9301 break;
9302 case Intrinsic::riscv_masked_strided_store: {
9303 SDLoc DL(Op);
9304 MVT XLenVT = Subtarget.getXLenVT();
9305
9306 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9307 // the selection of the masked intrinsics doesn't do this for us.
9308 SDValue Mask = Op.getOperand(i: 5);
9309 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
9310
9311 SDValue Val = Op.getOperand(i: 2);
9312 MVT VT = Val.getSimpleValueType();
9313 MVT ContainerVT = VT;
9314 if (VT.isFixedLengthVector()) {
9315 ContainerVT = getContainerForFixedLengthVector(VT);
9316 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
9317 }
9318 if (!IsUnmasked) {
9319 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
9320 if (VT.isFixedLengthVector())
9321 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
9322 }
9323
9324 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
9325
9326 SDValue IntID = DAG.getTargetConstant(
9327 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9328 XLenVT);
9329
9330 auto *Store = cast<MemIntrinsicSDNode>(Val&: Op);
9331 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9332 Ops.push_back(Elt: Val);
9333 Ops.push_back(Elt: Op.getOperand(i: 3)); // Ptr
9334 Ops.push_back(Elt: Op.getOperand(i: 4)); // Stride
9335 if (!IsUnmasked)
9336 Ops.push_back(Elt: Mask);
9337 Ops.push_back(Elt: VL);
9338
9339 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: Store->getVTList(),
9340 Ops, MemVT: Store->getMemoryVT(),
9341 MMO: Store->getMemOperand());
9342 }
9343 case Intrinsic::riscv_seg2_store:
9344 case Intrinsic::riscv_seg3_store:
9345 case Intrinsic::riscv_seg4_store:
9346 case Intrinsic::riscv_seg5_store:
9347 case Intrinsic::riscv_seg6_store:
9348 case Intrinsic::riscv_seg7_store:
9349 case Intrinsic::riscv_seg8_store: {
9350 SDLoc DL(Op);
9351 static const Intrinsic::ID VssegInts[] = {
9352 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9353 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9354 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9355 Intrinsic::riscv_vsseg8};
9356 // Operands are (chain, int_id, vec*, ptr, vl)
9357 unsigned NF = Op->getNumOperands() - 4;
9358 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9359 MVT XLenVT = Subtarget.getXLenVT();
9360 MVT VT = Op->getOperand(Num: 2).getSimpleValueType();
9361 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9362
9363 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG,
9364 Subtarget);
9365 SDValue IntID = DAG.getTargetConstant(Val: VssegInts[NF - 2], DL, VT: XLenVT);
9366 SDValue Ptr = Op->getOperand(Num: NF + 2);
9367
9368 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Val&: Op);
9369 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9370 for (unsigned i = 0; i < NF; i++)
9371 Ops.push_back(Elt: convertToScalableVector(
9372 VT: ContainerVT, V: FixedIntrinsic->getOperand(Num: 2 + i), DAG, Subtarget));
9373 Ops.append(IL: {Ptr, VL});
9374
9375 return DAG.getMemIntrinsicNode(
9376 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9377 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9378 }
9379 case Intrinsic::riscv_sf_vc_xv_se:
9380 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XV_SE);
9381 case Intrinsic::riscv_sf_vc_iv_se:
9382 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IV_SE);
9383 case Intrinsic::riscv_sf_vc_vv_se:
9384 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VV_SE);
9385 case Intrinsic::riscv_sf_vc_fv_se:
9386 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FV_SE);
9387 case Intrinsic::riscv_sf_vc_xvv_se:
9388 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVV_SE);
9389 case Intrinsic::riscv_sf_vc_ivv_se:
9390 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVV_SE);
9391 case Intrinsic::riscv_sf_vc_vvv_se:
9392 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVV_SE);
9393 case Intrinsic::riscv_sf_vc_fvv_se:
9394 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVV_SE);
9395 case Intrinsic::riscv_sf_vc_xvw_se:
9396 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVW_SE);
9397 case Intrinsic::riscv_sf_vc_ivw_se:
9398 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVW_SE);
9399 case Intrinsic::riscv_sf_vc_vvw_se:
9400 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVW_SE);
9401 case Intrinsic::riscv_sf_vc_fvw_se:
9402 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVW_SE);
9403 }
9404
9405 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9406}
9407
9408static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9409 switch (ISDOpcode) {
9410 default:
9411 llvm_unreachable("Unhandled reduction");
9412 case ISD::VP_REDUCE_ADD:
9413 case ISD::VECREDUCE_ADD:
9414 return RISCVISD::VECREDUCE_ADD_VL;
9415 case ISD::VP_REDUCE_UMAX:
9416 case ISD::VECREDUCE_UMAX:
9417 return RISCVISD::VECREDUCE_UMAX_VL;
9418 case ISD::VP_REDUCE_SMAX:
9419 case ISD::VECREDUCE_SMAX:
9420 return RISCVISD::VECREDUCE_SMAX_VL;
9421 case ISD::VP_REDUCE_UMIN:
9422 case ISD::VECREDUCE_UMIN:
9423 return RISCVISD::VECREDUCE_UMIN_VL;
9424 case ISD::VP_REDUCE_SMIN:
9425 case ISD::VECREDUCE_SMIN:
9426 return RISCVISD::VECREDUCE_SMIN_VL;
9427 case ISD::VP_REDUCE_AND:
9428 case ISD::VECREDUCE_AND:
9429 return RISCVISD::VECREDUCE_AND_VL;
9430 case ISD::VP_REDUCE_OR:
9431 case ISD::VECREDUCE_OR:
9432 return RISCVISD::VECREDUCE_OR_VL;
9433 case ISD::VP_REDUCE_XOR:
9434 case ISD::VECREDUCE_XOR:
9435 return RISCVISD::VECREDUCE_XOR_VL;
9436 case ISD::VP_REDUCE_FADD:
9437 return RISCVISD::VECREDUCE_FADD_VL;
9438 case ISD::VP_REDUCE_SEQ_FADD:
9439 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9440 case ISD::VP_REDUCE_FMAX:
9441 return RISCVISD::VECREDUCE_FMAX_VL;
9442 case ISD::VP_REDUCE_FMIN:
9443 return RISCVISD::VECREDUCE_FMIN_VL;
9444 }
9445
9446}
9447
9448SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9449 SelectionDAG &DAG,
9450 bool IsVP) const {
9451 SDLoc DL(Op);
9452 SDValue Vec = Op.getOperand(i: IsVP ? 1 : 0);
9453 MVT VecVT = Vec.getSimpleValueType();
9454 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9455 Op.getOpcode() == ISD::VECREDUCE_OR ||
9456 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9457 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9458 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9459 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9460 "Unexpected reduction lowering");
9461
9462 MVT XLenVT = Subtarget.getXLenVT();
9463
9464 MVT ContainerVT = VecVT;
9465 if (VecVT.isFixedLengthVector()) {
9466 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9467 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9468 }
9469
9470 SDValue Mask, VL;
9471 if (IsVP) {
9472 Mask = Op.getOperand(i: 2);
9473 VL = Op.getOperand(i: 3);
9474 } else {
9475 std::tie(args&: Mask, args&: VL) =
9476 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9477 }
9478
9479 unsigned BaseOpc;
9480 ISD::CondCode CC;
9481 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
9482
9483 switch (Op.getOpcode()) {
9484 default:
9485 llvm_unreachable("Unhandled reduction");
9486 case ISD::VECREDUCE_AND:
9487 case ISD::VP_REDUCE_AND: {
9488 // vcpop ~x == 0
9489 SDValue TrueMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
9490 Vec = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Vec, N2: TrueMask, N3: VL);
9491 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
9492 CC = ISD::SETEQ;
9493 BaseOpc = ISD::AND;
9494 break;
9495 }
9496 case ISD::VECREDUCE_OR:
9497 case ISD::VP_REDUCE_OR:
9498 // vcpop x != 0
9499 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
9500 CC = ISD::SETNE;
9501 BaseOpc = ISD::OR;
9502 break;
9503 case ISD::VECREDUCE_XOR:
9504 case ISD::VP_REDUCE_XOR: {
9505 // ((vcpop x) & 1) != 0
9506 SDValue One = DAG.getConstant(Val: 1, DL, VT: XLenVT);
9507 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
9508 Vec = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Vec, N2: One);
9509 CC = ISD::SETNE;
9510 BaseOpc = ISD::XOR;
9511 break;
9512 }
9513 }
9514
9515 SDValue SetCC = DAG.getSetCC(DL, VT: XLenVT, LHS: Vec, RHS: Zero, Cond: CC);
9516 SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: SetCC);
9517
9518 if (!IsVP)
9519 return SetCC;
9520
9521 // Now include the start value in the operation.
9522 // Note that we must return the start value when no elements are operated
9523 // upon. The vcpop instructions we've emitted in each case above will return
9524 // 0 for an inactive vector, and so we've already received the neutral value:
9525 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9526 // can simply include the start value.
9527 return DAG.getNode(Opcode: BaseOpc, DL, VT: Op.getValueType(), N1: SetCC, N2: Op.getOperand(i: 0));
9528}
9529
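// Return true if the AVL is provably non-zero: either the X0 sentinel
// (meaning VLMAX) or an immediate of at least one element.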
9530static bool isNonZeroAVL(SDValue AVL) {
9531 auto *RegisterAVL = dyn_cast<RegisterSDNode>(Val&: AVL);
9532 auto *ImmAVL = dyn_cast<ConstantSDNode>(Val&: AVL);
9533 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9534 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9535}
9536
9537/// Helper to lower a reduction sequence of the form:
9538/// scalar = reduce_op vec, scalar_start
9539static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9540 SDValue StartValue, SDValue Vec, SDValue Mask,
9541 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9542 const RISCVSubtarget &Subtarget) {
9543 const MVT VecVT = Vec.getSimpleValueType();
9544 const MVT M1VT = getLMUL1VT(VT: VecVT);
9545 const MVT XLenVT = Subtarget.getXLenVT();
9546 const bool NonZeroAVL = isNonZeroAVL(AVL: VL);
9547
9548 // The reduction needs an LMUL1 input; do the splat at LMUL1, or at the
9549 // original VT if it is no larger than LMUL1 (e.g. a fractional LMUL type).
9550 auto InnerVT = VecVT.bitsLE(VT: M1VT) ? VecVT : M1VT;
9551 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9552 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9553 // be the result of the reduction operation.
9554 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(Val: 1, DL, VT: XLenVT);
9555 SDValue InitialValue = lowerScalarInsert(Scalar: StartValue, VL: InnerVL, VT: InnerVT, DL,
9556 DAG, Subtarget);
9557 if (M1VT != InnerVT)
9558 InitialValue =
9559 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: M1VT, N1: DAG.getUNDEF(VT: M1VT),
9560 N2: InitialValue, N3: DAG.getVectorIdxConstant(Val: 0, DL));
9561 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(VT: M1VT) : InitialValue;
9562 SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT);
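  // Operand layout of the RISCVISD reduction nodes: passthru, vector source,
  // start value inserted into element 0, mask, VL, and tail policy.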
9563 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9564 SDValue Reduction = DAG.getNode(Opcode: RVVOpcode, DL, VT: M1VT, Ops);
9565 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Reduction,
9566 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9567}
9568
9569SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9570 SelectionDAG &DAG) const {
9571 SDLoc DL(Op);
9572 SDValue Vec = Op.getOperand(i: 0);
9573 EVT VecEVT = Vec.getValueType();
9574
9575 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
9576
9577 // Due to the ordering in type legalization we may have a vector type that
9578 // needs to be split. Do that manually so we can get down to a legal type.
9579 while (getTypeAction(Context&: *DAG.getContext(), VT: VecEVT) ==
9580 TargetLowering::TypeSplitVector) {
9581 auto [Lo, Hi] = DAG.SplitVector(N: Vec, DL);
9582 VecEVT = Lo.getValueType();
9583 Vec = DAG.getNode(Opcode: BaseOpc, DL, VT: VecEVT, N1: Lo, N2: Hi);
9584 }
9585
9586 // TODO: The type may need to be widened rather than split. Or widened before
9587 // it can be split.
9588 if (!isTypeLegal(VT: VecEVT))
9589 return SDValue();
9590
9591 MVT VecVT = VecEVT.getSimpleVT();
9592 MVT VecEltVT = VecVT.getVectorElementType();
9593 unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode());
9594
9595 MVT ContainerVT = VecVT;
9596 if (VecVT.isFixedLengthVector()) {
9597 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9598 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9599 }
9600
9601 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9602
9603 SDValue StartV = DAG.getNeutralElement(Opcode: BaseOpc, DL, VT: VecEltVT, Flags: SDNodeFlags());
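  // Integer min/max and the bitwise ops are idempotent, so the first vector
  // element can serve as the start value; this avoids materializing
  // neutral-element constants such as INT_MIN or all-ones.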
9604 switch (BaseOpc) {
9605 case ISD::AND:
9606 case ISD::OR:
9607 case ISD::UMAX:
9608 case ISD::UMIN:
9609 case ISD::SMAX:
9610 case ISD::SMIN:
9611 StartV = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: VecEltVT, N1: Vec,
9612 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9613 }
9614 return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: StartV, Vec,
9615 Mask, VL, DL, DAG, Subtarget);
9616}
9617
9618// Given a reduction op, this function returns the matching reduction opcode,
9619// the vector SDValue and the scalar SDValue required to lower this to a
9620// RISCVISD node.
9621static std::tuple<unsigned, SDValue, SDValue>
9622getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9623 const RISCVSubtarget &Subtarget) {
9624 SDLoc DL(Op);
9625 auto Flags = Op->getFlags();
9626 unsigned Opcode = Op.getOpcode();
9627 switch (Opcode) {
9628 default:
9629 llvm_unreachable("Unhandled reduction");
9630 case ISD::VECREDUCE_FADD: {
9631 // Use positive zero if we can. It is cheaper to materialize.
9632 SDValue Zero =
9633 DAG.getConstantFP(Val: Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT: EltVT);
9634 return std::make_tuple(args: RISCVISD::VECREDUCE_FADD_VL, args: Op.getOperand(i: 0), args&: Zero);
9635 }
9636 case ISD::VECREDUCE_SEQ_FADD:
9637 return std::make_tuple(args: RISCVISD::VECREDUCE_SEQ_FADD_VL, args: Op.getOperand(i: 1),
9638 args: Op.getOperand(i: 0));
9639 case ISD::VECREDUCE_FMINIMUM:
9640 case ISD::VECREDUCE_FMAXIMUM:
9641 case ISD::VECREDUCE_FMIN:
9642 case ISD::VECREDUCE_FMAX: {
9643 SDValue Front =
9644 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Op.getOperand(i: 0),
9645 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9646 unsigned RVVOpc =
9647 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9648 ? RISCVISD::VECREDUCE_FMIN_VL
9649 : RISCVISD::VECREDUCE_FMAX_VL;
9650 return std::make_tuple(args&: RVVOpc, args: Op.getOperand(i: 0), args&: Front);
9651 }
9652 }
9653}
9654
9655SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9656 SelectionDAG &DAG) const {
9657 SDLoc DL(Op);
9658 MVT VecEltVT = Op.getSimpleValueType();
9659
9660 unsigned RVVOpcode;
9661 SDValue VectorVal, ScalarVal;
9662 std::tie(args&: RVVOpcode, args&: VectorVal, args&: ScalarVal) =
9663 getRVVFPReductionOpAndOperands(Op, DAG, EltVT: VecEltVT, Subtarget);
9664 MVT VecVT = VectorVal.getSimpleValueType();
9665
9666 MVT ContainerVT = VecVT;
9667 if (VecVT.isFixedLengthVector()) {
9668 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9669 VectorVal = convertToScalableVector(VT: ContainerVT, V: VectorVal, DAG, Subtarget);
9670 }
9671
9672 MVT ResVT = Op.getSimpleValueType();
9673 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9674 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, StartValue: ScalarVal, Vec: VectorVal, Mask,
9675 VL, DL, DAG, Subtarget);
9676 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9677 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9678 return Res;
9679
9680 if (Op->getFlags().hasNoNaNs())
9681 return Res;
9682
9683 // Force the output to NaN if any element is NaN.
9684 SDValue IsNan =
9685 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
9686 Ops: {VectorVal, VectorVal, DAG.getCondCode(Cond: ISD::SETNE),
9687 DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL});
9688 MVT XLenVT = Subtarget.getXLenVT();
9689 SDValue CPop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: IsNan, N2: Mask, N3: VL);
9690 SDValue NoNaNs = DAG.getSetCC(DL, VT: XLenVT, LHS: CPop,
9691 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ);
9692 return DAG.getSelect(
9693 DL, VT: ResVT, Cond: NoNaNs, LHS: Res,
9694 RHS: DAG.getConstantFP(Val: APFloat::getNaN(Sem: DAG.EVTToAPFloatSemantics(VT: ResVT)), DL,
9695 VT: ResVT));
9696}
9697
9698SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9699 SelectionDAG &DAG) const {
9700 SDLoc DL(Op);
9701 SDValue Vec = Op.getOperand(i: 1);
9702 EVT VecEVT = Vec.getValueType();
9703
9704 // TODO: The type may need to be widened rather than split. Or widened before
9705 // it can be split.
9706 if (!isTypeLegal(VT: VecEVT))
9707 return SDValue();
9708
9709 MVT VecVT = VecEVT.getSimpleVT();
9710 unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode());
9711
9712 if (VecVT.isFixedLengthVector()) {
9713 auto ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9714 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9715 }
9716
9717 SDValue VL = Op.getOperand(i: 3);
9718 SDValue Mask = Op.getOperand(i: 2);
9719 return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: Op.getOperand(i: 0),
9720 Vec, Mask, VL, DL, DAG, Subtarget);
9721}
9722
9723SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9724 SelectionDAG &DAG) const {
9725 SDValue Vec = Op.getOperand(i: 0);
9726 SDValue SubVec = Op.getOperand(i: 1);
9727 MVT VecVT = Vec.getSimpleValueType();
9728 MVT SubVecVT = SubVec.getSimpleValueType();
9729
9730 SDLoc DL(Op);
9731 MVT XLenVT = Subtarget.getXLenVT();
9732 unsigned OrigIdx = Op.getConstantOperandVal(i: 2);
9733 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9734
9735 // We don't have the ability to slide mask vectors up indexed by their i1
9736 // elements; the smallest we can do is i8. Often we are able to bitcast to
9737 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9738 // into a scalable one, we might not necessarily have enough scalable
9739 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9740 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9741 (OrigIdx != 0 || !Vec.isUndef())) {
9742 if (VecVT.getVectorMinNumElements() >= 8 &&
9743 SubVecVT.getVectorMinNumElements() >= 8) {
9744 assert(OrigIdx % 8 == 0 && "Invalid index");
9745 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9746 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9747 "Unexpected mask vector lowering");
9748 OrigIdx /= 8;
9749 SubVecVT =
9750 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9751 SubVecVT.isScalableVector());
9752 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9753 VecVT.isScalableVector());
9754 Vec = DAG.getBitcast(VT: VecVT, V: Vec);
9755 SubVec = DAG.getBitcast(VT: SubVecVT, V: SubVec);
9756 } else {
9757 // We can't slide this mask vector up indexed by its i1 elements.
9758 // This poses a problem when we wish to insert a scalable vector which
9759 // can't be re-expressed as a larger type. Just choose the slow path and
9760 // extend to a larger type, then truncate back down.
9761 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9762 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9763 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec);
9764 SubVec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtSubVecVT, Operand: SubVec);
9765 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ExtVecVT, N1: Vec, N2: SubVec,
9766 N3: Op.getOperand(i: 2));
9767 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtVecVT);
9768 return DAG.getSetCC(DL, VT: VecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE);
9769 }
9770 }
9771
9772 // If the subvector is a fixed-length type, we cannot use subregister
9773 // manipulation to simplify the codegen; we don't know which register of a
9774 // LMUL group contains the specific subvector as we only know the minimum
9775 // register size. Therefore we must slide the vector group up the full
9776 // amount.
9777 if (SubVecVT.isFixedLengthVector()) {
9778 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9779 return Op;
9780 MVT ContainerVT = VecVT;
9781 if (VecVT.isFixedLengthVector()) {
9782 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9783 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9784 }
9785
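    // Inserting at index 0 into an otherwise-undef vector just widens the
    // subvector into the container type; no slide is required.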
9786 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9787 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT,
9788 N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec,
9789 N3: DAG.getVectorIdxConstant(Val: 0, DL));
9790 SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget);
9791 return DAG.getBitcast(VT: Op.getValueType(), V: SubVec);
9792 }
9793
9794 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT,
9795 N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec,
9796 N3: DAG.getVectorIdxConstant(Val: 0, DL));
9797 SDValue Mask =
9798 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9799 // Set the vector length to only the number of elements we care about. Note
9800 // that for slideup this includes the offset.
9801 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9802 SDValue VL = getVLOp(NumElts: EndIndex, ContainerVT, DL, DAG, Subtarget);
9803
9804 // Use tail agnostic policy if we're inserting over Vec's tail.
9805 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9806 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9807 Policy = RISCVII::TAIL_AGNOSTIC;
9808
9809 // If we're inserting into the lowest elements, use a tail undisturbed
9810 // vmv.v.v.
9811 if (OrigIdx == 0) {
9812 SubVec =
9813 DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: Vec, N2: SubVec, N3: VL);
9814 } else {
9815 SDValue SlideupAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT);
9816 SubVec = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: SubVec,
9817 Offset: SlideupAmt, Mask, VL, Policy);
9818 }
9819
9820 if (VecVT.isFixedLengthVector())
9821 SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget);
9822 return DAG.getBitcast(VT: Op.getValueType(), V: SubVec);
9823 }
9824
9825 unsigned SubRegIdx, RemIdx;
9826 std::tie(args&: SubRegIdx, args&: RemIdx) =
9827 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9828 VecVT, SubVecVT, InsertExtractIdx: OrigIdx, TRI);
9829
9830 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(VT: SubVecVT);
9831 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9832 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9833 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9834
9835 // 1. If the Idx has been completely eliminated and this subvector's size is
9836 // a vector register or a multiple thereof, or the surrounding elements are
9837 // undef, then this is a subvector insert which naturally aligns to a vector
9838 // register. These can easily be handled using subregister manipulation.
9839 // 2. If the subvector is smaller than a vector register, then the insertion
9840 // must preserve the undisturbed elements of the register. We do this by
9841 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9842 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9843 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9844 // LMUL=1 type back into the larger vector (resolving to another subregister
9845 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9846 // to avoid allocating a large register group to hold our subvector.
9847 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9848 return Op;
9849
9850 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
9851 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
9852 // (in our case undisturbed). This means we can set up a subvector insertion
9853 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9854 // size of the subvector.
9855 MVT InterSubVT = VecVT;
9856 SDValue AlignedExtract = Vec;
9857 unsigned AlignedIdx = OrigIdx - RemIdx;
9858 if (VecVT.bitsGT(VT: getLMUL1VT(VT: VecVT))) {
9859 InterSubVT = getLMUL1VT(VT: VecVT);
9860 // Extract a subvector equal to the nearest full vector register type. This
9861 // should resolve to a EXTRACT_SUBREG instruction.
9862 AlignedExtract = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: InterSubVT, N1: Vec,
9863 N2: DAG.getVectorIdxConstant(Val: AlignedIdx, DL));
9864 }
9865
9866 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: InterSubVT,
9867 N1: DAG.getUNDEF(VT: InterSubVT), N2: SubVec,
9868 N3: DAG.getVectorIdxConstant(Val: 0, DL));
9869
9870 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9871
9872 ElementCount EndIndex =
9873 ElementCount::getScalable(MinVal: RemIdx) + SubVecVT.getVectorElementCount();
9874 VL = computeVLMax(VecVT: SubVecVT, DL, DAG);
9875
9876 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9877 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9878 if (EndIndex == InterSubVT.getVectorElementCount())
9879 Policy = RISCVII::TAIL_AGNOSTIC;
9880
9881 // If we're inserting into the lowest elements, use a tail undisturbed
9882 // vmv.v.v.
9883 if (RemIdx == 0) {
9884 SubVec = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: InterSubVT, N1: AlignedExtract,
9885 N2: SubVec, N3: VL);
9886 } else {
9887 SDValue SlideupAmt =
9888 DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), RemIdx));
9889
9890 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9891 VL = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: SlideupAmt, N2: VL);
9892
9893 SubVec = getVSlideup(DAG, Subtarget, DL, VT: InterSubVT, Merge: AlignedExtract, Op: SubVec,
9894 Offset: SlideupAmt, Mask, VL, Policy);
9895 }
9896
9897 // If required, insert this subvector back into the correct vector register.
9898 // This should resolve to an INSERT_SUBREG instruction.
9899 if (VecVT.bitsGT(VT: InterSubVT))
9900 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: Vec, N2: SubVec,
9901 N3: DAG.getVectorIdxConstant(Val: AlignedIdx, DL));
9902
9903 // We might have bitcast from a mask type: cast back to the original type if
9904 // required.
9905 return DAG.getBitcast(VT: Op.getSimpleValueType(), V: SubVec);
9906}
9907
9908SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9909 SelectionDAG &DAG) const {
9910 SDValue Vec = Op.getOperand(i: 0);
9911 MVT SubVecVT = Op.getSimpleValueType();
9912 MVT VecVT = Vec.getSimpleValueType();
9913
9914 SDLoc DL(Op);
9915 MVT XLenVT = Subtarget.getXLenVT();
9916 unsigned OrigIdx = Op.getConstantOperandVal(i: 1);
9917 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9918
9919 // We don't have the ability to slide mask vectors down indexed by their i1
9920 // elements; the smallest we can do is i8. Often we are able to bitcast to
9921 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9922 // from a scalable one, we might not necessarily have enough scalable
9923 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9924 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9925 if (VecVT.getVectorMinNumElements() >= 8 &&
9926 SubVecVT.getVectorMinNumElements() >= 8) {
9927 assert(OrigIdx % 8 == 0 && "Invalid index");
9928 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9929 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9930 "Unexpected mask vector lowering");
9931 OrigIdx /= 8;
9932 SubVecVT =
9933 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9934 SubVecVT.isScalableVector());
9935 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9936 VecVT.isScalableVector());
9937 Vec = DAG.getBitcast(VT: VecVT, V: Vec);
9938 } else {
9939 // We can't slide this mask vector down indexed by its i1 elements.
9940 // This poses a problem when we wish to extract a scalable vector which
9941 // can't be re-expressed as a larger type. Just choose the slow path and
9942 // extend to a larger type, then truncate back down.
9943 // TODO: We could probably improve this when extracting certain fixed-length
9944 // subvectors from fixed-length vectors, where we could extract as i8 and
9945 // shift the correct element right to reach the desired subvector.
9946 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9947 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9948 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec);
9949 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ExtSubVecVT, N1: Vec,
9950 N2: Op.getOperand(i: 1));
9951 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtSubVecVT);
9952 return DAG.getSetCC(DL, VT: SubVecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE);
9953 }
9954 }
9955
9956 // With an index of 0 this is a cast-like subvector extract, which can be
9957 // performed with subregister operations.
9958 if (OrigIdx == 0)
9959 return Op;
9960
9961 const auto VLen = Subtarget.getRealVLen();
9962
9963 // If the subvector is a fixed-length type and we don't know VLEN
9964 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9965 // don't know which register of a LMUL group contains the specific subvector
9966 // as we only know the minimum register size. Therefore we must slide the
9967 // vector group down the full amount.
9968 if (SubVecVT.isFixedLengthVector() && !VLen) {
9969 MVT ContainerVT = VecVT;
9970 if (VecVT.isFixedLengthVector()) {
9971 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9972 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9973 }
9974
9975 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9976 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9977 if (auto ShrunkVT =
9978 getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: LastIdx, DL, DAG, Subtarget)) {
9979 ContainerVT = *ShrunkVT;
9980 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec,
9981 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9982 }
9983
9984 SDValue Mask =
9985 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9986 // Set the vector length to only the number of elements we care about. This
9987 // avoids sliding down elements we're going to discard straight away.
9988 SDValue VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9989 Subtarget);
9990 SDValue SlidedownAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT);
9991 SDValue Slidedown =
9992 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
9993 Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: SlidedownAmt, Mask, VL);
9994 // Now we can use a cast-like subvector extract to get the result.
9995 Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown,
9996 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9997 return DAG.getBitcast(VT: Op.getValueType(), V: Slidedown);
9998 }
9999
10000 if (VecVT.isFixedLengthVector()) {
10001 VecVT = getContainerForFixedLengthVector(VT: VecVT);
10002 Vec = convertToScalableVector(VT: VecVT, V: Vec, DAG, Subtarget);
10003 }
10004
10005 MVT ContainerSubVecVT = SubVecVT;
10006 if (SubVecVT.isFixedLengthVector())
10007 ContainerSubVecVT = getContainerForFixedLengthVector(VT: SubVecVT);
10008
10009 unsigned SubRegIdx;
10010 ElementCount RemIdx;
10011 // extract_subvector scales the index by vscale if the subvector is scalable,
10012 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10013 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10014 if (SubVecVT.isFixedLengthVector()) {
10015 assert(VLen);
10016 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10017 auto Decompose =
10018 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10019 VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx / Vscale, TRI);
10020 SubRegIdx = Decompose.first;
10021 RemIdx = ElementCount::getFixed(MinVal: (Decompose.second * Vscale) +
10022 (OrigIdx % Vscale));
10023 } else {
10024 auto Decompose =
10025 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10026 VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx, TRI);
10027 SubRegIdx = Decompose.first;
10028 RemIdx = ElementCount::getScalable(MinVal: Decompose.second);
10029 }
10030
10031 // If the Idx has been completely eliminated then this is a subvector extract
10032 // which naturally aligns to a vector register. These can easily be handled
10033 // using subregister manipulation.
10034 if (RemIdx.isZero()) {
10035 if (SubVecVT.isFixedLengthVector()) {
10036 Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: ContainerSubVecVT, Operand: Vec);
10037 return convertFromScalableVector(VT: SubVecVT, V: Vec, DAG, Subtarget);
10038 }
10039 return Op;
10040 }
10041
10042 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10043 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10044 // divide exactly.
10045 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10046 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10047
10048 // If the vector type is an LMUL-group type, extract a subvector equal to the
10049 // nearest full vector register type.
10050 MVT InterSubVT = VecVT;
10051 if (VecVT.bitsGT(VT: getLMUL1VT(VT: VecVT))) {
10052 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10053 // we should have successfully decomposed the extract into a subregister.
10054 assert(SubRegIdx != RISCV::NoSubRegister);
10055 InterSubVT = getLMUL1VT(VT: VecVT);
10056 Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: InterSubVT, Operand: Vec);
10057 }
10058
10059 // Slide this vector register down by the desired number of elements in order
10060 // to place the desired subvector starting at element 0.
10061 SDValue SlidedownAmt = DAG.getElementCount(DL, VT: XLenVT, EC: RemIdx);
10062 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: InterSubVT, DL, DAG, Subtarget);
10063 if (SubVecVT.isFixedLengthVector())
10064 VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT: InterSubVT, DL, DAG,
10065 Subtarget);
10066 SDValue Slidedown =
10067 getVSlidedown(DAG, Subtarget, DL, VT: InterSubVT, Merge: DAG.getUNDEF(VT: InterSubVT),
10068 Op: Vec, Offset: SlidedownAmt, Mask, VL);
10069
10070 // Now the vector is in the right position, extract our final subvector. This
10071 // should resolve to a COPY.
10072 Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown,
10073 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10074
10075 // We might have bitcast from a mask type: cast back to the original type if
10076 // required.
10077 return DAG.getBitcast(VT: Op.getSimpleValueType(), V: Slidedown);
10078}
10079
10080 // Widen a vector operation's operands to i8, then truncate its results back
10081 // to the original type, typically i1. All operand and result types must be the same.
10082static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10083 SelectionDAG &DAG) {
10084 MVT VT = N.getSimpleValueType();
10085 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10086 SmallVector<SDValue, 4> WideOps;
10087 for (SDValue Op : N->ops()) {
10088 assert(Op.getSimpleValueType() == VT &&
10089 "Operands and result must be same type");
10090 WideOps.push_back(Elt: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Op));
10091 }
10092
10093 unsigned NumVals = N->getNumValues();
10094
10095 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10096 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10097 SDValue WideN = DAG.getNode(Opcode: N.getOpcode(), DL, VTList: VTs, Ops: WideOps);
10098 SmallVector<SDValue, 4> TruncVals;
10099 for (unsigned I = 0; I < NumVals; I++) {
10100 TruncVals.push_back(
10101 Elt: DAG.getSetCC(DL, VT: N->getSimpleValueType(ResNo: I), LHS: WideN.getValue(R: I),
10102 RHS: DAG.getConstant(Val: 0, DL, VT: WideVT), Cond: ISD::SETNE));
10103 }
10104
10105 if (TruncVals.size() > 1)
10106 return DAG.getMergeValues(Ops: TruncVals, dl: DL);
10107 return TruncVals.front();
10108}
10109
10110SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10111 SelectionDAG &DAG) const {
10112 SDLoc DL(Op);
10113 MVT VecVT = Op.getSimpleValueType();
10114
10115 assert(VecVT.isScalableVector() &&
10116 "vector_interleave on non-scalable vector!");
10117
10118 // i1 element vectors need to be widened to i8
10119 if (VecVT.getVectorElementType() == MVT::i1)
10120 return widenVectorOpsToi8(N: Op, DL, DAG);
10121
10122 // If the VT is LMUL=8, we need to split and reassemble.
10123 if (VecVT.getSizeInBits().getKnownMinValue() ==
10124 (8 * RISCV::RVVBitsPerBlock)) {
10125 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0);
10126 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1);
10127 EVT SplitVT = Op0Lo.getValueType();
10128
10129 SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL,
10130 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op0Hi);
10131 SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL,
10132 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op1Lo, N2: Op1Hi);
10133
10134 SDValue Even = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT,
10135 N1: ResLo.getValue(R: 0), N2: ResHi.getValue(R: 0));
10136 SDValue Odd = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, N1: ResLo.getValue(R: 1),
10137 N2: ResHi.getValue(R: 1));
10138 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
10139 }
10140
10141 // Concatenate the two vectors as one vector to deinterleave
10142 MVT ConcatVT =
10143 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
10144 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
10145 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT,
10146 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
10147
10148 // We want to operate on all lanes, so get the mask and VL for it
10149 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: ConcatVT, DL, DAG, Subtarget);
10150 SDValue Passthru = DAG.getUNDEF(VT: ConcatVT);
10151
10152 // We can deinterleave through vnsrl.wi if the element type is smaller than
10153 // ELEN
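  // vnsrl.wi treats each adjacent pair as one element of twice the width: a
  // shift of 0 keeps the even elements and a shift of SEW keeps the odd ones.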
10154 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10155 SDValue Even =
10156 getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: true, Subtarget, DAG);
10157 SDValue Odd =
10158 getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: false, Subtarget, DAG);
10159 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
10160 }
10161
10162 // For the indices, use the same SEW to avoid an extra vsetvli
10163 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10164 // Create a vector of even indices {0, 2, 4, ...}
10165 SDValue EvenIdx =
10166 DAG.getStepVector(DL, ResVT: IdxVT, StepVal: APInt(IdxVT.getScalarSizeInBits(), 2));
10167 // Create a vector of odd indices {1, 3, 5, ... }
10168 SDValue OddIdx =
10169 DAG.getNode(Opcode: ISD::ADD, DL, VT: IdxVT, N1: EvenIdx, N2: DAG.getConstant(Val: 1, DL, VT: IdxVT));
10170
10171 // Gather the even and odd elements into two separate vectors
10172 SDValue EvenWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT,
10173 N1: Concat, N2: EvenIdx, N3: Passthru, N4: Mask, N5: VL);
10174 SDValue OddWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT,
10175 N1: Concat, N2: OddIdx, N3: Passthru, N4: Mask, N5: VL);
10176
10177 // Extract the result half of the gather for even and odd
10178 SDValue Even = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: EvenWide,
10179 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10180 SDValue Odd = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: OddWide,
10181 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10182
10183 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
10184}
10185
10186SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10187 SelectionDAG &DAG) const {
10188 SDLoc DL(Op);
10189 MVT VecVT = Op.getSimpleValueType();
10190
10191 assert(VecVT.isScalableVector() &&
10192 "vector_interleave on non-scalable vector!");
10193
10194 // i1 vectors need to be widened to i8
10195 if (VecVT.getVectorElementType() == MVT::i1)
10196 return widenVectorOpsToi8(N: Op, DL, DAG);
10197
10198 MVT XLenVT = Subtarget.getXLenVT();
10199 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10200
10201 // If the VT is LMUL=8, we need to split and reassemble.
10202 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10203 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0);
10204 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1);
10205 EVT SplitVT = Op0Lo.getValueType();
10206
10207 SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL,
10208 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op1Lo);
10209 SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL,
10210 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Hi, N2: Op1Hi);
10211
10212 SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT,
10213 N1: ResLo.getValue(R: 0), N2: ResLo.getValue(R: 1));
10214 SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT,
10215 N1: ResHi.getValue(R: 0), N2: ResHi.getValue(R: 1));
10216 return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL);
10217 }
10218
10219 SDValue Interleaved;
10220
10221 // If the element type is smaller than ELEN, then we can interleave with
10222 // vwaddu.vv and vwmaccu.vx
10223 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10224 Interleaved = getWideningInterleave(EvenV: Op.getOperand(i: 0), OddV: Op.getOperand(i: 1), DL,
10225 DAG, Subtarget);
10226 } else {
10227 // Otherwise, fall back to using vrgatherei16.vv
10228 MVT ConcatVT =
10229 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
10230 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
10231 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT,
10232 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
10233
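    // vrgatherei16 uses 16-bit indices regardless of SEW, so the index vector
    // stays at a low LMUL even for 32- and 64-bit element types.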
10234 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10235
10236 // 0 1 2 3 4 5 6 7 ...
10237 SDValue StepVec = DAG.getStepVector(DL, ResVT: IdxVT);
10238
10239 // 1 1 1 1 1 1 1 1 ...
10240 SDValue Ones = DAG.getSplatVector(VT: IdxVT, DL, Op: DAG.getConstant(Val: 1, DL, VT: XLenVT));
10241
10242 // 1 0 1 0 1 0 1 0 ...
10243 SDValue OddMask = DAG.getNode(Opcode: ISD::AND, DL, VT: IdxVT, N1: StepVec, N2: Ones);
10244 OddMask = DAG.getSetCC(
10245 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10246 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10247 ISD::CondCode::SETNE);
10248
10249 SDValue VLMax = DAG.getSplatVector(VT: IdxVT, DL, Op: computeVLMax(VecVT, DL, DAG));
10250
10251 // Build up the index vector for interleaving the concatenated vector
10252 // 0 0 1 1 2 2 3 3 ...
10253 SDValue Idx = DAG.getNode(Opcode: ISD::SRL, DL, VT: IdxVT, N1: StepVec, N2: Ones);
10254 // 0 n 1 n+1 2 n+2 3 n+3 ...
10255 Idx =
10256 DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: IdxVT, N1: Idx, N2: VLMax, N3: Idx, N4: OddMask, N5: VL);
10257
10258 // Then perform the interleave
10259 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10260 SDValue TrueMask = getAllOnesMask(VecVT: IdxVT, VL, DL, DAG);
10261 Interleaved = DAG.getNode(Opcode: RISCVISD::VRGATHEREI16_VV_VL, DL, VT: ConcatVT,
10262 N1: Concat, N2: Idx, N3: DAG.getUNDEF(VT: ConcatVT), N4: TrueMask, N5: VL);
10263 }
10264
10265 // Extract the two halves from the interleaved result
10266 SDValue Lo = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved,
10267 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10268 SDValue Hi = DAG.getNode(
10269 Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved,
10270 N2: DAG.getVectorIdxConstant(Val: VecVT.getVectorMinNumElements(), DL));
10271
10272 return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL);
10273}
10274
10275 // Lower step_vector to the vid instruction. Any non-identity step value must
10276 // be accounted for by manual expansion.
10277SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10278 SelectionDAG &DAG) const {
10279 SDLoc DL(Op);
10280 MVT VT = Op.getSimpleValueType();
10281 assert(VT.isScalableVector() && "Expected scalable vector");
10282 MVT XLenVT = Subtarget.getXLenVT();
10283 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget);
10284 SDValue StepVec = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL);
10285 uint64_t StepValImm = Op.getConstantOperandVal(i: 0);
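  // Scale the vid sequence by the step: a power-of-two step becomes a shift,
  // any other step is multiplied in via a splat of the step value.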
10286 if (StepValImm != 1) {
10287 if (isPowerOf2_64(Value: StepValImm)) {
10288 SDValue StepVal =
10289 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
10290 N2: DAG.getConstant(Val: Log2_64(Value: StepValImm), DL, VT: XLenVT), N3: VL);
10291 StepVec = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: StepVec, N2: StepVal);
10292 } else {
10293 SDValue StepVal = lowerScalarSplat(
10294 Passthru: SDValue(), Scalar: DAG.getConstant(Val: StepValImm, DL, VT: VT.getVectorElementType()),
10295 VL, VT, DL, DAG, Subtarget);
10296 StepVec = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: StepVec, N2: StepVal);
10297 }
10298 }
10299 return StepVec;
10300}
10301
10302// Implement vector_reverse using vrgather.vv with indices determined by
10303// subtracting the id of each element from (VLMAX-1). This will convert
10304// the indices like so:
10305// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10306// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10307SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10308 SelectionDAG &DAG) const {
10309 SDLoc DL(Op);
10310 MVT VecVT = Op.getSimpleValueType();
10311 if (VecVT.getVectorElementType() == MVT::i1) {
10312 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10313 SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: Op.getOperand(i: 0));
10314 SDValue Op2 = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: WidenVT, Operand: Op1);
10315 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Op2);
10316 }
10317 unsigned EltSize = VecVT.getScalarSizeInBits();
10318 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10319 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10320 unsigned MaxVLMAX =
10321 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
10322
10323 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10324 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10325
10326 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10327 // to use vrgatherei16.vv.
10328 // TODO: It's also possible to use vrgatherei16.vv for other types to
10329 // decrease register width for the index calculation.
10330 if (MaxVLMAX > 256 && EltSize == 8) {
10331 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10332 // Reverse each half, then reassemble them in reverse order.
10333 // NOTE: It's also possible that after splitting that VLMAX no longer
10334 // requires vrgatherei16.vv.
10335 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10336 auto [Lo, Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0);
10337 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: VecVT);
10338 Lo = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo);
10339 Hi = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi);
10340 // Reassemble the low and high pieces reversed.
10341 // FIXME: This is a CONCAT_VECTORS.
10342 SDValue Res =
10343 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT), N2: Hi,
10344 N3: DAG.getVectorIdxConstant(Val: 0, DL));
10345 return DAG.getNode(
10346 Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: Res, N2: Lo,
10347 N3: DAG.getVectorIdxConstant(Val: LoVT.getVectorMinNumElements(), DL));
10348 }
10349
10350 // Just promote the int type to i16 which will double the LMUL.
10351 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10352 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10353 }
10354
10355 MVT XLenVT = Subtarget.getXLenVT();
10356 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10357
10358 // Calculate VLMAX-1 for the desired SEW.
10359 SDValue VLMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT,
10360 N1: computeVLMax(VecVT, DL, DAG),
10361 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
10362
10363 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10364 bool IsRV32E64 =
10365 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10366 SDValue SplatVL;
10367 if (!IsRV32E64)
10368 SplatVL = DAG.getSplatVector(VT: IntVT, DL, Op: VLMinus1);
10369 else
10370 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10371 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10372
10373 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IntVT, N1: Mask, N2: VL);
10374 SDValue Indices = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IntVT, N1: SplatVL, N2: VID,
10375 N3: DAG.getUNDEF(VT: IntVT), N4: Mask, N5: VL);
10376
10377 return DAG.getNode(Opcode: GatherOpc, DL, VT: VecVT, N1: Op.getOperand(i: 0), N2: Indices,
10378 N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL);
10379}
10380
10381SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10382 SelectionDAG &DAG) const {
10383 SDLoc DL(Op);
10384 SDValue V1 = Op.getOperand(i: 0);
10385 SDValue V2 = Op.getOperand(i: 1);
10386 MVT XLenVT = Subtarget.getXLenVT();
10387 MVT VecVT = Op.getSimpleValueType();
10388
10389 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10390
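  // vector_splice selects VLMAX contiguous elements from the concatenation of
  // V1 and V2 starting at the (possibly negative) offset: slide V1 down by
  // DownOffset with VL=UpOffset, then slide V2 up into the vacated tail.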
10391 int64_t ImmValue = cast<ConstantSDNode>(Val: Op.getOperand(i: 2))->getSExtValue();
10392 SDValue DownOffset, UpOffset;
10393 if (ImmValue >= 0) {
10394 // The operand is a TargetConstant; we need to rebuild it as a regular
10395 // constant.
10396 DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT);
10397 UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: DownOffset);
10398 } else {
10399 // The operand is a TargetConstant; we need to rebuild it as a regular
10400 // constant rather than negating the original operand.
10401 UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT);
10402 DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: UpOffset);
10403 }
10404
10405 SDValue TrueMask = getAllOnesMask(VecVT, VL: VLMax, DL, DAG);
10406
10407 SDValue SlideDown =
10408 getVSlidedown(DAG, Subtarget, DL, VT: VecVT, Merge: DAG.getUNDEF(VT: VecVT), Op: V1,
10409 Offset: DownOffset, Mask: TrueMask, VL: UpOffset);
10410 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10411 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10412 RISCVII::TAIL_AGNOSTIC);
10413}
10414
10415SDValue
10416RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10417 SelectionDAG &DAG) const {
10418 SDLoc DL(Op);
10419 auto *Load = cast<LoadSDNode>(Val&: Op);
10420
10421 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10422 Load->getMemoryVT(),
10423 *Load->getMemOperand()) &&
10424 "Expecting a correctly-aligned load");
10425
10426 MVT VT = Op.getSimpleValueType();
10427 MVT XLenVT = Subtarget.getXLenVT();
10428 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10429
10430 // If we know the exact VLEN and our fixed length vector completely fills
10431 // the container, use a whole register load instead.
10432 const auto [MinVLMAX, MaxVLMAX] =
10433 RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget);
10434 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10435 getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) {
10436 MachineMemOperand *MMO = Load->getMemOperand();
10437 SDValue NewLoad =
10438 DAG.getLoad(VT: ContainerVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(),
10439 PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(), MMOFlags: MMO->getFlags(),
10440 AAInfo: MMO->getAAInfo(), Ranges: MMO->getRanges());
10441 SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
10442 return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL);
10443 }
10444
10445 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10446
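  // Mask vectors are loaded with vlm, which takes no passthru operand; other
  // types go through the unit-stride vle intrinsic.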
10447 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10448 SDValue IntID = DAG.getTargetConstant(
10449 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10450 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10451 if (!IsMaskOp)
10452 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10453 Ops.push_back(Elt: Load->getBasePtr());
10454 Ops.push_back(Elt: VL);
10455 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10456 SDValue NewLoad =
10457 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
10458 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
10459
10460 SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
10461 return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL);
10462}
10463
10464SDValue
10465RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10466 SelectionDAG &DAG) const {
10467 SDLoc DL(Op);
10468 auto *Store = cast<StoreSDNode>(Val&: Op);
10469
10470 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10471 Store->getMemoryVT(),
10472 *Store->getMemOperand()) &&
10473 "Expecting a correctly-aligned store");
10474
10475 SDValue StoreVal = Store->getValue();
10476 MVT VT = StoreVal.getSimpleValueType();
10477 MVT XLenVT = Subtarget.getXLenVT();
10478
10479 // If the size is less than a byte, we need to pad with zeros to make a byte.
10480 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10481 VT = MVT::v8i1;
10482 StoreVal =
10483 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
10484 N2: StoreVal, N3: DAG.getVectorIdxConstant(Val: 0, DL));
10485 }
10486
10487 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10488
10489 SDValue NewValue =
10490 convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget);
10491
10493 // If we know the exact VLEN and our fixed length vector completely fills
10494 // the container, use a whole register store instead.
10495 const auto [MinVLMAX, MaxVLMAX] =
10496 RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget);
10497 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10498 getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) {
10499 MachineMemOperand *MMO = Store->getMemOperand();
10500 return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: NewValue, Ptr: Store->getBasePtr(),
10501 PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(),
10502 MMOFlags: MMO->getFlags(), AAInfo: MMO->getAAInfo());
10503 }
10504
10505 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG,
10506 Subtarget);
10507
10508 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10509 SDValue IntID = DAG.getTargetConstant(
10510 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10511 return DAG.getMemIntrinsicNode(
10512 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10513 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10514 Store->getMemoryVT(), Store->getMemOperand());
10515}
10516
10517SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10518 SelectionDAG &DAG) const {
10519 SDLoc DL(Op);
10520 MVT VT = Op.getSimpleValueType();
10521
10522 const auto *MemSD = cast<MemSDNode>(Val&: Op);
10523 EVT MemVT = MemSD->getMemoryVT();
10524 MachineMemOperand *MMO = MemSD->getMemOperand();
10525 SDValue Chain = MemSD->getChain();
10526 SDValue BasePtr = MemSD->getBasePtr();
10527
10528 SDValue Mask, PassThru, VL;
10529 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Val&: Op)) {
10530 Mask = VPLoad->getMask();
10531 PassThru = DAG.getUNDEF(VT);
10532 VL = VPLoad->getVectorLength();
10533 } else {
10534 const auto *MLoad = cast<MaskedLoadSDNode>(Val&: Op);
10535 Mask = MLoad->getMask();
10536 PassThru = MLoad->getPassThru();
10537 }
10538
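  // An all-ones mask lets us use the unmasked vle form and drop the mask and
  // policy operands.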
10539 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
10540
10541 MVT XLenVT = Subtarget.getXLenVT();
10542
10543 MVT ContainerVT = VT;
10544 if (VT.isFixedLengthVector()) {
10545 ContainerVT = getContainerForFixedLengthVector(VT);
10546 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
10547 if (!IsUnmasked) {
10548 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
10549 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
10550 }
10551 }
10552
10553 if (!VL)
10554 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
10555
10556 unsigned IntID =
10557 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10558 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
10559 if (IsUnmasked)
10560 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10561 else
10562 Ops.push_back(Elt: PassThru);
10563 Ops.push_back(Elt: BasePtr);
10564 if (!IsUnmasked)
10565 Ops.push_back(Elt: Mask);
10566 Ops.push_back(Elt: VL);
10567 if (!IsUnmasked)
10568 Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT));
10569
10570 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10571
10572 SDValue Result =
10573 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO);
10574 Chain = Result.getValue(R: 1);
10575
10576 if (VT.isFixedLengthVector())
10577 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
10578
10579 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
10580}
10581
10582SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10583 SelectionDAG &DAG) const {
10584 SDLoc DL(Op);
10585
10586 const auto *MemSD = cast<MemSDNode>(Val&: Op);
10587 EVT MemVT = MemSD->getMemoryVT();
10588 MachineMemOperand *MMO = MemSD->getMemOperand();
10589 SDValue Chain = MemSD->getChain();
10590 SDValue BasePtr = MemSD->getBasePtr();
10591 SDValue Val, Mask, VL;
10592
10593 bool IsCompressingStore = false;
10594 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Val&: Op)) {
10595 Val = VPStore->getValue();
10596 Mask = VPStore->getMask();
10597 VL = VPStore->getVectorLength();
10598 } else {
10599 const auto *MStore = cast<MaskedStoreSDNode>(Val&: Op);
10600 Val = MStore->getValue();
10601 Mask = MStore->getMask();
10602 IsCompressingStore = MStore->isCompressingStore();
10603 }
10604
10605 bool IsUnmasked =
10606 ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()) || IsCompressingStore;
10607
10608 MVT VT = Val.getSimpleValueType();
10609 MVT XLenVT = Subtarget.getXLenVT();
10610
10611 MVT ContainerVT = VT;
10612 if (VT.isFixedLengthVector()) {
10613 ContainerVT = getContainerForFixedLengthVector(VT);
10614
10615 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
10616 if (!IsUnmasked || IsCompressingStore) {
10617 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
10618 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
10619 }
10620 }
10621
10622 if (!VL)
10623 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
10624
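  // For a compressing store, pack the active elements to the front with
  // vcompress and store only that many: the effective VL becomes the
  // population count of the mask, and the store itself is then unmasked.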
10625 if (IsCompressingStore) {
10626 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10627 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10628 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10629 VL =
10630 DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Mask,
10631 N2: getAllOnesMask(VecVT: Mask.getSimpleValueType(), VL, DL, DAG), N3: VL);
10632 }
10633
10634 unsigned IntID =
10635 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10636 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
10637 Ops.push_back(Elt: Val);
10638 Ops.push_back(Elt: BasePtr);
10639 if (!IsUnmasked)
10640 Ops.push_back(Elt: Mask);
10641 Ops.push_back(Elt: VL);
10642
10643 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10644 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10645}
10646
10647SDValue
10648RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10649 SelectionDAG &DAG) const {
10650 MVT InVT = Op.getOperand(i: 0).getSimpleValueType();
10651 MVT ContainerVT = getContainerForFixedLengthVector(VT: InVT);
10652
10653 MVT VT = Op.getSimpleValueType();
10654
10655 SDValue Op1 =
10656 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
10657 SDValue Op2 =
10658 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget);
10659
10660 SDLoc DL(Op);
10661 auto [Mask, VL] = getDefaultVLOps(NumElts: VT.getVectorNumElements(), ContainerVT, DL,
10662 DAG, Subtarget);
10663 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
10664
10665 SDValue Cmp =
10666 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
10667 Ops: {Op1, Op2, Op.getOperand(i: 2), DAG.getUNDEF(VT: MaskVT), Mask, VL});
10668
10669 return convertFromScalableVector(VT, V: Cmp, DAG, Subtarget);
10670}
10671
10672SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10673 SelectionDAG &DAG) const {
10674 unsigned Opc = Op.getOpcode();
10675 SDLoc DL(Op);
10676 SDValue Chain = Op.getOperand(i: 0);
10677 SDValue Op1 = Op.getOperand(i: 1);
10678 SDValue Op2 = Op.getOperand(i: 2);
10679 SDValue CC = Op.getOperand(i: 3);
10680 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
10681 MVT VT = Op.getSimpleValueType();
10682 MVT InVT = Op1.getSimpleValueType();
10683
10684 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10685 // condition codes.
10686 if (Opc == ISD::STRICT_FSETCCS) {
10687 // Expand strict_fsetccs(x, y, oeq) to
10688 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10689 SDVTList VTList = Op->getVTList();
10690 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10691 SDValue OLECCVal = DAG.getCondCode(Cond: ISD::SETOLE);
10692 SDValue Tmp1 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1,
10693 N3: Op2, N4: OLECCVal);
10694 SDValue Tmp2 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op2,
10695 N3: Op1, N4: OLECCVal);
10696 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10697 Tmp1.getValue(1), Tmp2.getValue(1));
10698 // Tmp1 and Tmp2 might be the same node.
10699 if (Tmp1 != Tmp2)
10700 Tmp1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp1, N2: Tmp2);
10701 return DAG.getMergeValues(Ops: {Tmp1, OutChain}, dl: DL);
10702 }
10703
10704 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10705 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10706 SDValue OEQCCVal = DAG.getCondCode(Cond: ISD::SETOEQ);
10707 SDValue OEQ = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1,
10708 N3: Op2, N4: OEQCCVal);
10709 SDValue Res = DAG.getNOT(DL, Val: OEQ, VT);
10710 return DAG.getMergeValues(Ops: {Res, OEQ.getValue(R: 1)}, dl: DL);
10711 }
10712 }
10713
10714 MVT ContainerInVT = InVT;
10715 if (InVT.isFixedLengthVector()) {
10716 ContainerInVT = getContainerForFixedLengthVector(VT: InVT);
10717 Op1 = convertToScalableVector(VT: ContainerInVT, V: Op1, DAG, Subtarget);
10718 Op2 = convertToScalableVector(VT: ContainerInVT, V: Op2, DAG, Subtarget);
10719 }
10720 MVT MaskVT = getMaskTypeFor(VecVT: ContainerInVT);
10721
10722 auto [Mask, VL] = getDefaultVLOps(VecVT: InVT, ContainerVT: ContainerInVT, DL, DAG, Subtarget);
10723
10724 SDValue Res;
10725 if (Opc == ISD::STRICT_FSETCC &&
10726 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10727 CCVal == ISD::SETOLE)) {
10728 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10729 // is only active when both input elements are ordered.
10730 SDValue True = getAllOnesMask(VecVT: ContainerInVT, VL, DL, DAG);
10731 SDValue OrderMask1 = DAG.getNode(
10732 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10733 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10734 True, VL});
10735 SDValue OrderMask2 = DAG.getNode(
10736 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10737 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10738 True, VL});
10739 Mask =
10740 DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: MaskVT, N1: OrderMask1, N2: OrderMask2, N3: VL);
10741 // Use Mask as the merge operand to let the result be 0 if either of the
10742 // inputs is unordered.
10743 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10744 DAG.getVTList(MaskVT, MVT::Other),
10745 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10746 } else {
10747 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10748 : RISCVISD::STRICT_FSETCCS_VL;
10749 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10750 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10751 }
10752
10753 if (VT.isFixedLengthVector()) {
10754 SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
10755 return DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL);
10756 }
10757 return Res;
10758}
10759
10760// Lower vector ABS to smax(X, sub(0, X)).
10761SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10762 SDLoc DL(Op);
10763 MVT VT = Op.getSimpleValueType();
10764 SDValue X = Op.getOperand(i: 0);
10765
10766 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10767 "Unexpected type for ISD::ABS");
10768
10769 MVT ContainerVT = VT;
10770 if (VT.isFixedLengthVector()) {
10771 ContainerVT = getContainerForFixedLengthVector(VT);
10772 X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget);
10773 }
10774
10775 SDValue Mask, VL;
10776 if (Op->getOpcode() == ISD::VP_ABS) {
10777 Mask = Op->getOperand(Num: 1);
10778 if (VT.isFixedLengthVector())
10779 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
10780 Subtarget);
10781 VL = Op->getOperand(Num: 2);
10782 } else
10783 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
10784
10785 SDValue SplatZero = DAG.getNode(
10786 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT),
10787 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL);
10788 SDValue NegX = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: ContainerVT, N1: SplatZero, N2: X,
10789 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
10790 SDValue Max = DAG.getNode(Opcode: RISCVISD::SMAX_VL, DL, VT: ContainerVT, N1: X, N2: NegX,
10791 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
10792
10793 if (VT.isFixedLengthVector())
10794 Max = convertFromScalableVector(VT, V: Max, DAG, Subtarget);
10795 return Max;
10796}
10797
10798SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10799 SDValue Op, SelectionDAG &DAG) const {
10800 SDLoc DL(Op);
10801 MVT VT = Op.getSimpleValueType();
10802 SDValue Mag = Op.getOperand(i: 0);
10803 SDValue Sign = Op.getOperand(i: 1);
10804 assert(Mag.getValueType() == Sign.getValueType() &&
10805 "Can only handle COPYSIGN with matching types.");
10806
10807 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10808 Mag = convertToScalableVector(VT: ContainerVT, V: Mag, DAG, Subtarget);
10809 Sign = convertToScalableVector(VT: ContainerVT, V: Sign, DAG, Subtarget);
10810
10811 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
10812
10813 SDValue CopySign = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Mag,
10814 N2: Sign, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
10815
10816 return convertFromScalableVector(VT, V: CopySign, DAG, Subtarget);
10817}
10818
10819SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10820 SDValue Op, SelectionDAG &DAG) const {
10821 MVT VT = Op.getSimpleValueType();
10822 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10823
10824 MVT I1ContainerVT =
10825 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10826
10827 SDValue CC =
10828 convertToScalableVector(VT: I1ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
10829 SDValue Op1 =
10830 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget);
10831 SDValue Op2 =
10832 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 2), DAG, Subtarget);
10833
10834 SDLoc DL(Op);
10835 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
10836
10837 SDValue Select = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: Op1,
10838 N3: Op2, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
10839
10840 return convertFromScalableVector(VT, V: Select, DAG, Subtarget);
10841}
10842
10843SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10844 SelectionDAG &DAG) const {
10845 unsigned NewOpc = getRISCVVLOp(Op);
10846 bool HasMergeOp = hasMergeOp(Opcode: NewOpc);
10847 bool HasMask = hasMaskOp(Opcode: NewOpc);
10848
10849 MVT VT = Op.getSimpleValueType();
10850 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10851
10852 // Create list of operands by converting existing ones to scalable types.
10853 SmallVector<SDValue, 6> Ops;
10854 for (const SDValue &V : Op->op_values()) {
10855 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10856
10857 // Pass through non-vector operands.
10858 if (!V.getValueType().isVector()) {
10859 Ops.push_back(Elt: V);
10860 continue;
10861 }
10862
10863 // "cast" fixed length vector to a scalable vector.
10864 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10865 "Only fixed length vectors are supported!");
10866 Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget));
10867 }
10868
10869 SDLoc DL(Op);
10870 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
10871 if (HasMergeOp)
10872 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10873 if (HasMask)
10874 Ops.push_back(Elt: Mask);
10875 Ops.push_back(Elt: VL);
10876
10877 // StrictFP operations have two result values. Their lowered result should
10878 // have the same number of results.
10879 if (Op->isStrictFPOpcode()) {
10880 SDValue ScalableRes =
10881 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10882 Op->getFlags());
10883 SDValue SubVec = convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget);
10884 return DAG.getMergeValues(Ops: {SubVec, ScalableRes.getValue(R: 1)}, dl: DL);
10885 }
10886
10887 SDValue ScalableRes =
10888 DAG.getNode(Opcode: NewOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags());
10889 return convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget);
10890}
10891
10892// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10893// * Operands of each node are assumed to be in the same order.
10894// * The EVL operand is promoted from i32 to i64 on RV64.
10895// * Fixed-length vectors are converted to their scalable-vector container
10896// types.
10897SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10898 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10899 bool HasMergeOp = hasMergeOp(Opcode: RISCVISDOpc);
10900
10901 SDLoc DL(Op);
10902 MVT VT = Op.getSimpleValueType();
10903 SmallVector<SDValue, 4> Ops;
10904
10905 MVT ContainerVT = VT;
10906 if (VT.isFixedLengthVector())
10907 ContainerVT = getContainerForFixedLengthVector(VT);
10908
10909 for (const auto &OpIdx : enumerate(First: Op->ops())) {
10910 SDValue V = OpIdx.value();
10911 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10912 // Add a dummy merge value before the mask operand, or, if there is no
10913 // mask, before the EVL operand.
10914 if (HasMergeOp) {
10915 auto MaskIdx = ISD::getVPMaskIdx(Opcode: Op.getOpcode());
10916 if (MaskIdx) {
10917 if (*MaskIdx == OpIdx.index())
10918 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10919 } else if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) ==
10920 OpIdx.index()) {
10921 if (Op.getOpcode() == ISD::VP_MERGE) {
10922 // For VP_MERGE, copy the false operand instead of an undef value.
10923 Ops.push_back(Elt: Ops.back());
10924 } else {
10925 assert(Op.getOpcode() == ISD::VP_SELECT);
10926 // For VP_SELECT, add an undef value.
10927 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10928 }
10929 }
10930 }
10931 // Pass through operands which aren't fixed-length vectors.
10932 if (!V.getValueType().isFixedLengthVector()) {
10933 Ops.push_back(Elt: V);
10934 continue;
10935 }
10936 // "cast" fixed length vector to a scalable vector.
10937 MVT OpVT = V.getSimpleValueType();
10938 MVT ContainerVT = getContainerForFixedLengthVector(VT: OpVT);
10939 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10940 "Only fixed length vectors are supported!");
10941 Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget));
10942 }
10943
10944 if (!VT.isFixedLengthVector())
10945 return DAG.getNode(Opcode: RISCVISDOpc, DL, VT, Ops, Flags: Op->getFlags());
10946
10947 SDValue VPOp = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags());
10948
10949 return convertFromScalableVector(VT, V: VPOp, DAG, Subtarget);
10950}
10951
10952SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10953 SelectionDAG &DAG) const {
10954 SDLoc DL(Op);
10955 MVT VT = Op.getSimpleValueType();
10956
10957 SDValue Src = Op.getOperand(i: 0);
10958 // NOTE: Mask is dropped.
10959 SDValue VL = Op.getOperand(i: 2);
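// In outline (an informal sketch, not actual SDNode syntax), the lowering
// built below is:
//   vp.zext(src, mask, evl)  ->  vmerge(src, splat(1),  splat(0), evl)
//   vp.sext(src, mask, evl)  ->  vmerge(src, splat(-1), splat(0), evl)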
10960
10961 MVT ContainerVT = VT;
10962 if (VT.isFixedLengthVector()) {
10963 ContainerVT = getContainerForFixedLengthVector(VT);
10964 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10965 Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget);
10966 }
10967
10968 MVT XLenVT = Subtarget.getXLenVT();
10969 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
10970 SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
10971 N1: DAG.getUNDEF(VT: ContainerVT), N2: Zero, N3: VL);
10972
10973 SDValue SplatValue = DAG.getConstant(
10974 Val: Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, VT: XLenVT);
10975 SDValue Splat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
10976 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatValue, N3: VL);
10977
10978 SDValue Result = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Src, N2: Splat,
10979 N3: ZeroSplat, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
10980 if (!VT.isFixedLengthVector())
10981 return Result;
10982 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
10983}
10984
10985SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10986 SelectionDAG &DAG) const {
10987 SDLoc DL(Op);
10988 MVT VT = Op.getSimpleValueType();
10989
10990 SDValue Op1 = Op.getOperand(i: 0);
10991 SDValue Op2 = Op.getOperand(i: 1);
10992 ISD::CondCode Condition = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
10993 // NOTE: Mask is dropped.
10994 SDValue VL = Op.getOperand(i: 4);
10995
10996 MVT ContainerVT = VT;
10997 if (VT.isFixedLengthVector()) {
10998 ContainerVT = getContainerForFixedLengthVector(VT);
10999 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11000 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
11001 }
11002
11003 SDValue Result;
11004 SDValue AllOneMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
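// Note on the identities used below: the i1 elements are read as 0/1 for
// unsigned compares and as 0/-1 for signed compares, so, for example,
// X >s Y can only hold when X == 0 and Y == 1 (i.e. -1).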
11005
11006 switch (Condition) {
11007 default:
11008 break;
11009 // X != Y --> (X^Y)
11010 case ISD::SETNE:
11011 Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
11012 break;
11013 // X == Y --> ~(X^Y)
11014 case ISD::SETEQ: {
11015 SDValue Temp =
11016 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
11017 Result =
11018 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: AllOneMask, N3: VL);
11019 break;
11020 }
11021 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11022 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11023 case ISD::SETGT:
11024 case ISD::SETULT: {
11025 SDValue Temp =
11026 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL);
11027 Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL);
11028 break;
11029 }
11030 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11031 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11032 case ISD::SETLT:
11033 case ISD::SETUGT: {
11034 SDValue Temp =
11035 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL);
11036 Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Op1, N2: Temp, N3: VL);
11037 break;
11038 }
11039 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11040 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11041 case ISD::SETGE:
11042 case ISD::SETULE: {
11043 SDValue Temp =
11044 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL);
11045 Result = DAG.getNode(Opcode: RISCVISD::VMOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL);
11046 break;
11047 }
11048 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11049 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11050 case ISD::SETLE:
11051 case ISD::SETUGE: {
11052 SDValue Temp =
11053 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL);
11054 Result = DAG.getNode(Opcode: RISCVISD::VMOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op1, N3: VL);
11055 break;
11056 }
11057 }
11058
11059 if (!VT.isFixedLengthVector())
11060 return Result;
11061 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11062}
11063
11064// Lower Floating-Point/Integer Type-Convert VP SDNodes
11065SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11066 SelectionDAG &DAG) const {
11067 SDLoc DL(Op);
11068
11069 SDValue Src = Op.getOperand(i: 0);
11070 SDValue Mask = Op.getOperand(i: 1);
11071 SDValue VL = Op.getOperand(i: 2);
11072 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11073
11074 MVT DstVT = Op.getSimpleValueType();
11075 MVT SrcVT = Src.getSimpleValueType();
11076 if (DstVT.isFixedLengthVector()) {
11077 DstVT = getContainerForFixedLengthVector(VT: DstVT);
11078 SrcVT = getContainerForFixedLengthVector(VT: SrcVT);
11079 Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget);
11080 MVT MaskVT = getMaskTypeFor(VecVT: DstVT);
11081 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11082 }
11083
11084 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11085 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11086
11087 SDValue Result;
11088 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11089 if (SrcVT.isInteger()) {
11090 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11091
11092 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11093 ? RISCVISD::VSEXT_VL
11094 : RISCVISD::VZEXT_VL;
11095
11096 // Do we need to do any pre-widening before converting?
11097 if (SrcEltSize == 1) {
11098 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11099 MVT XLenVT = Subtarget.getXLenVT();
11100 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
11101 SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT,
11102 N1: DAG.getUNDEF(VT: IntVT), N2: Zero, N3: VL);
11103 SDValue One = DAG.getConstant(
11104 Val: RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, VT: XLenVT);
11105 SDValue OneSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT,
11106 N1: DAG.getUNDEF(VT: IntVT), N2: One, N3: VL);
11107 Src = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IntVT, N1: Src, N2: OneSplat,
11108 N3: ZeroSplat, N4: DAG.getUNDEF(VT: IntVT), N5: VL);
11109 } else if (DstEltSize > (2 * SrcEltSize)) {
11110 // Widen before converting.
11111 MVT IntVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: DstEltSize / 2),
11112 EC: DstVT.getVectorElementCount());
11113 Src = DAG.getNode(Opcode: RISCVISDExtOpc, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL);
11114 }
11115
11116 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
11117 } else {
11118 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11119 "Wrong input/output vector types");
11120
11121 // Convert f16 to f32 then convert f32 to i64.
11122 if (DstEltSize > (2 * SrcEltSize)) {
11123 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11124 MVT InterimFVT =
11125 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11126 Src =
11127 DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL);
11128 }
11129
11130 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
11131 }
11132 } else { // Narrowing + Conversion
11133 if (SrcVT.isInteger()) {
11134 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11135 // First do a narrowing conversion to an FP type half the size, then
11136 // round the result down to the final FP type if needed.
11137
11138 MVT InterimFVT = DstVT;
11139 if (SrcEltSize > (2 * DstEltSize)) {
11140 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11141 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11142 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11143 }
11144
11145 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL);
11146
11147 if (InterimFVT != DstVT) {
11148 Src = Result;
11149 Result = DAG.getNode(Opcode: RISCVISD::FP_ROUND_VL, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
11150 }
11151 } else {
11152 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11153 "Wrong input/output vector types");
11154 // First do a narrowing conversion to an integer half the size, then
11155 // truncate if needed.
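// As an illustration of the path below, f64 -> i8 is lowered as a conversion
// to i32 followed by VL-predicated truncates i32 -> i16 -> i8.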
11156
11157 if (DstEltSize == 1) {
11158 // First convert to the same size integer, then convert to mask using
11159 // setcc.
11160 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11161 MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize),
11162 EC: DstVT.getVectorElementCount());
11163 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL);
11164
11165 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11166 // otherwise the conversion was undefined.
11167 MVT XLenVT = Subtarget.getXLenVT();
11168 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
11169 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterimIVT,
11170 N1: DAG.getUNDEF(VT: InterimIVT), N2: SplatZero, N3: VL);
11171 Result = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: DstVT,
11172 Ops: {Result, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
11173 DAG.getUNDEF(VT: DstVT), Mask, VL});
11174 } else {
11175 MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
11176 EC: DstVT.getVectorElementCount());
11177
11178 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL);
11179
11180 while (InterimIVT != DstVT) {
11181 SrcEltSize /= 2;
11182 Src = Result;
11183 InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
11184 EC: DstVT.getVectorElementCount());
11185 Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: InterimIVT,
11186 N1: Src, N2: Mask, N3: VL);
11187 }
11188 }
11189 }
11190 }
11191
11192 MVT VT = Op.getSimpleValueType();
11193 if (!VT.isFixedLengthVector())
11194 return Result;
11195 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11196}
11197
11198SDValue
11199RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11200 SelectionDAG &DAG) const {
11201 SDLoc DL(Op);
11202
11203 SDValue Op1 = Op.getOperand(i: 0);
11204 SDValue Op2 = Op.getOperand(i: 1);
11205 SDValue Offset = Op.getOperand(i: 2);
11206 SDValue Mask = Op.getOperand(i: 3);
11207 SDValue EVL1 = Op.getOperand(i: 4);
11208 SDValue EVL2 = Op.getOperand(i: 5);
11209
11210 const MVT XLenVT = Subtarget.getXLenVT();
11211 MVT VT = Op.getSimpleValueType();
11212 MVT ContainerVT = VT;
11213 if (VT.isFixedLengthVector()) {
11214 ContainerVT = getContainerForFixedLengthVector(VT);
11215 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11216 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
11217 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11218 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11219 }
11220
11221 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11222 EVL1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: EVL1);
11223
11224 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11225 if (IsMaskVector) {
11226 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11227
11228 // Expand input operands
11229 SDValue SplatOneOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11230 N1: DAG.getUNDEF(VT: ContainerVT),
11231 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL1);
11232 SDValue SplatZeroOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11233 N1: DAG.getUNDEF(VT: ContainerVT),
11234 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL1);
11235 Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op1, N2: SplatOneOp1,
11236 N3: SplatZeroOp1, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL1);
11237
11238 SDValue SplatOneOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11239 N1: DAG.getUNDEF(VT: ContainerVT),
11240 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL2);
11241 SDValue SplatZeroOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11242 N1: DAG.getUNDEF(VT: ContainerVT),
11243 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL2);
11244 Op2 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op2, N2: SplatOneOp2,
11245 N3: SplatZeroOp2, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL2);
11246 }
11247
11248 int64_t ImmValue = cast<ConstantSDNode>(Val&: Offset)->getSExtValue();
11249 SDValue DownOffset, UpOffset;
11250 if (ImmValue >= 0) {
11251 // The operand is a TargetConstant; we need to rebuild it as a regular
11252 // constant.
11253 DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT);
11254 UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: DownOffset);
11255 } else {
11256 // The operand is a TargetConstant; we need to rebuild it as a regular
11257 // constant rather than negating the original operand.
11258 UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT);
11259 DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: UpOffset);
11260 }
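// Illustration: for a non-negative Offset, the slidedown below takes
// EVL1 - Offset elements of Op1 starting at element Offset, and the slideup
// then places Op2's elements after them, starting at position
// UpOffset = EVL1 - Offset.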
11261
11262 SDValue SlideDown =
11263 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
11264 Op: Op1, Offset: DownOffset, Mask, VL: UpOffset);
11265 SDValue Result = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: SlideDown, Op: Op2,
11266 Offset: UpOffset, Mask, VL: EVL2, Policy: RISCVII::TAIL_AGNOSTIC);
11267
11268 if (IsMaskVector) {
11269 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11270 Result = DAG.getNode(
11271 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11272 {Result, DAG.getConstant(0, DL, ContainerVT),
11273 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11274 Mask, EVL2});
11275 }
11276
11277 if (!VT.isFixedLengthVector())
11278 return Result;
11279 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11280}
11281
11282SDValue
11283RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11284 SelectionDAG &DAG) const {
11285 SDLoc DL(Op);
11286 MVT VT = Op.getSimpleValueType();
11287 MVT XLenVT = Subtarget.getXLenVT();
11288
11289 SDValue Op1 = Op.getOperand(i: 0);
11290 SDValue Mask = Op.getOperand(i: 1);
11291 SDValue EVL = Op.getOperand(i: 2);
11292
11293 MVT ContainerVT = VT;
11294 if (VT.isFixedLengthVector()) {
11295 ContainerVT = getContainerForFixedLengthVector(VT);
11296 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11297 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11298 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11299 }
11300
11301 MVT GatherVT = ContainerVT;
11302 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11303 // Check if we are working with mask vectors
11304 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11305 if (IsMaskVector) {
11306 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11307
11308 // Expand input operand
11309 SDValue SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
11310 N1: DAG.getUNDEF(VT: IndicesVT),
11311 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL);
11312 SDValue SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
11313 N1: DAG.getUNDEF(VT: IndicesVT),
11314 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL);
11315 Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IndicesVT, N1: Op1, N2: SplatOne,
11316 N3: SplatZero, N4: DAG.getUNDEF(VT: IndicesVT), N5: EVL);
11317 }
11318
11319 unsigned EltSize = GatherVT.getScalarSizeInBits();
11320 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11321 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11322 unsigned MaxVLMAX =
11323 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
11324
11325 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11326 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11327 // to use vrgatherei16.vv.
11328 // TODO: It's also possible to use vrgatherei16.vv for other types to
11329 // decrease register width for the index calculation.
11330 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11331 if (MaxVLMAX > 256 && EltSize == 8) {
11332 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11333 // Split the vector in half and reverse each half using a full register
11334 // reverse.
11335 // Swap the halves and concatenate them.
11336 // Slide the concatenated result by (VLMax - VL).
11337 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11338 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: GatherVT);
11339 auto [Lo, Hi] = DAG.SplitVector(N: Op1, DL);
11340
11341 SDValue LoRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo);
11342 SDValue HiRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi);
11343
11344 // Reassemble the low and high pieces reversed.
11345 // NOTE: this Result is unmasked (because we do not need masks for
11346 // shuffles). If in the future this has to change, we can use a SELECT_VL
11347 // between Result and UNDEF using the mask originally passed to VP_REVERSE.
11348 SDValue Result =
11349 DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: GatherVT, N1: HiRev, N2: LoRev);
11350
11351 // Slide off any elements from past EVL that were reversed into the low
11352 // elements.
11353 unsigned MinElts = GatherVT.getVectorMinNumElements();
11354 SDValue VLMax =
11355 DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), MinElts));
11356 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: EVL);
11357
11358 Result = getVSlidedown(DAG, Subtarget, DL, VT: GatherVT,
11359 Merge: DAG.getUNDEF(VT: GatherVT), Op: Result, Offset: Diff, Mask, VL: EVL);
11360
11361 if (IsMaskVector) {
11362 // Truncate Result back to a mask vector
11363 Result =
11364 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
11365 Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT),
11366 DAG.getCondCode(Cond: ISD::SETNE),
11367 DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL});
11368 }
11369
11370 if (!VT.isFixedLengthVector())
11371 return Result;
11372 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11373 }
11374
11375 // Just promote the int type to i16 which will double the LMUL.
11376 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11377 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11378 }
11379
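// The general case reverses the vector with a gather whose indices are
// (EVL - 1) - vid; e.g. for EVL = 4 the index vector is (3, 2, 1, 0).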
11380 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IndicesVT, N1: Mask, N2: EVL);
11381 SDValue VecLen =
11382 DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
11383 SDValue VecLenSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
11384 N1: DAG.getUNDEF(VT: IndicesVT), N2: VecLen, N3: EVL);
11385 SDValue VRSUB = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IndicesVT, N1: VecLenSplat, N2: VID,
11386 N3: DAG.getUNDEF(VT: IndicesVT), N4: Mask, N5: EVL);
11387 SDValue Result = DAG.getNode(Opcode: GatherOpc, DL, VT: GatherVT, N1: Op1, N2: VRSUB,
11388 N3: DAG.getUNDEF(VT: GatherVT), N4: Mask, N5: EVL);
11389
11390 if (IsMaskVector) {
11391 // Truncate Result back to a mask vector
11392 Result = DAG.getNode(
11393 Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
11394 Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT), DAG.getCondCode(Cond: ISD::SETNE),
11395 DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL});
11396 }
11397
11398 if (!VT.isFixedLengthVector())
11399 return Result;
11400 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11401}
11402
11403SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 MVT VT = Op.getSimpleValueType();
11406 if (VT.getVectorElementType() != MVT::i1)
11407 return lowerVPOp(Op, DAG);
11408
11409 // It is safe to drop the mask parameter, as masked-off elements are undef.
11410 SDValue Op1 = Op->getOperand(Num: 0);
11411 SDValue Op2 = Op->getOperand(Num: 1);
11412 SDValue VL = Op->getOperand(Num: 3);
11413
11414 MVT ContainerVT = VT;
11415 const bool IsFixed = VT.isFixedLengthVector();
11416 if (IsFixed) {
11417 ContainerVT = getContainerForFixedLengthVector(VT);
11418 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11419 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
11420 }
11421
11422 SDLoc DL(Op);
11423 SDValue Val = DAG.getNode(Opcode: getRISCVVLOp(Op), DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
11424 if (!IsFixed)
11425 return Val;
11426 return convertFromScalableVector(VT, V: Val, DAG, Subtarget);
11427}
11428
11429SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc DL(Op);
11432 MVT XLenVT = Subtarget.getXLenVT();
11433 MVT VT = Op.getSimpleValueType();
11434 MVT ContainerVT = VT;
11435 if (VT.isFixedLengthVector())
11436 ContainerVT = getContainerForFixedLengthVector(VT);
11437
11438 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11439
11440 auto *VPNode = cast<VPStridedLoadSDNode>(Val&: Op);
11441 // Check if the mask is known to be all ones
11442 SDValue Mask = VPNode->getMask();
11443 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11444
11445 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11446 : Intrinsic::riscv_vlse_mask,
11447 DL, XLenVT);
11448 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11449 DAG.getUNDEF(VT: ContainerVT), VPNode->getBasePtr(),
11450 VPNode->getStride()};
11451 if (!IsUnmasked) {
11452 if (VT.isFixedLengthVector()) {
11453 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11454 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11455 }
11456 Ops.push_back(Elt: Mask);
11457 }
11458 Ops.push_back(Elt: VPNode->getVectorLength());
11459 if (!IsUnmasked) {
11460 SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT);
11461 Ops.push_back(Elt: Policy);
11462 }
11463
11464 SDValue Result =
11465 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
11466 MemVT: VPNode->getMemoryVT(), MMO: VPNode->getMemOperand());
11467 SDValue Chain = Result.getValue(R: 1);
11468
11469 if (VT.isFixedLengthVector())
11470 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11471
11472 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
11473}
11474
11475SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 SDLoc DL(Op);
11478 MVT XLenVT = Subtarget.getXLenVT();
11479
11480 auto *VPNode = cast<VPStridedStoreSDNode>(Val&: Op);
11481 SDValue StoreVal = VPNode->getValue();
11482 MVT VT = StoreVal.getSimpleValueType();
11483 MVT ContainerVT = VT;
11484 if (VT.isFixedLengthVector()) {
11485 ContainerVT = getContainerForFixedLengthVector(VT);
11486 StoreVal = convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget);
11487 }
11488
11489 // Check if the mask is known to be all ones
11490 SDValue Mask = VPNode->getMask();
11491 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11492
11493 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11494 : Intrinsic::riscv_vsse_mask,
11495 DL, XLenVT);
11496 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11497 VPNode->getBasePtr(), VPNode->getStride()};
11498 if (!IsUnmasked) {
11499 if (VT.isFixedLengthVector()) {
11500 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11501 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11502 }
11503 Ops.push_back(Elt: Mask);
11504 }
11505 Ops.push_back(Elt: VPNode->getVectorLength());
11506
11507 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: VPNode->getVTList(),
11508 Ops, MemVT: VPNode->getMemoryVT(),
11509 MMO: VPNode->getMemOperand());
11510}
11511
11512// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11513// matched to a RVV indexed load. The RVV indexed load instructions only
11514// support the "unsigned unscaled" addressing mode; indices are implicitly
11515// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11516// signed or scaled indexing is extended to the XLEN value type and scaled
11517// accordingly.
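// For example, a gather of i32 elements with an index vector of (0, 8, 16)
// loads from BasePtr+0, BasePtr+8 and BasePtr+16, since the indices are raw
// byte offsets rather than element numbers.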
11518SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11519 SelectionDAG &DAG) const {
11520 SDLoc DL(Op);
11521 MVT VT = Op.getSimpleValueType();
11522
11523 const auto *MemSD = cast<MemSDNode>(Val: Op.getNode());
11524 EVT MemVT = MemSD->getMemoryVT();
11525 MachineMemOperand *MMO = MemSD->getMemOperand();
11526 SDValue Chain = MemSD->getChain();
11527 SDValue BasePtr = MemSD->getBasePtr();
11528
11529 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11530 SDValue Index, Mask, PassThru, VL;
11531
11532 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Val: Op.getNode())) {
11533 Index = VPGN->getIndex();
11534 Mask = VPGN->getMask();
11535 PassThru = DAG.getUNDEF(VT);
11536 VL = VPGN->getVectorLength();
11537 // VP doesn't support extending loads.
11538 LoadExtType = ISD::NON_EXTLOAD;
11539 } else {
11540 // Else it must be a MGATHER.
11541 auto *MGN = cast<MaskedGatherSDNode>(Val: Op.getNode());
11542 Index = MGN->getIndex();
11543 Mask = MGN->getMask();
11544 PassThru = MGN->getPassThru();
11545 LoadExtType = MGN->getExtensionType();
11546 }
11547
11548 MVT IndexVT = Index.getSimpleValueType();
11549 MVT XLenVT = Subtarget.getXLenVT();
11550
11551 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11552 "Unexpected VTs!");
11553 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11554 // Targets have to explicitly opt in to extending vector loads.
11555 assert(LoadExtType == ISD::NON_EXTLOAD &&
11556 "Unexpected extending MGATHER/VP_GATHER");
11557
11558 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11559 // the selection of the masked intrinsics doesn't do this for us.
11560 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11561
11562 MVT ContainerVT = VT;
11563 if (VT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VT);
11565 IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(),
11566 EC: ContainerVT.getVectorElementCount());
11567
11568 Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget);
11569
11570 if (!IsUnmasked) {
11571 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11572 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11573 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
11574 }
11575 }
11576
11577 if (!VL)
11578 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
11579
11580 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11581 IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT);
11582 Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index);
11583 }
11584
11585 unsigned IntID =
11586 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11587 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
11588 if (IsUnmasked)
11589 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
11590 else
11591 Ops.push_back(Elt: PassThru);
11592 Ops.push_back(Elt: BasePtr);
11593 Ops.push_back(Elt: Index);
11594 if (!IsUnmasked)
11595 Ops.push_back(Elt: Mask);
11596 Ops.push_back(Elt: VL);
11597 if (!IsUnmasked)
11598 Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT));
11599
11600 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11601 SDValue Result =
11602 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO);
11603 Chain = Result.getValue(R: 1);
11604
11605 if (VT.isFixedLengthVector())
11606 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11607
11608 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
11609}
11610
11611// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11612// matched to a RVV indexed store. The RVV indexed store instructions only
11613// support the "unsigned unscaled" addressing mode; indices are implicitly
11614// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11615// signed or scaled indexing is extended to the XLEN value type and scaled
11616// accordingly.
11617SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 const auto *MemSD = cast<MemSDNode>(Val: Op.getNode());
11621 EVT MemVT = MemSD->getMemoryVT();
11622 MachineMemOperand *MMO = MemSD->getMemOperand();
11623 SDValue Chain = MemSD->getChain();
11624 SDValue BasePtr = MemSD->getBasePtr();
11625
11626 [[maybe_unused]] bool IsTruncatingStore = false;
11627 SDValue Index, Mask, Val, VL;
11628
11629 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Val: Op.getNode())) {
11630 Index = VPSN->getIndex();
11631 Mask = VPSN->getMask();
11632 Val = VPSN->getValue();
11633 VL = VPSN->getVectorLength();
11634 // VP doesn't support truncating stores.
11635 IsTruncatingStore = false;
11636 } else {
11637 // Else it must be a MSCATTER.
11638 auto *MSN = cast<MaskedScatterSDNode>(Val: Op.getNode());
11639 Index = MSN->getIndex();
11640 Mask = MSN->getMask();
11641 Val = MSN->getValue();
11642 IsTruncatingStore = MSN->isTruncatingStore();
11643 }
11644
11645 MVT VT = Val.getSimpleValueType();
11646 MVT IndexVT = Index.getSimpleValueType();
11647 MVT XLenVT = Subtarget.getXLenVT();
11648
11649 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11650 "Unexpected VTs!");
11651 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11652 // Targets have to explicitly opt in to extending vector loads and
11653 // truncating vector stores.
11654 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11655
11656 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11657 // the selection of the masked intrinsics doesn't do this for us.
11658 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11659
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector()) {
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663 IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(),
11664 EC: ContainerVT.getVectorElementCount());
11665
11666 Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget);
11667 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
11668
11669 if (!IsUnmasked) {
11670 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11671 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11672 }
11673 }
11674
11675 if (!VL)
11676 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
11677
11678 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11679 IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT);
11680 Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index);
11681 }
11682
11683 unsigned IntID =
11684 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11685 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
11686 Ops.push_back(Elt: Val);
11687 Ops.push_back(Elt: BasePtr);
11688 Ops.push_back(Elt: Index);
11689 if (!IsUnmasked)
11690 Ops.push_back(Elt: Mask);
11691 Ops.push_back(Elt: VL);
11692
11693 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11694 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11695}
11696
11697SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11698 SelectionDAG &DAG) const {
11699 const MVT XLenVT = Subtarget.getXLenVT();
11700 SDLoc DL(Op);
11701 SDValue Chain = Op->getOperand(Num: 0);
11702 SDValue SysRegNo = DAG.getTargetConstant(
11703 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11704 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11705 SDValue RM = DAG.getNode(Opcode: RISCVISD::READ_CSR, DL, VTList: VTs, N1: Chain, N2: SysRegNo);
11706
11707 // The encoding used for the rounding mode in RISC-V differs from that used by
11708 // FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as an
11709 // index into a table, which consists of a sequence of 4-bit fields, each
11710 // representing the corresponding FLT_ROUNDS mode.
11711 static const int Table =
11712 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11713 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11714 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11715 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11716 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
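// Worked example (assuming the usual encodings RNE=0/RTZ=1/RDN=2/RUP=3/RMM=4
// and TowardZero=0/NearestTiesToEven=1/TowardPositive=2/TowardNegative=3/
// NearestTiesToAway=4): Table evaluates to 0x42301, so reading FRM == RDN (2)
// shifts the table right by 2 << 2 = 8 bits and masks with 7, yielding
// 3 == RoundingMode::TowardNegative.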
11717
11718 SDValue Shift =
11719 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RM, N2: DAG.getConstant(Val: 2, DL, VT: XLenVT));
11720 SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT,
11721 N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift);
11722 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted,
11723 N2: DAG.getConstant(Val: 7, DL, VT: XLenVT));
11724
11725 return DAG.getMergeValues(Ops: {Masked, Chain}, dl: DL);
11726}
11727
11728SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11729 SelectionDAG &DAG) const {
11730 const MVT XLenVT = Subtarget.getXLenVT();
11731 SDLoc DL(Op);
11732 SDValue Chain = Op->getOperand(Num: 0);
11733 SDValue RMValue = Op->getOperand(Num: 1);
11734 SDValue SysRegNo = DAG.getTargetConstant(
11735 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11736
11737 // The encoding used for the rounding mode in RISC-V differs from that used by
11738 // FLT_ROUNDS. To convert between them, the C rounding mode is used as an index
11739 // into a table, which consists of a sequence of 4-bit fields, each
11740 // representing the corresponding RISC-V mode.
11741 static const unsigned Table =
11742 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11743 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11744 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11745 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11746 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
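// Under the same assumed encodings as in lowerGET_ROUNDING, this inverse
// table also evaluates to 0x42301 (the FRM <-> FLT_ROUNDS mapping happens to
// be its own inverse); e.g. setting mode 3 (TowardNegative) shifts by 12 and
// masks with 7, yielding RDN (2).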
11747
11748 RMValue = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: RMValue);
11749
11750 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RMValue,
11751 N2: DAG.getConstant(Val: 2, DL, VT: XLenVT));
11752 SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT,
11753 N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift);
11754 RMValue = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted,
11755 N2: DAG.getConstant(Val: 0x7, DL, VT: XLenVT));
11756 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11757 RMValue);
11758}
11759
11760SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11761 SelectionDAG &DAG) const {
11762 MachineFunction &MF = DAG.getMachineFunction();
11763
11764 bool isRISCV64 = Subtarget.is64Bit();
11765 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11766
11767 int FI = MF.getFrameInfo().CreateFixedObject(Size: isRISCV64 ? 8 : 4, SPOffset: 0, IsImmutable: false);
11768 return DAG.getFrameIndex(FI, VT: PtrVT);
11769}
11770
11771// Returns the opcode of the target-specific SDNode that implements the 32-bit
11772// form of the given Opcode.
11773static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11774 switch (Opcode) {
11775 default:
11776 llvm_unreachable("Unexpected opcode");
11777 case ISD::SHL:
11778 return RISCVISD::SLLW;
11779 case ISD::SRA:
11780 return RISCVISD::SRAW;
11781 case ISD::SRL:
11782 return RISCVISD::SRLW;
11783 case ISD::SDIV:
11784 return RISCVISD::DIVW;
11785 case ISD::UDIV:
11786 return RISCVISD::DIVUW;
11787 case ISD::UREM:
11788 return RISCVISD::REMUW;
11789 case ISD::ROTL:
11790 return RISCVISD::ROLW;
11791 case ISD::ROTR:
11792 return RISCVISD::RORW;
11793 }
11794}
11795
11796 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11797 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11798 // otherwise be promoted to i64, making it difficult to select the
11799 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
11800 // originally of type i8/i16/i32 is lost.
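// For example (a sketch of the default i32 case on RV64), (i32 (srl x, y)) is
// rebuilt as (i32 (trunc (SRLW (any_extend x), (any_extend y)))), which keeps
// the original 32-bit operation visible to instruction selection.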
11801static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11802 unsigned ExtOpc = ISD::ANY_EXTEND) {
11803 SDLoc DL(N);
11804 RISCVISD::NodeType WOpcode = getRISCVWOpcode(Opcode: N->getOpcode());
11805 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11806 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11807 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11808 // ReplaceNodeResults requires we maintain the same type for the return value.
11809 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
11810}
11811
11812 // Converts the given 32-bit operation to an i64 operation with sign-extension
11813 // semantics in order to reduce the number of sign-extension instructions.
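// For example, (i32 (add x, y)) becomes
// (i32 (trunc (sext_inreg (add (any_extend x), (any_extend y)), i32))),
// so later nodes can rely on the upper bits of the i64 value being sign bits.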
11814static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11815 SDLoc DL(N);
11816 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11817 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11818 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11819 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11820 DAG.getValueType(MVT::i32));
11821 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11822}
11823
11824void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11825 SmallVectorImpl<SDValue> &Results,
11826 SelectionDAG &DAG) const {
11827 SDLoc DL(N);
11828 switch (N->getOpcode()) {
11829 default:
11830 llvm_unreachable("Don't know how to custom type legalize this operation!");
11831 case ISD::STRICT_FP_TO_SINT:
11832 case ISD::STRICT_FP_TO_UINT:
11833 case ISD::FP_TO_SINT:
11834 case ISD::FP_TO_UINT: {
11835 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11836 "Unexpected custom legalisation");
11837 bool IsStrict = N->isStrictFPOpcode();
11838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11839 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11840 SDValue Op0 = IsStrict ? N->getOperand(Num: 1) : N->getOperand(Num: 0);
11841 if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) !=
11842 TargetLowering::TypeSoftenFloat) {
11843 if (!isTypeLegal(VT: Op0.getValueType()))
11844 return;
11845 if (IsStrict) {
11846 SDValue Chain = N->getOperand(Num: 0);
11847 // In the absence of Zfh, promote f16 to f32, then convert.
11848 if (Op0.getValueType() == MVT::f16 &&
11849 !Subtarget.hasStdExtZfhOrZhinx()) {
11850 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11851 {Chain, Op0});
11852 Chain = Op0.getValue(R: 1);
11853 }
11854 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11855 : RISCVISD::STRICT_FCVT_WU_RV64;
11856 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11857 SDValue Res = DAG.getNode(
11858 Opc, DL, VTs, Chain, Op0,
11859 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11860 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11861 Results.push_back(Elt: Res.getValue(R: 1));
11862 return;
11863 }
11864 // For bf16, or for f16 in the absence of Zfh, promote [b]f16 to f32 and
11865 // then convert.
11866 if ((Op0.getValueType() == MVT::f16 &&
11867 !Subtarget.hasStdExtZfhOrZhinx()) ||
11868 Op0.getValueType() == MVT::bf16)
11869 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11870
11871 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11872 SDValue Res =
11873 DAG.getNode(Opc, DL, MVT::i64, Op0,
11874 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11875 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11876 return;
11877 }
11878 // If the FP type needs to be softened, emit a library call using the 'si'
11879 // version. If we left it to default legalization we'd end up with 'di'. If
11880 // the FP type doesn't need to be softened just let generic type
11881 // legalization promote the result type.
11882 RTLIB::Libcall LC;
11883 if (IsSigned)
11884 LC = RTLIB::getFPTOSINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0));
11885 else
11886 LC = RTLIB::getFPTOUINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0));
11887 MakeLibCallOptions CallOptions;
11888 EVT OpVT = Op0.getValueType();
11889 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: N->getValueType(ResNo: 0), Value: true);
11890 SDValue Chain = IsStrict ? N->getOperand(Num: 0) : SDValue();
11891 SDValue Result;
11892 std::tie(args&: Result, args&: Chain) =
11893 makeLibCall(DAG, LC, RetVT: N->getValueType(ResNo: 0), Ops: Op0, CallOptions, dl: DL, Chain);
11894 Results.push_back(Elt: Result);
11895 if (IsStrict)
11896 Results.push_back(Elt: Chain);
11897 break;
11898 }
11899 case ISD::LROUND: {
11900 SDValue Op0 = N->getOperand(Num: 0);
11901 EVT Op0VT = Op0.getValueType();
11902 if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) !=
11903 TargetLowering::TypeSoftenFloat) {
11904 if (!isTypeLegal(VT: Op0VT))
11905 return;
11906
11907 // In the absence of Zfh, promote f16 to f32, then convert.
11908 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11909 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11910
11911 SDValue Res =
11912 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11913 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11915 return;
11916 }
11917 // If the FP type needs to be softened, emit a library call to lround. We'll
11918 // need to truncate the result. We assume any value that doesn't fit in i32
11919 // is allowed to return an unspecified value.
11920 RTLIB::Libcall LC =
11921 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11922 MakeLibCallOptions CallOptions;
11923 EVT OpVT = Op0.getValueType();
11924 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11925 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11926 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11927 Results.push_back(Elt: Result);
11928 break;
11929 }
11930 case ISD::READCYCLECOUNTER:
11931 case ISD::READSTEADYCOUNTER: {
11932 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11933 "has custom type legalization on riscv32");
11934
11935 SDValue LoCounter, HiCounter;
11936 MVT XLenVT = Subtarget.getXLenVT();
11937 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11938 LoCounter = DAG.getTargetConstant(
11939 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11940 HiCounter = DAG.getTargetConstant(
11941 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11942 } else {
11943 LoCounter = DAG.getTargetConstant(
11944 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11945 HiCounter = DAG.getTargetConstant(
11946 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11947 }
11948 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11949 SDValue RCW = DAG.getNode(Opcode: RISCVISD::READ_COUNTER_WIDE, DL, VTList: VTs,
11950 N1: N->getOperand(Num: 0), N2: LoCounter, N3: HiCounter);
11951
11952 Results.push_back(
11953 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11954 Results.push_back(Elt: RCW.getValue(R: 2));
11955 break;
11956 }
11957 case ISD::LOAD: {
11958 if (!ISD::isNON_EXTLoad(N))
11959 return;
11960
11961 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11962 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11963 LoadSDNode *Ld = cast<LoadSDNode>(Val: N);
11964
11965 SDLoc dl(N);
11966 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11967 Ld->getBasePtr(), Ld->getMemoryVT(),
11968 Ld->getMemOperand());
11969 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11970 Results.push_back(Elt: Res.getValue(R: 1));
11971 return;
11972 }
11973 case ISD::MUL: {
11974 unsigned Size = N->getSimpleValueType(ResNo: 0).getSizeInBits();
11975 unsigned XLen = Subtarget.getXLen();
11976 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11977 if (Size > XLen) {
11978 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11979 SDValue LHS = N->getOperand(Num: 0);
11980 SDValue RHS = N->getOperand(Num: 1);
11981 APInt HighMask = APInt::getHighBitsSet(numBits: Size, hiBitsSet: XLen);
11982
11983 bool LHSIsU = DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask);
11984 bool RHSIsU = DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask);
11985 // We need exactly one side to be unsigned.
11986 if (LHSIsU == RHSIsU)
11987 return;
11988
11989 auto MakeMULPair = [&](SDValue S, SDValue U) {
11990 MVT XLenVT = Subtarget.getXLenVT();
11991 S = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: S);
11992 U = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: U);
11993 SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: S, N2: U);
11994 SDValue Hi = DAG.getNode(Opcode: RISCVISD::MULHSU, DL, VT: XLenVT, N1: S, N2: U);
11995 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: N->getValueType(ResNo: 0), N1: Lo, N2: Hi);
11996 };
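// Informal rationale: when one operand has its high XLen bits known zero and
// the other is a sign-extension of its low XLen bits, the full 2*XLen product
// is exactly MUL (low half) and MULHSU (high half) of the two low halves.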
11997
11998 bool LHSIsS = DAG.ComputeNumSignBits(Op: LHS) > XLen;
11999 bool RHSIsS = DAG.ComputeNumSignBits(Op: RHS) > XLen;
12000
12001 // The other operand should be signed, but still prefer MULH when
12002 // possible.
12003 if (RHSIsU && LHSIsS && !RHSIsS)
12004 Results.push_back(Elt: MakeMULPair(LHS, RHS));
12005 else if (LHSIsU && RHSIsS && !LHSIsS)
12006 Results.push_back(Elt: MakeMULPair(RHS, LHS));
12007
12008 return;
12009 }
12010 [[fallthrough]];
12011 }
12012 case ISD::ADD:
12013 case ISD::SUB:
12014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12015 "Unexpected custom legalisation");
12016 Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
12017 break;
12018 case ISD::SHL:
12019 case ISD::SRA:
12020 case ISD::SRL:
12021 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12022 "Unexpected custom legalisation");
12023 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
12024 // If we can use a BSET instruction, allow default promotion to apply.
12025 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12026 isOneConstant(V: N->getOperand(Num: 0)))
12027 break;
12028 Results.push_back(Elt: customLegalizeToWOp(N, DAG));
12029 break;
12030 }
12031
12032 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12033 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12034 // shift amount.
12035 if (N->getOpcode() == ISD::SHL) {
12036 SDLoc DL(N);
12037 SDValue NewOp0 =
12038 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12039 SDValue NewOp1 =
12040 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12041 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12042 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12043 DAG.getValueType(MVT::i32));
12044 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12045 }
12046
12047 break;
12048 case ISD::ROTL:
12049 case ISD::ROTR:
12050 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12051 "Unexpected custom legalisation");
12052 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12053 Subtarget.hasVendorXTHeadBb()) &&
12054 "Unexpected custom legalization");
12055 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) &&
12056 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12057 return;
12058 Results.push_back(Elt: customLegalizeToWOp(N, DAG));
12059 break;
12060 case ISD::CTTZ:
12061 case ISD::CTTZ_ZERO_UNDEF:
12062 case ISD::CTLZ:
12063 case ISD::CTLZ_ZERO_UNDEF: {
12064 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12065 "Unexpected custom legalisation");
12066
12067 SDValue NewOp0 =
12068 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12069 bool IsCTZ =
12070 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12071 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12072 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12073 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12074 return;
12075 }
12076 case ISD::SDIV:
12077 case ISD::UDIV:
12078 case ISD::UREM: {
12079 MVT VT = N->getSimpleValueType(ResNo: 0);
12080 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12081 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12082 "Unexpected custom legalisation");
    // Don't promote division/remainder by a constant since we should expand
    // those to a multiply by a magic constant.
12085 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12086 if (N->getOperand(Num: 1).getOpcode() == ISD::Constant &&
12087 !isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
12088 return;
12089
12090 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12091 // the upper 32 bits. For other types we need to sign or zero extend
12092 // based on the opcode.
12093 unsigned ExtOpc = ISD::ANY_EXTEND;
12094 if (VT != MVT::i32)
12095 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12096 : ISD::ZERO_EXTEND;
12097
12098 Results.push_back(Elt: customLegalizeToWOp(N, DAG, ExtOpc));
12099 break;
12100 }
12101 case ISD::SADDO: {
12102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12103 "Unexpected custom legalisation");
12104
12105 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12106 // use the default legalization.
12107 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
12108 return;
12109
12110 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12111 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12112 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12113 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12114 DAG.getValueType(MVT::i32));
12115
12116 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12117
    // For an addition, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is negative; otherwise
    // there will be overflow.
    // For a subtraction, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is (non-zero) positive;
    // otherwise there will be overflow.
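    // Illustrative example with RHS == 1: the add overflows exactly when LHS
    // is INT32_MAX; Res then wraps to INT32_MIN < LHS while ConditionRHS
    // (1 < 0) is false, so the XOR below yields true. In all other cases
    // Res == LHS + 1 > LHS and the XOR yields false.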
12124 EVT OType = N->getValueType(ResNo: 1);
12125 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, VT: OType, LHS: Res, RHS: LHS, Cond: ISD::SETLT);
12126 SDValue ConditionRHS = DAG.getSetCC(DL, VT: OType, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
12127
12128 SDValue Overflow =
12129 DAG.getNode(Opcode: ISD::XOR, DL, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS);
12130 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12131 Results.push_back(Elt: Overflow);
12132 return;
12133 }
12134 case ISD::UADDO:
12135 case ISD::USUBO: {
12136 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12137 "Unexpected custom legalisation");
12138 bool IsAdd = N->getOpcode() == ISD::UADDO;
12139 // Create an ADDW or SUBW.
12140 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12141 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12142 SDValue Res =
12143 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12144 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12145 DAG.getValueType(MVT::i32));
12146
12147 SDValue Overflow;
12148 if (IsAdd && isOneConstant(V: RHS)) {
12149 // Special case uaddo X, 1 overflowed if the addition result is 0.
      // The general case (X + C) < C is not necessarily beneficial. Although
      // we reduce the live range of X, we may introduce the materialization of
      // constant C, especially when the setcc result is used by a branch;
      // RISC-V has no compare-with-constant-and-branch instructions.
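      // As a rough sketch of the expected lowering (exact instruction choice
      // is up to isel): uaddo X, 1 becomes an ADDIW of X plus a SEQZ of the
      // result, avoiding both a LI of the constant and an extra compare.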
12154 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12155 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12156 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
12157 // Special case uaddo X, -1 overflowed if X != 0.
12158 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12159 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12160 } else {
12161 // Sign extend the LHS and perform an unsigned compare with the ADDW
12162 // result. Since the inputs are sign extended from i32, this is equivalent
12163 // to comparing the lower 32 bits.
12164 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12165 Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: Res, RHS: LHS,
12166 Cond: IsAdd ? ISD::SETULT : ISD::SETUGT);
12167 }
12168
12169 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12170 Results.push_back(Elt: Overflow);
12171 return;
12172 }
12173 case ISD::UADDSAT:
12174 case ISD::USUBSAT: {
12175 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12176 "Unexpected custom legalisation");
12177 if (Subtarget.hasStdExtZbb()) {
12178 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12179 // sign extend allows overflow of the lower 32 bits to be detected on
12180 // the promoted size.
12181 SDValue LHS =
12182 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12183 SDValue RHS =
12184 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12185 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12186 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12187 return;
12188 }
12189
12190 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12191 // promotion for UADDO/USUBO.
12192 Results.push_back(Elt: expandAddSubSat(Node: N, DAG));
12193 return;
12194 }
12195 case ISD::SADDSAT:
12196 case ISD::SSUBSAT: {
12197 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12198 "Unexpected custom legalisation");
12199 Results.push_back(Elt: expandAddSubSat(Node: N, DAG));
12200 return;
12201 }
12202 case ISD::ABS: {
12203 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12204 "Unexpected custom legalisation");
12205
12206 if (Subtarget.hasStdExtZbb()) {
12207 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12208 // This allows us to remember that the result is sign extended. Expanding
12209 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12210 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12211 N->getOperand(0));
12212 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12213 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12214 return;
12215 }
12216
12217 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
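    // For example, X = -5: Y = -1, xor(X, Y) = 4, and subw(4, -1) = 5.
    // For X >= 0, Y = 0 and both the xor and the subw leave X unchanged.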
12218 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12219
    // Freeze the source so we can increase its use count.
12221 Src = DAG.getFreeze(V: Src);
12222
12223 // Copy sign bit to all bits using the sraiw pattern.
12224 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12225 DAG.getValueType(MVT::i32));
12226 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12227 DAG.getConstant(31, DL, MVT::i64));
12228
12229 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12230 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12231
12232 // NOTE: The result is only required to be anyextended, but sext is
12233 // consistent with type legalization of sub.
12234 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12235 DAG.getValueType(MVT::i32));
12236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12237 return;
12238 }
12239 case ISD::BITCAST: {
12240 EVT VT = N->getValueType(ResNo: 0);
12241 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12242 SDValue Op0 = N->getOperand(Num: 0);
12243 EVT Op0VT = Op0.getValueType();
12244 MVT XLenVT = Subtarget.getXLenVT();
12245 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12246 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12247 SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0);
12248 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12249 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12250 Subtarget.hasStdExtZfbfmin()) {
12251 SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0);
12252 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12253 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12254 Subtarget.hasStdExtFOrZfinx()) {
12255 SDValue FPConv =
12256 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12257 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12258 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12259 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12260 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12261 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12262 NewReg.getValue(0), NewReg.getValue(1));
12263 Results.push_back(Elt: RetReg);
12264 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12265 isTypeLegal(VT: Op0VT)) {
12266 // Custom-legalize bitcasts from fixed-length vector types to illegal
12267 // scalar types in order to improve codegen. Bitcast the vector to a
12268 // one-element vector type whose element type is the same as the result
12269 // type, and extract the first element.
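      // For example (a sketch, assuming RV32 with 64-bit vector elements
      // available), (i64 (bitcast (v2i32 V))) becomes
      // (extract_vector_elt (v1i64 (bitcast V)), 0), since v1i64 is legal
      // while scalar i64 is not.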
12270 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1);
12271 if (isTypeLegal(VT: BVT)) {
12272 SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0);
12273 Results.push_back(Elt: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec,
12274 N2: DAG.getVectorIdxConstant(Val: 0, DL)));
12275 }
12276 }
12277 break;
12278 }
12279 case RISCVISD::BREV8: {
12280 MVT VT = N->getSimpleValueType(ResNo: 0);
12281 MVT XLenVT = Subtarget.getXLenVT();
12282 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12283 "Unexpected custom legalisation");
12284 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12285 SDValue NewOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 0));
12286 SDValue NewRes = DAG.getNode(Opcode: N->getOpcode(), DL, VT: XLenVT, Operand: NewOp);
12287 // ReplaceNodeResults requires we maintain the same type for the return
12288 // value.
12289 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: NewRes));
12290 break;
12291 }
12292 case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN < SEW, as the SEW
    // element type is illegal (currently only vXi64 on RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper and lower halves of the SEW-bit vector element, slid down to the
    // first element.
12299 SDValue Vec = N->getOperand(Num: 0);
12300 SDValue Idx = N->getOperand(Num: 1);
12301
12302 // The vector type hasn't been legalized yet so we can't issue target
12303 // specific nodes if it needs legalization.
12304 // FIXME: We would manually legalize if it's important.
12305 if (!isTypeLegal(VT: Vec.getValueType()))
12306 return;
12307
12308 MVT VecVT = Vec.getSimpleValueType();
12309
12310 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12311 VecVT.getVectorElementType() == MVT::i64 &&
12312 "Unexpected EXTRACT_VECTOR_ELT legalization");
12313
12314 // If this is a fixed vector, we need to convert it to a scalable vector.
12315 MVT ContainerVT = VecVT;
12316 if (VecVT.isFixedLengthVector()) {
12317 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
12318 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
12319 }
12320
12321 MVT XLenVT = Subtarget.getXLenVT();
12322
12323 // Use a VL of 1 to avoid processing more elements than we need.
12324 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget);
12325
12326 // Unless the index is known to be 0, we must slide the vector down to get
12327 // the desired element into index 0.
12328 if (!isNullConstant(V: Idx)) {
12329 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
12330 Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL);
12331 }
12332
12333 // Extract the lower XLEN bits of the correct vector element.
12334 SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
12335
12336 // To extract the upper XLEN bits of the vector element, shift the first
12337 // element right by 32 bits and re-extract the lower XLEN bits.
12338 SDValue ThirtyTwoV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
12339 N1: DAG.getUNDEF(VT: ContainerVT),
12340 N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL);
12341 SDValue LShr32 =
12342 DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: ContainerVT, N1: Vec, N2: ThirtyTwoV,
12343 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
12344
12345 SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32);
12346
12347 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12348 break;
12349 }
12350 case ISD::INTRINSIC_WO_CHAIN: {
12351 unsigned IntNo = N->getConstantOperandVal(Num: 0);
12352 switch (IntNo) {
12353 default:
12354 llvm_unreachable(
12355 "Don't know how to custom type legalize this intrinsic!");
12356 case Intrinsic::experimental_get_vector_length: {
12357 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12358 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12359 return;
12360 }
12361 case Intrinsic::experimental_cttz_elts: {
12362 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12363 Results.push_back(
12364 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res));
12365 return;
12366 }
12367 case Intrinsic::riscv_orc_b:
12368 case Intrinsic::riscv_brev8:
12369 case Intrinsic::riscv_sha256sig0:
12370 case Intrinsic::riscv_sha256sig1:
12371 case Intrinsic::riscv_sha256sum0:
12372 case Intrinsic::riscv_sha256sum1:
12373 case Intrinsic::riscv_sm3p0:
12374 case Intrinsic::riscv_sm3p1: {
12375 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12376 return;
12377 unsigned Opc;
12378 switch (IntNo) {
12379 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12380 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12381 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12382 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12383 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12384 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12385 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12386 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12387 }
12388
12389 SDValue NewOp =
12390 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12391 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12392 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12393 return;
12394 }
12395 case Intrinsic::riscv_sm4ks:
12396 case Intrinsic::riscv_sm4ed: {
12397 unsigned Opc =
12398 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12399 SDValue NewOp0 =
12400 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12401 SDValue NewOp1 =
12402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12403 SDValue Res =
12404 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12405 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12406 return;
12407 }
12408 case Intrinsic::riscv_mopr: {
12409 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12410 return;
12411 SDValue NewOp =
12412 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12413 SDValue Res = DAG.getNode(
12414 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12415 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12416 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12417 return;
12418 }
12419 case Intrinsic::riscv_moprr: {
12420 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12421 return;
12422 SDValue NewOp0 =
12423 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12424 SDValue NewOp1 =
12425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12426 SDValue Res = DAG.getNode(
12427 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12428 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12429 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12430 return;
12431 }
12432 case Intrinsic::riscv_clmul: {
12433 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12434 return;
12435
12436 SDValue NewOp0 =
12437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12438 SDValue NewOp1 =
12439 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12440 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12441 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12442 return;
12443 }
12444 case Intrinsic::riscv_clmulh:
12445 case Intrinsic::riscv_clmulr: {
12446 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12447 return;
12448
12449 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12450 // to the full 128-bit clmul result of multiplying two xlen values.
12451 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12452 // upper 32 bits.
12453 //
12454 // The alternative is to mask the inputs to 32 bits and use clmul, but
12455 // that requires two shifts to mask each input without zext.w.
12456 // FIXME: If the inputs are known zero extended or could be freely
12457 // zero extended, the mask form would be better.
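      // For example, for clmulh: clmul(a << 32, b << 32) is
      // clmul(a, b) << 64, so CLMULH of the shifted operands returns
      // clmul(a, b)[63:0], and the SRL by 32 below leaves bits [63:32],
      // i.e. the 32-bit clmulh result.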
12458 SDValue NewOp0 =
12459 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12460 SDValue NewOp1 =
12461 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12462 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12463 DAG.getConstant(32, DL, MVT::i64));
12464 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12465 DAG.getConstant(32, DL, MVT::i64));
12466 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12467 : RISCVISD::CLMULR;
12468 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12469 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12470 DAG.getConstant(32, DL, MVT::i64));
12471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12472 return;
12473 }
12474 case Intrinsic::riscv_vmv_x_s: {
12475 EVT VT = N->getValueType(ResNo: 0);
12476 MVT XLenVT = Subtarget.getXLenVT();
12477 if (VT.bitsLT(VT: XLenVT)) {
        // Simple case: just extract using vmv.x.s and truncate.
12479 SDValue Extract = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL,
12480 VT: Subtarget.getXLenVT(), Operand: N->getOperand(Num: 1));
12481 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Extract));
12482 return;
12483 }
12484
12485 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12486 "Unexpected custom legalization");
12487
12488 // We need to do the move in two steps.
12489 SDValue Vec = N->getOperand(Num: 1);
12490 MVT VecVT = Vec.getSimpleValueType();
12491
12492 // First extract the lower XLEN bits of the element.
12493 SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
12494
12495 // To extract the upper XLEN bits of the vector element, shift the first
12496 // element right by 32 bits and re-extract the lower XLEN bits.
12497 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT: VecVT, DL, DAG, Subtarget);
12498
12499 SDValue ThirtyTwoV =
12500 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT),
12501 N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL);
12502 SDValue LShr32 = DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: VecVT, N1: Vec, N2: ThirtyTwoV,
12503 N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL);
12504 SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32);
12505
12506 Results.push_back(
12507 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12508 break;
12509 }
12510 }
12511 break;
12512 }
12513 case ISD::VECREDUCE_ADD:
12514 case ISD::VECREDUCE_AND:
12515 case ISD::VECREDUCE_OR:
12516 case ISD::VECREDUCE_XOR:
12517 case ISD::VECREDUCE_SMAX:
12518 case ISD::VECREDUCE_UMAX:
12519 case ISD::VECREDUCE_SMIN:
12520 case ISD::VECREDUCE_UMIN:
12521 if (SDValue V = lowerVECREDUCE(Op: SDValue(N, 0), DAG))
12522 Results.push_back(Elt: V);
12523 break;
12524 case ISD::VP_REDUCE_ADD:
12525 case ISD::VP_REDUCE_AND:
12526 case ISD::VP_REDUCE_OR:
12527 case ISD::VP_REDUCE_XOR:
12528 case ISD::VP_REDUCE_SMAX:
12529 case ISD::VP_REDUCE_UMAX:
12530 case ISD::VP_REDUCE_SMIN:
12531 case ISD::VP_REDUCE_UMIN:
12532 if (SDValue V = lowerVPREDUCE(Op: SDValue(N, 0), DAG))
12533 Results.push_back(Elt: V);
12534 break;
12535 case ISD::GET_ROUNDING: {
12536 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12537 SDValue Res = DAG.getNode(Opcode: ISD::GET_ROUNDING, DL, VTList: VTs, N: N->getOperand(Num: 0));
12538 Results.push_back(Elt: Res.getValue(R: 0));
12539 Results.push_back(Elt: Res.getValue(R: 1));
12540 break;
12541 }
12542 }
12543}
12544
12545/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12546/// which corresponds to it.
12547static unsigned getVecReduceOpcode(unsigned Opc) {
12548 switch (Opc) {
12549 default:
    llvm_unreachable("Unhandled binary op to transform to a reduction");
12551 case ISD::ADD:
12552 return ISD::VECREDUCE_ADD;
12553 case ISD::UMAX:
12554 return ISD::VECREDUCE_UMAX;
12555 case ISD::SMAX:
12556 return ISD::VECREDUCE_SMAX;
12557 case ISD::UMIN:
12558 return ISD::VECREDUCE_UMIN;
12559 case ISD::SMIN:
12560 return ISD::VECREDUCE_SMIN;
12561 case ISD::AND:
12562 return ISD::VECREDUCE_AND;
12563 case ISD::OR:
12564 return ISD::VECREDUCE_OR;
12565 case ISD::XOR:
12566 return ISD::VECREDUCE_XOR;
12567 case ISD::FADD:
12568 // Note: This is the associative form of the generic reduction opcode.
12569 return ISD::VECREDUCE_FADD;
12570 }
12571}
12572
12573/// Perform two related transforms whose purpose is to incrementally recognize
12574/// an explode_vector followed by scalar reduction as a vector reduction node.
12575/// This exists to recover from a deficiency in SLP which can't handle
12576/// forests with multiple roots sharing common nodes. In some cases, one
12577/// of the trees will be vectorized, and the other will remain (unprofitably)
12578/// scalarized.
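/// For example, (add (add (extract_elt V, 0), (extract_elt V, 1)),
/// (extract_elt V, 2)) is first partially rewritten as
/// (add (vecreduce_add (extract_subvector [2 x VT] from V)),
/// (extract_elt V, 2)), and on a later visit is grown into
/// (vecreduce_add (extract_subvector [3 x VT] from V)).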
12579static SDValue
12580combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12581 const RISCVSubtarget &Subtarget) {
12582
  // These transforms need to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd-sized vector types.
12586 if (DAG.NewNodesMustHaveLegalTypes)
12587 return SDValue();
12588
12589 // Without V, this transform isn't useful. We could form the (illegal)
12590 // operations and let them be scalarized again, but there's really no point.
12591 if (!Subtarget.hasVInstructions())
12592 return SDValue();
12593
12594 const SDLoc DL(N);
12595 const EVT VT = N->getValueType(ResNo: 0);
12596 const unsigned Opc = N->getOpcode();
12597
12598 // For FADD, we only handle the case with reassociation allowed. We
12599 // could handle strict reduction order, but at the moment, there's no
12600 // known reason to, and the complexity isn't worth it.
12601 // TODO: Handle fminnum and fmaxnum here
12602 if (!VT.isInteger() &&
12603 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12604 return SDValue();
12605
12606 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12607 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12608 "Inconsistent mappings");
12609 SDValue LHS = N->getOperand(Num: 0);
12610 SDValue RHS = N->getOperand(Num: 1);
12611
12612 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12613 return SDValue();
12614
12615 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12616 std::swap(a&: LHS, b&: RHS);
12617
12618 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12619 !isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)))
12620 return SDValue();
12621
12622 uint64_t RHSIdx = cast<ConstantSDNode>(Val: RHS.getOperand(i: 1))->getLimitedValue();
12623 SDValue SrcVec = RHS.getOperand(i: 0);
12624 EVT SrcVecVT = SrcVec.getValueType();
12625 assert(SrcVecVT.getVectorElementType() == VT);
12626 if (SrcVecVT.isScalableVector())
12627 return SDValue();
12628
12629 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12630 return SDValue();
12631
12632 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12633 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12634 // root of our reduction tree. TODO: We could extend this to any two
12635 // adjacent aligned constant indices if desired.
12636 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12637 LHS.getOperand(i: 0) == SrcVec && isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
12638 uint64_t LHSIdx =
12639 cast<ConstantSDNode>(Val: LHS.getOperand(i: 1))->getLimitedValue();
12640 if (0 == std::min(a: LHSIdx, b: RHSIdx) && 1 == std::max(a: LHSIdx, b: RHSIdx)) {
12641 EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 2);
12642 SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec,
12643 N2: DAG.getVectorIdxConstant(Val: 0, DL));
12644 return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags: N->getFlags());
12645 }
12646 }
12647
  // Match (binop (reduce (extract_subvector V, 0)),
  //              (extract_vector_elt V, sizeof(SubVec)))
  // into a reduction of one more element from the original vector V.
12651 if (LHS.getOpcode() != ReduceOpc)
12652 return SDValue();
12653
12654 SDValue ReduceVec = LHS.getOperand(i: 0);
12655 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12656 ReduceVec.hasOneUse() && ReduceVec.getOperand(i: 0) == RHS.getOperand(i: 0) &&
12657 isNullConstant(V: ReduceVec.getOperand(i: 1)) &&
12658 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12659 // For illegal types (e.g. 3xi32), most will be combined again into a
12660 // wider (hopefully legal) type. If this is a terminal state, we are
12661 // relying on type legalization here to produce something reasonable
12662 // and this lowering quality could probably be improved. (TODO)
12663 EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: RHSIdx + 1);
12664 SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec,
12665 N2: DAG.getVectorIdxConstant(Val: 0, DL));
12666 auto Flags = ReduceVec->getFlags();
12667 Flags.intersectWith(Flags: N->getFlags());
12668 return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags);
12669 }
12670
12671 return SDValue();
12672}
12673
// Try to fold (<bop> x, (reduction.<bop> vec, start)), where start is the
// neutral element of <bop>, into (reduction.<bop> vec, x).
12676static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12677 const RISCVSubtarget &Subtarget) {
12678 auto BinOpToRVVReduce = [](unsigned Opc) {
12679 switch (Opc) {
12680 default:
      llvm_unreachable("Unhandled binary op to transform to a reduction");
12682 case ISD::ADD:
12683 return RISCVISD::VECREDUCE_ADD_VL;
12684 case ISD::UMAX:
12685 return RISCVISD::VECREDUCE_UMAX_VL;
12686 case ISD::SMAX:
12687 return RISCVISD::VECREDUCE_SMAX_VL;
12688 case ISD::UMIN:
12689 return RISCVISD::VECREDUCE_UMIN_VL;
12690 case ISD::SMIN:
12691 return RISCVISD::VECREDUCE_SMIN_VL;
12692 case ISD::AND:
12693 return RISCVISD::VECREDUCE_AND_VL;
12694 case ISD::OR:
12695 return RISCVISD::VECREDUCE_OR_VL;
12696 case ISD::XOR:
12697 return RISCVISD::VECREDUCE_XOR_VL;
12698 case ISD::FADD:
12699 return RISCVISD::VECREDUCE_FADD_VL;
12700 case ISD::FMAXNUM:
12701 return RISCVISD::VECREDUCE_FMAX_VL;
12702 case ISD::FMINNUM:
12703 return RISCVISD::VECREDUCE_FMIN_VL;
12704 }
12705 };
12706
12707 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12708 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12709 isNullConstant(V: V.getOperand(i: 1)) &&
12710 V.getOperand(i: 0).getOpcode() == BinOpToRVVReduce(Opc);
12711 };
12712
12713 unsigned Opc = N->getOpcode();
12714 unsigned ReduceIdx;
12715 if (IsReduction(N->getOperand(Num: 0), Opc))
12716 ReduceIdx = 0;
12717 else if (IsReduction(N->getOperand(Num: 1), Opc))
12718 ReduceIdx = 1;
12719 else
12720 return SDValue();
12721
  // Skip if FADD disallows reassociation but the combine requires it.
12723 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12724 return SDValue();
12725
12726 SDValue Extract = N->getOperand(Num: ReduceIdx);
12727 SDValue Reduce = Extract.getOperand(i: 0);
12728 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12729 return SDValue();
12730
12731 SDValue ScalarV = Reduce.getOperand(i: 2);
12732 EVT ScalarVT = ScalarV.getValueType();
12733 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12734 ScalarV.getOperand(i: 0)->isUndef() &&
12735 isNullConstant(V: ScalarV.getOperand(i: 2)))
12736 ScalarV = ScalarV.getOperand(i: 1);
12737
12738 // Make sure that ScalarV is a splat with VL=1.
12739 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12740 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12741 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12742 return SDValue();
12743
12744 if (!isNonZeroAVL(AVL: ScalarV.getOperand(i: 2)))
12745 return SDValue();
12746
  // Check that the scalar operand of ScalarV is the neutral element.
  // TODO: Deal with values other than the neutral element.
12749 if (!isNeutralConstant(Opc: N->getOpcode(), Flags: N->getFlags(), V: ScalarV.getOperand(i: 1),
12750 OperandNo: 0))
12751 return SDValue();
12752
12753 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12754 // FIXME: We might be able to improve this if operand 0 is undef.
12755 if (!isNonZeroAVL(AVL: Reduce.getOperand(i: 5)))
12756 return SDValue();
12757
12758 SDValue NewStart = N->getOperand(Num: 1 - ReduceIdx);
12759
12760 SDLoc DL(N);
12761 SDValue NewScalarV =
12762 lowerScalarInsert(Scalar: NewStart, VL: ScalarV.getOperand(i: 2),
12763 VT: ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12764
12765 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12766 if (ScalarVT != ScalarV.getValueType())
12767 NewScalarV =
12768 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ScalarVT, N1: DAG.getUNDEF(VT: ScalarVT),
12769 N2: NewScalarV, N3: DAG.getVectorIdxConstant(Val: 0, DL));
12770
12771 SDValue Ops[] = {Reduce.getOperand(i: 0), Reduce.getOperand(i: 1),
12772 NewScalarV, Reduce.getOperand(i: 3),
12773 Reduce.getOperand(i: 4), Reduce.getOperand(i: 5)};
12774 SDValue NewReduce =
12775 DAG.getNode(Opcode: Reduce.getOpcode(), DL, VT: Reduce.getValueType(), Ops);
12776 return DAG.getNode(Opcode: Extract.getOpcode(), DL, VT: Extract.getValueType(), N1: NewReduce,
12777 N2: Extract.getOperand(i: 1));
12778}
12779
// Optimize (add (shl x, c0), (shl y, c1)) ->
//   (SLLI (SH*ADD x, y), c0), if c1 - c0 is 1, 2, or 3.
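// For example, (add (shl x, 5), (shl y, 7)) is rebuilt as
// (shl (add (shl y, 2), x), 5), where the inner (add (shl y, 2), x) maps to a
// single SH2ADD, giving SLLI+SH2ADD instead of two SLLIs and an ADD.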
12782static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12783 const RISCVSubtarget &Subtarget) {
12784 // Perform this optimization only in the zba extension.
12785 if (!Subtarget.hasStdExtZba())
12786 return SDValue();
12787
12788 // Skip for vector types and larger types.
12789 EVT VT = N->getValueType(ResNo: 0);
12790 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12791 return SDValue();
12792
12793 // The two operand nodes must be SHL and have no other use.
12794 SDValue N0 = N->getOperand(Num: 0);
12795 SDValue N1 = N->getOperand(Num: 1);
12796 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12797 !N0->hasOneUse() || !N1->hasOneUse())
12798 return SDValue();
12799
12800 // Check c0 and c1.
12801 auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
12802 auto *N1C = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1));
12803 if (!N0C || !N1C)
12804 return SDValue();
12805 int64_t C0 = N0C->getSExtValue();
12806 int64_t C1 = N1C->getSExtValue();
12807 if (C0 <= 0 || C1 <= 0)
12808 return SDValue();
12809
12810 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12811 int64_t Bits = std::min(a: C0, b: C1);
12812 int64_t Diff = std::abs(i: C0 - C1);
12813 if (Diff != 1 && Diff != 2 && Diff != 3)
12814 return SDValue();
12815
12816 // Build nodes.
12817 SDLoc DL(N);
12818 SDValue NS = (C0 < C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0);
12819 SDValue NL = (C0 > C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0);
12820 SDValue NA0 =
12821 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NL, N2: DAG.getConstant(Val: Diff, DL, VT));
12822 SDValue NA1 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NA0, N2: NS);
12823 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NA1, N2: DAG.getConstant(Val: Bits, DL, VT));
12824}
12825
12826// Combine a constant select operand into its use:
12827//
12828// (and (select cond, -1, c), x)
12829// -> (select cond, x, (and x, c)) [AllOnes=1]
12830// (or (select cond, 0, c), x)
12831// -> (select cond, x, (or x, c)) [AllOnes=0]
12832// (xor (select cond, 0, c), x)
12833// -> (select cond, x, (xor x, c)) [AllOnes=0]
12834// (add (select cond, 0, c), x)
12835// -> (select cond, x, (add x, c)) [AllOnes=0]
12836// (sub x, (select cond, 0, c))
12837// -> (select cond, x, (sub x, c)) [AllOnes=0]
12838static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12839 SelectionDAG &DAG, bool AllOnes,
12840 const RISCVSubtarget &Subtarget) {
12841 EVT VT = N->getValueType(ResNo: 0);
12842
12843 // Skip vectors.
12844 if (VT.isVector())
12845 return SDValue();
12846
12847 if (!Subtarget.hasConditionalMoveFusion()) {
12848 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12849 if ((!Subtarget.hasStdExtZicond() &&
12850 !Subtarget.hasVendorXVentanaCondOps()) ||
12851 N->getOpcode() != ISD::AND)
12852 return SDValue();
12853
    // Maybe harmful when the condition code has multiple uses.
12855 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(i: 0).hasOneUse())
12856 return SDValue();
12857
12858 // Maybe harmful when VT is wider than XLen.
12859 if (VT.getSizeInBits() > Subtarget.getXLen())
12860 return SDValue();
12861 }
12862
12863 if ((Slct.getOpcode() != ISD::SELECT &&
12864 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12865 !Slct.hasOneUse())
12866 return SDValue();
12867
12868 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12869 return AllOnes ? isAllOnesConstant(V: N) : isNullConstant(V: N);
12870 };
12871
12872 bool SwapSelectOps;
12873 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12874 SDValue TrueVal = Slct.getOperand(i: 1 + OpOffset);
12875 SDValue FalseVal = Slct.getOperand(i: 2 + OpOffset);
12876 SDValue NonConstantVal;
12877 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12878 SwapSelectOps = false;
12879 NonConstantVal = FalseVal;
12880 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12881 SwapSelectOps = true;
12882 NonConstantVal = TrueVal;
12883 } else
12884 return SDValue();
12885
  // Slct is now known to be the desired identity constant when CC is true.
12887 TrueVal = OtherOp;
12888 FalseVal = DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT, N1: OtherOp, N2: NonConstantVal);
12889 // Unless SwapSelectOps says the condition should be false.
12890 if (SwapSelectOps)
12891 std::swap(a&: TrueVal, b&: FalseVal);
12892
12893 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12894 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL: SDLoc(N), VT,
12895 Ops: {Slct.getOperand(i: 0), Slct.getOperand(i: 1),
12896 Slct.getOperand(i: 2), TrueVal, FalseVal});
12897
12898 return DAG.getNode(Opcode: ISD::SELECT, DL: SDLoc(N), VT,
12899 Ops: {Slct.getOperand(i: 0), TrueVal, FalseVal});
12900}
12901
12902// Attempt combineSelectAndUse on each operand of a commutative operator N.
12903static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12904 bool AllOnes,
12905 const RISCVSubtarget &Subtarget) {
12906 SDValue N0 = N->getOperand(Num: 0);
12907 SDValue N1 = N->getOperand(Num: 1);
12908 if (SDValue Result = combineSelectAndUse(N, Slct: N0, OtherOp: N1, DAG, AllOnes, Subtarget))
12909 return Result;
12910 if (SDValue Result = combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, AllOnes, Subtarget))
12911 return Result;
12912 return SDValue();
12913}
12914
12915// Transform (add (mul x, c0), c1) ->
12916// (add (mul (add x, c1/c0), c0), c1%c0).
12917// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12918// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12919// to an infinite loop in DAGCombine if transformed.
12920// Or transform (add (mul x, c0), c1) ->
12921// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12922// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12923// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12924// lead to an infinite loop in DAGCombine if transformed.
12925// Or transform (add (mul x, c0), c1) ->
12926// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12927// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12928// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12929// lead to an infinite loop in DAGCombine if transformed.
12930// Or transform (add (mul x, c0), c1) ->
12931// (mul (add x, c1/c0), c0).
12932// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
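// Worked example for the first form: (add (mul x, 100), 12345) becomes
// (add (mul (add x, 123), 100), 45). Here 12345 is not simm12, 123 and 45
// are, and 100 * 123 == 12300 is not simm12, so the excluded corner case is
// avoided.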
12933static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12934 const RISCVSubtarget &Subtarget) {
12935 // Skip for vector types and larger types.
12936 EVT VT = N->getValueType(ResNo: 0);
12937 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12938 return SDValue();
  // The first operand node must be a MUL and have no other use.
12940 SDValue N0 = N->getOperand(Num: 0);
12941 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12942 return SDValue();
  // Check if c0 and c1 match the above conditions.
12944 auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
12945 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
12946 if (!N0C || !N1C)
12947 return SDValue();
12948 // If N0C has multiple uses it's possible one of the cases in
12949 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12950 // in an infinite loop.
12951 if (!N0C->hasOneUse())
12952 return SDValue();
12953 int64_t C0 = N0C->getSExtValue();
12954 int64_t C1 = N1C->getSExtValue();
12955 int64_t CA, CB;
12956 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(x: C1))
12957 return SDValue();
  // Search for a proper CA (non-zero) and CB such that both are simm12.
12959 if ((C1 / C0) != 0 && isInt<12>(x: C1 / C0) && isInt<12>(x: C1 % C0) &&
12960 !isInt<12>(x: C0 * (C1 / C0))) {
12961 CA = C1 / C0;
12962 CB = C1 % C0;
12963 } else if ((C1 / C0 + 1) != 0 && isInt<12>(x: C1 / C0 + 1) &&
12964 isInt<12>(x: C1 % C0 - C0) && !isInt<12>(x: C0 * (C1 / C0 + 1))) {
12965 CA = C1 / C0 + 1;
12966 CB = C1 % C0 - C0;
12967 } else if ((C1 / C0 - 1) != 0 && isInt<12>(x: C1 / C0 - 1) &&
12968 isInt<12>(x: C1 % C0 + C0) && !isInt<12>(x: C0 * (C1 / C0 - 1))) {
12969 CA = C1 / C0 - 1;
12970 CB = C1 % C0 + C0;
12971 } else
12972 return SDValue();
12973 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12974 SDLoc DL(N);
12975 SDValue New0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0->getOperand(Num: 0),
12976 N2: DAG.getConstant(Val: CA, DL, VT));
12977 SDValue New1 =
12978 DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: New0, N2: DAG.getConstant(Val: C0, DL, VT));
12979 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: New1, N2: DAG.getConstant(Val: CB, DL, VT));
12980}
12981
12982// add (zext, zext) -> zext (add (zext, zext))
12983// sub (zext, zext) -> sext (sub (zext, zext))
12984// mul (zext, zext) -> zext (mul (zext, zext))
12985// sdiv (zext, zext) -> zext (sdiv (zext, zext))
12986// udiv (zext, zext) -> zext (udiv (zext, zext))
12987// srem (zext, zext) -> zext (srem (zext, zext))
12988// urem (zext, zext) -> zext (urem (zext, zext))
12989//
// where the inner and outer extends together cover the original extend width,
// and the range of the bin op fits inside the width of the narrower bin op.
// (For profitability on RVV, we use a power of two for both the inner and
// outer extends.)
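// For example, assuming <vscale x 4 x i32> is the (legal) result type:
//   (add (zext <vscale x 4 x i8> a), (zext <vscale x 4 x i8> b))
// becomes
//   (zext (add (zext a to <vscale x 4 x i16>),
//              (zext b to <vscale x 4 x i16>))),
// so the add runs at half the original element width and may later be matched
// as a widening add (e.g. vwaddu.vv).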
12993static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12994
12995 EVT VT = N->getValueType(ResNo: 0);
12996 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12997 return SDValue();
12998
12999 SDValue N0 = N->getOperand(Num: 0);
13000 SDValue N1 = N->getOperand(Num: 1);
13001 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13002 return SDValue();
13003 if (!N0.hasOneUse() || !N1.hasOneUse())
13004 return SDValue();
13005
13006 SDValue Src0 = N0.getOperand(i: 0);
13007 SDValue Src1 = N1.getOperand(i: 0);
13008 EVT SrcVT = Src0.getValueType();
13009 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT: SrcVT) ||
13010 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13011 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13012 return SDValue();
13013
13014 LLVMContext &C = *DAG.getContext();
13015 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(Context&: C);
13016 EVT NarrowVT = EVT::getVectorVT(Context&: C, VT: ElemVT, EC: VT.getVectorElementCount());
13017
13018 Src0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src0), VT: NarrowVT, Operand: Src0);
13019 Src1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src1), VT: NarrowVT, Operand: Src1);
13020
13021 // Src0 and Src1 are zero extended, so they're always positive if signed.
13022 //
13023 // sub can produce a negative from two positive operands, so it needs sign
13024 // extended. Other nodes produce a positive from two positive operands, so
13025 // zero extend instead.
13026 unsigned OuterExtend =
13027 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13028
13029 return DAG.getNode(
13030 Opcode: OuterExtend, DL: SDLoc(N), VT,
13031 Operand: DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT: NarrowVT, N1: Src0, N2: Src1));
13032}
13033
13034// Try to turn (add (xor bool, 1) -1) into (neg bool).
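// Since bool is known to be 0 or 1, (xor bool, 1) == 1 - bool, so adding -1
// yields -bool, i.e. (neg bool).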
13035static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13036 SDValue N0 = N->getOperand(Num: 0);
13037 SDValue N1 = N->getOperand(Num: 1);
13038 EVT VT = N->getValueType(ResNo: 0);
13039 SDLoc DL(N);
13040
13041 // RHS should be -1.
13042 if (!isAllOnesConstant(V: N1))
13043 return SDValue();
13044
13045 // Look for (xor X, 1).
13046 if (N0.getOpcode() != ISD::XOR || !isOneConstant(V: N0.getOperand(i: 1)))
13047 return SDValue();
13048
13049 // First xor input should be 0 or 1.
13050 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
13051 if (!DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask))
13052 return SDValue();
13053
13054 // Emit a negate of the setcc.
13055 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
13056 N2: N0.getOperand(i: 0));
13057}
13058
13059static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13060 const RISCVSubtarget &Subtarget) {
13061 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13062 return V;
13063 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13064 return V;
13065 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13066 return V;
13067 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13068 return V;
13069 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13070 return V;
13071 if (SDValue V = combineBinOpOfZExt(N, DAG))
13072 return V;
13073
13074 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13075 // (select lhs, rhs, cc, x, (add x, y))
13076 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13077}
13078
// Try to turn a sub with a boolean RHS and a constant LHS into an addi.
13080static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13081 SDValue N0 = N->getOperand(Num: 0);
13082 SDValue N1 = N->getOperand(Num: 1);
13083 EVT VT = N->getValueType(ResNo: 0);
13084 SDLoc DL(N);
13085
13086 // Require a constant LHS.
13087 auto *N0C = dyn_cast<ConstantSDNode>(Val&: N0);
13088 if (!N0C)
13089 return SDValue();
13090
13091 // All our optimizations involve subtracting 1 from the immediate and forming
13092 // an ADDI. Make sure the new immediate is valid for an ADDI.
13093 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13094 if (!ImmValMinus1.isSignedIntN(N: 12))
13095 return SDValue();
13096
13097 SDValue NewLHS;
13098 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13099 // (sub constant, (setcc x, y, eq/neq)) ->
13100 // (add (setcc x, y, neq/eq), constant - 1)
13101 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get();
13102 EVT SetCCOpVT = N1.getOperand(i: 0).getValueType();
13103 if (!isIntEqualitySetCC(Code: CCVal) || !SetCCOpVT.isInteger())
13104 return SDValue();
13105 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT);
13106 NewLHS =
13107 DAG.getSetCC(DL: SDLoc(N1), VT, LHS: N1.getOperand(i: 0), RHS: N1.getOperand(i: 1), Cond: CCVal);
13108 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(V: N1.getOperand(i: 1)) &&
13109 N1.getOperand(i: 0).getOpcode() == ISD::SETCC) {
13110 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13111 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13112 NewLHS = N1.getOperand(i: 0);
13113 } else
13114 return SDValue();
13115
13116 SDValue NewRHS = DAG.getConstant(Val: ImmValMinus1, DL, VT);
13117 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NewLHS, N2: NewRHS);
13118}
13119
13120static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13121 const RISCVSubtarget &Subtarget) {
13122 if (SDValue V = combineSubOfBoolean(N, DAG))
13123 return V;
13124
13125 EVT VT = N->getValueType(ResNo: 0);
13126 SDValue N0 = N->getOperand(Num: 0);
13127 SDValue N1 = N->getOperand(Num: 1);
13128 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13129 if (isNullConstant(V: N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13130 isNullConstant(V: N1.getOperand(i: 1))) {
13131 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get();
13132 if (CCVal == ISD::SETLT) {
13133 SDLoc DL(N);
13134 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13135 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N1.getOperand(i: 0),
13136 N2: DAG.getConstant(Val: ShAmt, DL, VT));
13137 }
13138 }
13139
13140 if (SDValue V = combineBinOpOfZExt(N, DAG))
13141 return V;
13142
13143 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13144 // (select lhs, rhs, cc, x, (sub x, y))
13145 return combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, /*AllOnes*/ false, Subtarget);
13146}
13147
13148// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13149// Legalizing setcc can introduce xors like this. Doing this transform reduces
13150// the number of xors and may allow the xor to fold into a branch condition.
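// For example, (and (xor X, 1), (xor Y, 1)) becomes (xor (or X, Y), 1), and
// (or (xor X, 1), (xor Y, 1)) becomes (xor (and X, Y), 1).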
13151static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13152 SDValue N0 = N->getOperand(Num: 0);
13153 SDValue N1 = N->getOperand(Num: 1);
13154 bool IsAnd = N->getOpcode() == ISD::AND;
13155
13156 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13157 return SDValue();
13158
13159 if (!N0.hasOneUse() || !N1.hasOneUse())
13160 return SDValue();
13161
13162 SDValue N01 = N0.getOperand(i: 1);
13163 SDValue N11 = N1.getOperand(i: 1);
13164
13165 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13166 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13167 // operation is And, allow one of the Xors to use -1.
13168 if (isOneConstant(V: N01)) {
13169 if (!isOneConstant(V: N11) && !(IsAnd && isAllOnesConstant(V: N11)))
13170 return SDValue();
13171 } else if (isOneConstant(V: N11)) {
    // The case of both N01 and N11 being 1 was already handled.
    // Handle N11 == 1 and N01 == -1.
13173 if (!(IsAnd && isAllOnesConstant(V: N01)))
13174 return SDValue();
13175 } else
13176 return SDValue();
13177
13178 EVT VT = N->getValueType(ResNo: 0);
13179
13180 SDValue N00 = N0.getOperand(i: 0);
13181 SDValue N10 = N1.getOperand(i: 0);
13182
13183 // The LHS of the xors needs to be 0/1.
13184 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
13185 if (!DAG.MaskedValueIsZero(Op: N00, Mask) || !DAG.MaskedValueIsZero(Op: N10, Mask))
13186 return SDValue();
13187
13188 // Invert the opcode and insert a new xor.
13189 SDLoc DL(N);
13190 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13191 SDValue Logic = DAG.getNode(Opcode: Opc, DL, VT, N1: N00, N2: N10);
13192 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Logic, N2: DAG.getConstant(Val: 1, DL, VT));
13193}
13194
13195static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13196 const RISCVSubtarget &Subtarget) {
13197 SDValue N0 = N->getOperand(Num: 0);
13198 EVT VT = N->getValueType(ResNo: 0);
13199
13200 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13201 // extending X. This is safe since we only need the LSB after the shift and
13202 // shift amounts larger than 31 would produce poison. If we wait until
13203 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13204 // to use a BEXT instruction.
  if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      VT == MVT::i1 && N0.getValueType() == MVT::i32 &&
      N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.hasOneUse()) {
13208 SDLoc DL(N0);
13209 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13210 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13211 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13212 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT, Operand: Srl);
13213 }
13214
13215 return SDValue();
13216}
13217
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and a logic operation. Returns the newly constructed
// node if the conditions for the optimization are satisfied.
13221static SDValue performANDCombine(SDNode *N,
13222 TargetLowering::DAGCombinerInfo &DCI,
13223 const RISCVSubtarget &Subtarget) {
13224 SelectionDAG &DAG = DCI.DAG;
13225
13226 SDValue N0 = N->getOperand(Num: 0);
13227 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13228 // extending X. This is safe since we only need the LSB after the shift and
13229 // shift amounts larger than 31 would produce poison. If we wait until
13230 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13231 // to use a BEXT instruction.
13232 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13233 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13234 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13235 N0.hasOneUse()) {
13236 SDLoc DL(N);
13237 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13238 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13239 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13240 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13241 DAG.getConstant(1, DL, MVT::i64));
13242 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13243 }
13244
13245 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13246 return V;
13247 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13248 return V;
13249
13250 if (DCI.isAfterLegalizeDAG())
13251 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13252 return V;
13253
13254 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13255 // (select lhs, rhs, cc, x, (and x, y))
13256 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13257}
13258
13259// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13260// FIXME: Generalize to other binary operators with same operand.
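// For example, (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
// becomes (xor (or (czero_eqz a, c), (czero_nez b, c)), 1), leaving a single
// xor after the reformed select idiom.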
13261static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13262 SelectionDAG &DAG) {
13263 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13264
13265 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13266 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13267 !N0.hasOneUse() || !N1.hasOneUse())
13268 return SDValue();
13269
13270 // Should have the same condition.
13271 SDValue Cond = N0.getOperand(i: 1);
13272 if (Cond != N1.getOperand(i: 1))
13273 return SDValue();
13274
13275 SDValue TrueV = N0.getOperand(i: 0);
13276 SDValue FalseV = N1.getOperand(i: 0);
13277
13278 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13279 TrueV.getOperand(i: 1) != FalseV.getOperand(i: 1) ||
13280 !isOneConstant(V: TrueV.getOperand(i: 1)) ||
13281 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13282 return SDValue();
13283
13284 EVT VT = N->getValueType(ResNo: 0);
13285 SDLoc DL(N);
13286
13287 SDValue NewN0 = DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV.getOperand(i: 0),
13288 N2: Cond);
13289 SDValue NewN1 = DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV.getOperand(i: 0),
13290 N2: Cond);
13291 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: NewN0, N2: NewN1);
13292 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewOr, N2: TrueV.getOperand(i: 1));
13293}
13294
13295static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13296 const RISCVSubtarget &Subtarget) {
13297 SelectionDAG &DAG = DCI.DAG;
13298
13299 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13300 return V;
13301 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13302 return V;
13303
13304 if (DCI.isAfterLegalizeDAG())
13305 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13306 return V;
13307
  // Look for an OR of CZERO_EQZ/NEZ with the same condition; this is the
  // select idiom. We may be able to pull a common operation out of the true
  // and false values.
13310 SDValue N0 = N->getOperand(Num: 0);
13311 SDValue N1 = N->getOperand(Num: 1);
13312 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13313 return V;
13314 if (SDValue V = combineOrOfCZERO(N, N0: N1, N1: N0, DAG))
13315 return V;
13316
13317 // fold (or (select cond, 0, y), x) ->
13318 // (select cond, x, (or x, y))
13319 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13320}
13321
13322static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13323 const RISCVSubtarget &Subtarget) {
13324 SDValue N0 = N->getOperand(Num: 0);
13325 SDValue N1 = N->getOperand(Num: 1);
13326
13327 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13330 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13331 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13332 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13333 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13334 SDLoc DL(N);
13335 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13336 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13337 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13338 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13339 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13340 }
13341
13342 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13343 // NOTE: Assumes ROL being legal means ROLW is legal.
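// Illustrative: for x in [0, 31], (xor (sllw 1, x), -1) computes ~(1 << x),
// which equals rotating the 32-bit value 0xfffffffe left by x, since the
// rotate just moves the single zero bit to position x.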
13344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13345 if (N0.getOpcode() == RISCVISD::SLLW &&
13346 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13347 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13348 SDLoc DL(N);
13349 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13350 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13351 }
13352
13353 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
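// e.g. (xor (setlt 7, y), 1) == !(7 < y) == (y < 8) == (setlt y, 8)
// (illustrative), provided the incremented constant still fits in 12 bits.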
13354 if (N0.getOpcode() == ISD::SETCC && isOneConstant(V: N1) && N0.hasOneUse()) {
13355 auto *ConstN00 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0));
13356 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
13357 if (ConstN00 && CC == ISD::SETLT) {
13358 EVT VT = N0.getValueType();
13359 SDLoc DL(N0);
13360 const APInt &Imm = ConstN00->getAPIntValue();
13361 if ((Imm + 1).isSignedIntN(N: 12))
13362 return DAG.getSetCC(DL, VT, LHS: N0.getOperand(i: 1),
13363 RHS: DAG.getConstant(Val: Imm + 1, DL, VT), Cond: CC);
13364 }
13365 }
13366
13367 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13368 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13369 // would have been promoted to i32, but the setcc would have an i64 result.
13370 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13371 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13372 SDValue N00 = N0.getOperand(i: 0);
13373 SDLoc DL(N);
13374 SDValue LHS = N00.getOperand(i: 0);
13375 SDValue RHS = N00.getOperand(i: 1);
13376 SDValue CC = N00.getOperand(i: 2);
13377 ISD::CondCode NotCC = ISD::getSetCCInverse(Operation: cast<CondCodeSDNode>(Val&: CC)->get(),
13378 Type: LHS.getValueType());
13379 SDValue Setcc = DAG.getSetCC(DL: SDLoc(N00), VT: N0.getOperand(i: 0).getValueType(),
13380 LHS, RHS, Cond: NotCC);
13381 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N0), VT: N->getValueType(ResNo: 0), Operand: Setcc);
13382 }
13383
13384 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13385 return V;
13386 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13387 return V;
13388
13389 // fold (xor (select cond, 0, y), x) ->
13390 // (select cond, x, (xor x, y))
13391 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13392}
13393
13394// Try to expand a scalar multiply to a faster sequence.
13395static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13396 TargetLowering::DAGCombinerInfo &DCI,
13397 const RISCVSubtarget &Subtarget) {
13398
13399 EVT VT = N->getValueType(ResNo: 0);
13400
13401 // LI + MUL is usually smaller than the alternative sequence.
13402 if (DAG.getMachineFunction().getFunction().hasMinSize())
13403 return SDValue();
13404
13405 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13406 return SDValue();
13407
13408 if (VT != Subtarget.getXLenVT())
13409 return SDValue();
13410
13411 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13412 return SDValue();
13413
13414 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
13415 if (!CNode)
13416 return SDValue();
13417 uint64_t MulAmt = CNode->getZExtValue();
13418
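// Worked example (illustrative, using Zba's shNadd mnemonics): MulAmt == 45
// == 5 * 9 can be lowered as
//   sh2add t, x, x   // t = (x << 2) + x = x * 5
//   sh3add r, t, t   // r = (t << 3) + t = t * 9 = x * 45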
13419 for (uint64_t Divisor : {3, 5, 9}) {
13420 if (MulAmt % Divisor != 0)
13421 continue;
13422 uint64_t MulAmt2 = MulAmt / Divisor;
13423 // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13424 // Matched in tablegen, avoid perturbing patterns.
13425 if (isPowerOf2_64(Value: MulAmt2))
13426 return SDValue();
13427
13428 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13429 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13430 SDLoc DL(N);
13431 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13432 SDValue Mul359 =
13433 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13434 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
13435 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359,
13436 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2 - 1), DL, VT),
13437 N3: Mul359);
13438 }
13439 }
13440
13441 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13442 // shXadd. First check if this is a sum of two powers of 2 because that's
13443 // easy. Then count the trailing zeros to find the low set bit.
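// Worked example (illustrative): MulAmt == 68 == 64 + 4 can be lowered as
//   slli   t, x, 6   // t = x * 64
//   sh2add r, x, t   // r = (x << 2) + t = x * 68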
13444 if (isPowerOf2_64(Value: MulAmt & (MulAmt - 1))) {
13445 unsigned ScaleShift = llvm::countr_zero(Val: MulAmt);
13446 if (ScaleShift >= 1 && ScaleShift < 4) {
13447 unsigned ShiftAmt = Log2_64(Value: (MulAmt & (MulAmt - 1)));
13448 SDLoc DL(N);
13449 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13450 SDValue Shift1 =
13451 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
13452 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13453 N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: Shift1);
13454 }
13455 }
13456
13457 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13458 // This is the two-instruction form; there are also three-instruction
13459 // variants we could implement, e.g.
13460 // (2^(1,2,3) * 3,5,9 + 1) << C2
13461 // 2^(C1>3) * 3,5,9 +/- 1
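// Worked example (illustrative): MulAmt == 19 == (9 << 1) + 1 can be lowered as
//   sh3add t, x, x   // t = x * 9
//   sh1add r, t, x   // r = (t << 1) + x = x * 19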
13462 for (uint64_t Divisor : {3, 5, 9}) {
13463 uint64_t C = MulAmt - 1;
13464 if (C <= Divisor)
13465 continue;
13466 unsigned TZ = llvm::countr_zero(Val: C);
13467 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13468 SDLoc DL(N);
13469 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13470 SDValue Mul359 =
13471 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13472 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
13473 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359,
13474 N2: DAG.getConstant(Val: TZ, DL, VT), N3: X);
13475 }
13476 }
13477
13478 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
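// Worked example (illustrative): MulAmt == 35 == 32 + 2 + 1 can be lowered as
//   slli   t0, x, 5    // t0 = x * 32
//   sh1add t1, x, x    // t1 = x * 3
//   add    r, t0, t1   // r = x * 35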
13479 if (MulAmt > 2 && isPowerOf2_64(Value: (MulAmt - 1) & (MulAmt - 2))) {
13480 unsigned ScaleShift = llvm::countr_zero(Val: MulAmt - 1);
13481 if (ScaleShift >= 1 && ScaleShift < 4) {
13482 unsigned ShiftAmt = Log2_64(Value: ((MulAmt - 1) & (MulAmt - 2)));
13483 SDLoc DL(N);
13484 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13485 SDValue Shift1 =
13486 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
13487 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shift1,
13488 N2: DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13489 N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: X));
13490 }
13491 }
13492
13493 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
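// Worked example (illustrative): MulAmt == 59 == 64 - 5 can be lowered as
//   slli   t0, x, 6    // t0 = x * 64
//   sh2add t1, x, x    // t1 = x * 5
//   sub    r, t0, t1   // r = x * 59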
13494 for (uint64_t Offset : {3, 5, 9}) {
13495 if (isPowerOf2_64(Value: MulAmt + Offset)) {
13496 SDLoc DL(N);
13497 SDValue Shift1 =
13498 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
13499 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt + Offset), DL, VT));
13500 SDValue Mul359 = DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: N->getOperand(Num: 0),
13501 N2: DAG.getConstant(Val: Log2_64(Value: Offset - 1), DL, VT),
13502 N3: N->getOperand(Num: 0));
13503 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Shift1, N2: Mul359);
13504 }
13505 }
13506
13507 return SDValue();
13508}
13509
13510
13511static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13512 TargetLowering::DAGCombinerInfo &DCI,
13513 const RISCVSubtarget &Subtarget) {
13514 EVT VT = N->getValueType(ResNo: 0);
13515 if (!VT.isVector())
13516 return expandMul(N, DAG, DCI, Subtarget);
13517
13518 SDLoc DL(N);
13519 SDValue N0 = N->getOperand(Num: 0);
13520 SDValue N1 = N->getOperand(Num: 1);
13521 SDValue MulOper;
13522 unsigned AddSubOpc;
13523
13524 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13525 //        (mul x, (add y, 1)) -> (add x, (mul x, y))
13526 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13527 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13528 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13529 AddSubOpc = V->getOpcode();
13530 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13531 SDValue Opnd = V->getOperand(Num: 1);
13532 MulOper = V->getOperand(Num: 0);
13533 if (AddSubOpc == ISD::SUB)
13534 std::swap(a&: Opnd, b&: MulOper);
13535 if (isOneOrOneSplat(V: Opnd))
13536 return true;
13537 }
13538 return false;
13539 };
13540
13541 if (IsAddSubWith1(N0)) {
13542 SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1, N2: MulOper);
13543 return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1, N2: MulVal);
13544 }
13545
13546 if (IsAddSubWith1(N1)) {
13547 SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0, N2: MulOper);
13548 return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1: N0, N2: MulVal);
13549 }
13550
13551 if (SDValue V = combineBinOpOfZExt(N, DAG))
13552 return V;
13553
13554 return SDValue();
13555}
13556
13557/// According to the property that indexed load/store instructions zero-extend
13558/// their indices, try to narrow the type of index operand.
13559static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13560 if (isIndexTypeSigned(IndexType))
13561 return false;
13562
13563 if (!N->hasOneUse())
13564 return false;
13565
13566 EVT VT = N.getValueType();
13567 SDLoc DL(N);
13568
13569 // In general, what we're doing here is seeing if we can sink a truncate to
13570 // a smaller element type into the expression tree building our index.
13571 // TODO: We can generalize this and handle a bunch more cases if useful.
13572
13573 // Narrow a buildvector to the narrowest element type. This requires less
13574 // work and less register pressure at high LMUL, and creates smaller constants
13575 // which may be cheaper to materialize.
13576 if (ISD::isBuildVectorOfConstantSDNodes(N: N.getNode())) {
13577 KnownBits Known = DAG.computeKnownBits(Op: N);
13578 unsigned ActiveBits = std::max(a: 8u, b: Known.countMaxActiveBits());
13579 LLVMContext &C = *DAG.getContext();
13580 EVT ResultVT = EVT::getIntegerVT(Context&: C, BitWidth: ActiveBits).getRoundIntegerType(Context&: C);
13581 if (ResultVT.bitsLT(VT: VT.getVectorElementType())) {
13582 N = DAG.getNode(Opcode: ISD::TRUNCATE, DL,
13583 VT: VT.changeVectorElementType(EltVT: ResultVT), Operand: N);
13584 return true;
13585 }
13586 }
13587
13588 // Handle the pattern (shl (zext x to ty), C) where bits(x) + C < bits(ty).
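// Illustrative: an index such as (shl (zext nxv2i8 %x to nxv2i64), splat 2)
// only needs 8 + 2 = 10 bits, so it can be rebuilt as
// (shl (zext %x to nxv2i16), splat 2), which is cheaper at high LMUL.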
13589 if (N.getOpcode() != ISD::SHL)
13590 return false;
13591
13592 SDValue N0 = N.getOperand(i: 0);
13593 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13594 N0.getOpcode() != RISCVISD::VZEXT_VL)
13595 return false;
13596 if (!N0->hasOneUse())
13597 return false;
13598
13599 APInt ShAmt;
13600 SDValue N1 = N.getOperand(i: 1);
13601 if (!ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: ShAmt))
13602 return false;
13603
13604 SDValue Src = N0.getOperand(i: 0);
13605 EVT SrcVT = Src.getValueType();
13606 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13607 unsigned ShAmtV = ShAmt.getZExtValue();
13608 unsigned NewElen = PowerOf2Ceil(A: SrcElen + ShAmtV);
13609 NewElen = std::max(a: NewElen, b: 8U);
13610
13611 // Skip if NewElen is not narrower than the original extended type.
13612 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13613 return false;
13614
13615 EVT NewEltVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewElen);
13616 EVT NewVT = SrcVT.changeVectorElementType(EltVT: NewEltVT);
13617
13618 SDValue NewExt = DAG.getNode(Opcode: N0->getOpcode(), DL, VT: NewVT, Ops: N0->ops());
13619 SDValue NewShAmtVec = DAG.getConstant(Val: ShAmtV, DL, VT: NewVT);
13620 N = DAG.getNode(Opcode: ISD::SHL, DL, VT: NewVT, N1: NewExt, N2: NewShAmtVec);
13621 return true;
13622}
13623
13624// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13625// (seteq (i64 (sext_inreg X, i32)), C1') where C1' is C1 sign extended from
13626// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13627// can become a sext.w instead of a shift pair.
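// e.g. (seteq (and X, 0xffffffff), 0x80000000)
//   -> (seteq (sext_inreg X, i32), 0xffffffff80000000)
// where the new constant is a single LUI on RV64 and the sext_inreg selects to
// sext.w (illustrative).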
13628static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13629 const RISCVSubtarget &Subtarget) {
13630 SDValue N0 = N->getOperand(Num: 0);
13631 SDValue N1 = N->getOperand(Num: 1);
13632 EVT VT = N->getValueType(ResNo: 0);
13633 EVT OpVT = N0.getValueType();
13634
13635 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13636 return SDValue();
13637
13638 // RHS needs to be a constant.
13639 auto *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
13640 if (!N1C)
13641 return SDValue();
13642
13643 // LHS needs to be (and X, 0xffffffff).
13644 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13645 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) ||
13646 N0.getConstantOperandVal(i: 1) != UINT64_C(0xffffffff))
13647 return SDValue();
13648
13649 // Looking for an equality compare.
13650 ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
13651 if (!isIntEqualitySetCC(Code: Cond))
13652 return SDValue();
13653
13654 // Don't do this if the sign bit is provably zero; it will be turned back
13655 // into an AND.
13656 APInt SignMask = APInt::getOneBitSet(numBits: 64, BitNo: 31);
13657 if (DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask: SignMask))
13658 return SDValue();
13659
13660 const APInt &C1 = N1C->getAPIntValue();
13661
13662 SDLoc dl(N);
13663 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13664 // to be equal.
13665 if (C1.getActiveBits() > 32)
13666 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
13667
13668 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT,
13669 N0.getOperand(0), DAG.getValueType(MVT::i32));
13670 return DAG.getSetCC(DL: dl, VT, LHS: SExtOp, RHS: DAG.getConstant(Val: C1.trunc(width: 32).sext(width: 64),
13671 DL: dl, VT: OpVT), Cond);
13672}
13673
13674static SDValue
13675performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13676 const RISCVSubtarget &Subtarget) {
13677 SDValue Src = N->getOperand(Num: 0);
13678 EVT VT = N->getValueType(ResNo: 0);
13679
13680 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13681 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13682 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13683 return DAG.getNode(Opcode: RISCVISD::FMV_X_SIGNEXTH, DL: SDLoc(N), VT,
13684 Operand: Src.getOperand(i: 0));
13685
13686 return SDValue();
13687}
13688
13689namespace {
13690// Forward declaration of the structure holding the necessary information to
13691// apply a combine.
13692struct CombineResult;
13693
13694enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13695/// Helper class for folding sign/zero extensions.
13696/// In particular, this class is used for the following combines:
13697/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13698/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13699/// mul | mul_vl -> vwmul(u) | vwmul_su
13700/// shl | shl_vl -> vwsll
13701/// fadd -> vfwadd | vfwadd_w
13702/// fsub -> vfwsub | vfwsub_w
13703/// fmul -> vfwmul
13704/// An object of this class represents an operand of the operation we want to
13705/// combine.
13706/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13707/// NodeExtensionHelper for `a` and one for `b`.
13708///
13709/// This class abstracts away how the extension is materialized and
13710/// how its number of users affect the combines.
13711///
13712/// In particular:
13713/// - VWADD_W is conceptually == add(op0, sext(op1))
13714/// - VWADDU_W == add(op0, zext(op1))
13715/// - VWSUB_W == sub(op0, sext(op1))
13716/// - VWSUBU_W == sub(op0, zext(op1))
13717/// - VFWADD_W == fadd(op0, fpext(op1))
13718/// - VFWSUB_W == fsub(op0, fpext(op1))
13719/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13720/// zext|sext(smaller_value).
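/// As a rough illustration, in
///   (add_vl (vsext_vl a, m, vl), (vsext_vl b, m, vl), undef, m, vl)
/// both operands report SupportsSExt, and the expression can be rewritten as
///   (vwadd_vl a, b, undef, m, vl)
/// provided the extensions have no other users that would keep them alive.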
13721struct NodeExtensionHelper {
13722 /// Records if this operand is like being zero extended.
13723 bool SupportsZExt;
13724 /// Records if this operand is like being sign extended.
13725 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13726 /// instance, a splat constant (e.g., 3), would support being both sign and
13727 /// zero extended.
13728 bool SupportsSExt;
13729 /// Records if this operand is like being floating-point extended.
13730 bool SupportsFPExt;
13731 /// This boolean captures whether we care if this operand would still be
13732 /// around after the folding happens.
13733 bool EnforceOneUse;
13734 /// Original value that this NodeExtensionHelper represents.
13735 SDValue OrigOperand;
13736
13737 /// Get the value feeding the extension or the value itself.
13738 /// E.g., for zext(a), this would return a.
13739 SDValue getSource() const {
13740 switch (OrigOperand.getOpcode()) {
13741 case ISD::ZERO_EXTEND:
13742 case ISD::SIGN_EXTEND:
13743 case RISCVISD::VSEXT_VL:
13744 case RISCVISD::VZEXT_VL:
13745 case RISCVISD::FP_EXTEND_VL:
13746 return OrigOperand.getOperand(i: 0);
13747 default:
13748 return OrigOperand;
13749 }
13750 }
13751
13752 /// Check if this instance represents a splat.
13753 bool isSplat() const {
13754 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13755 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13756 }
13757
13758 /// Get the extended opcode.
13759 unsigned getExtOpc(ExtKind SupportsExt) const {
13760 switch (SupportsExt) {
13761 case ExtKind::SExt:
13762 return RISCVISD::VSEXT_VL;
13763 case ExtKind::ZExt:
13764 return RISCVISD::VZEXT_VL;
13765 case ExtKind::FPExt:
13766 return RISCVISD::FP_EXTEND_VL;
13767 }
13768 llvm_unreachable("Unknown ExtKind enum");
13769 }
13770
13771 /// Get or create a value that can feed \p Root with the given extension \p
13772 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source
13773 /// of this operand. \see ::getSource().
13774 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13775 const RISCVSubtarget &Subtarget,
13776 std::optional<ExtKind> SupportsExt) const {
13777 if (!SupportsExt.has_value())
13778 return OrigOperand;
13779
13780 MVT NarrowVT = getNarrowType(Root, SupportsExt: *SupportsExt);
13781
13782 SDValue Source = getSource();
13783 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13784 if (Source.getValueType() == NarrowVT)
13785 return Source;
13786
13787 unsigned ExtOpc = getExtOpc(SupportsExt: *SupportsExt);
13788
13789 // If we need an extension, we should be changing the type.
13790 SDLoc DL(OrigOperand);
13791 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13792 switch (OrigOperand.getOpcode()) {
13793 case ISD::ZERO_EXTEND:
13794 case ISD::SIGN_EXTEND:
13795 case RISCVISD::VSEXT_VL:
13796 case RISCVISD::VZEXT_VL:
13797 case RISCVISD::FP_EXTEND_VL:
13798 return DAG.getNode(Opcode: ExtOpc, DL, VT: NarrowVT, N1: Source, N2: Mask, N3: VL);
13799 case ISD::SPLAT_VECTOR:
13800 return DAG.getSplat(VT: NarrowVT, DL, Op: Source.getOperand(i: 0));
13801 case RISCVISD::VMV_V_X_VL:
13802 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: NarrowVT,
13803 N1: DAG.getUNDEF(VT: NarrowVT), N2: Source.getOperand(i: 1), N3: VL);
13804 default:
13805 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13806 // and that operand should already have the right NarrowVT so no
13807 // extension should be required at this point.
13808 llvm_unreachable("Unsupported opcode");
13809 }
13810 }
13811
13812 /// Helper function to get the narrow type for \p Root.
13813 /// The narrow type is the type of \p Root where we divided the size of each
13814 /// element by 2. E.g., if Root's type is <2xi16>, the narrow type is <2xi8>.
13815 /// \pre Both the narrow type and the original type should be legal.
13816 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13817 MVT VT = Root->getSimpleValueType(ResNo: 0);
13818
13819 // Determine the narrow size.
13820 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13821
13822 MVT EltVT = SupportsExt == ExtKind::FPExt
13823 ? MVT::getFloatingPointVT(BitWidth: NarrowSize)
13824 : MVT::getIntegerVT(BitWidth: NarrowSize);
13825
13826 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13827 "Trying to extend something we can't represent");
13828 MVT NarrowVT = MVT::getVectorVT(VT: EltVT, EC: VT.getVectorElementCount());
13829 return NarrowVT;
13830 }
13831
13832 /// Get the opcode to materialize:
13833 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13834 static unsigned getSExtOpcode(unsigned Opcode) {
13835 switch (Opcode) {
13836 case ISD::ADD:
13837 case RISCVISD::ADD_VL:
13838 case RISCVISD::VWADD_W_VL:
13839 case RISCVISD::VWADDU_W_VL:
13840 case ISD::OR:
13841 return RISCVISD::VWADD_VL;
13842 case ISD::SUB:
13843 case RISCVISD::SUB_VL:
13844 case RISCVISD::VWSUB_W_VL:
13845 case RISCVISD::VWSUBU_W_VL:
13846 return RISCVISD::VWSUB_VL;
13847 case ISD::MUL:
13848 case RISCVISD::MUL_VL:
13849 return RISCVISD::VWMUL_VL;
13850 default:
13851 llvm_unreachable("Unexpected opcode");
13852 }
13853 }
13854
13855 /// Get the opcode to materialize:
13856 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13857 static unsigned getZExtOpcode(unsigned Opcode) {
13858 switch (Opcode) {
13859 case ISD::ADD:
13860 case RISCVISD::ADD_VL:
13861 case RISCVISD::VWADD_W_VL:
13862 case RISCVISD::VWADDU_W_VL:
13863 case ISD::OR:
13864 return RISCVISD::VWADDU_VL;
13865 case ISD::SUB:
13866 case RISCVISD::SUB_VL:
13867 case RISCVISD::VWSUB_W_VL:
13868 case RISCVISD::VWSUBU_W_VL:
13869 return RISCVISD::VWSUBU_VL;
13870 case ISD::MUL:
13871 case RISCVISD::MUL_VL:
13872 return RISCVISD::VWMULU_VL;
13873 case ISD::SHL:
13874 case RISCVISD::SHL_VL:
13875 return RISCVISD::VWSLL_VL;
13876 default:
13877 llvm_unreachable("Unexpected opcode");
13878 }
13879 }
13880
13881 /// Get the opcode to materialize:
13882 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13883 static unsigned getFPExtOpcode(unsigned Opcode) {
13884 switch (Opcode) {
13885 case RISCVISD::FADD_VL:
13886 case RISCVISD::VFWADD_W_VL:
13887 return RISCVISD::VFWADD_VL;
13888 case RISCVISD::FSUB_VL:
13889 case RISCVISD::VFWSUB_W_VL:
13890 return RISCVISD::VFWSUB_VL;
13891 case RISCVISD::FMUL_VL:
13892 return RISCVISD::VFWMUL_VL;
13893 default:
13894 llvm_unreachable("Unexpected opcode");
13895 }
13896 }
13897
13898 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13899 /// newOpcode(a, b).
13900 static unsigned getSUOpcode(unsigned Opcode) {
13901 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13902 "SU is only supported for MUL");
13903 return RISCVISD::VWMULSU_VL;
13904 }
13905
13906 /// Get the opcode to materialize
13907 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
13908 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
13909 switch (Opcode) {
13910 case ISD::ADD:
13911 case RISCVISD::ADD_VL:
13912 case ISD::OR:
13913 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
13914 : RISCVISD::VWADDU_W_VL;
13915 case ISD::SUB:
13916 case RISCVISD::SUB_VL:
13917 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
13918 : RISCVISD::VWSUBU_W_VL;
13919 case RISCVISD::FADD_VL:
13920 return RISCVISD::VFWADD_W_VL;
13921 case RISCVISD::FSUB_VL:
13922 return RISCVISD::VFWSUB_W_VL;
13923 default:
13924 llvm_unreachable("Unexpected opcode");
13925 }
13926 }
13927
13928 using CombineToTry = std::function<std::optional<CombineResult>(
13929 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13930 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13931 const RISCVSubtarget &)>;
13932
13933 /// Check if this node needs to be fully folded or extended for all users.
13934 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13935
13936 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13937 const RISCVSubtarget &Subtarget) {
13938 unsigned Opc = OrigOperand.getOpcode();
13939 MVT VT = OrigOperand.getSimpleValueType();
13940
13941 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13942 "Unexpected Opcode");
13943
13944 // The passthru must be undef for the result to be tail agnostic.
13945 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(i: 0).isUndef())
13946 return;
13947
13948 // Get the scalar value.
13949 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(i: 0)
13950 : OrigOperand.getOperand(i: 1);
13951
13952 // See if we have enough sign bits or zero bits in the scalar to use a
13953 // widening opcode by splatting to smaller element size.
13954 unsigned EltBits = VT.getScalarSizeInBits();
13955 unsigned ScalarBits = Op.getValueSizeInBits();
13956 // Make sure we're getting all element bits from the scalar register.
13957 // FIXME: Support implicit sign extension of vmv.v.x?
13958 if (ScalarBits < EltBits)
13959 return;
13960
13961 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13962 // If the narrow type cannot be expressed with a legal VMV,
13963 // this is not a valid candidate.
13964 if (NarrowSize < 8)
13965 return;
13966
13967 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13968 SupportsSExt = true;
13969
13970 if (DAG.MaskedValueIsZero(Op,
13971 Mask: APInt::getBitsSetFrom(numBits: ScalarBits, loBit: NarrowSize)))
13972 SupportsZExt = true;
13973
13974 EnforceOneUse = false;
13975 }
13976
13977 /// Helper method to set the various fields of this struct based on the
13978 /// type of \p Root.
13979 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13980 const RISCVSubtarget &Subtarget) {
13981 SupportsZExt = false;
13982 SupportsSExt = false;
13983 SupportsFPExt = false;
13984 EnforceOneUse = true;
13985 unsigned Opc = OrigOperand.getOpcode();
13986 // For the nodes we handle below, we end up using their inputs directly: see
13987 // getSource(). However since they either don't have a passthru or we check
13988 // that their passthru is undef, we can safely ignore their mask and VL.
13989 switch (Opc) {
13990 case ISD::ZERO_EXTEND:
13991 case ISD::SIGN_EXTEND: {
13992 MVT VT = OrigOperand.getSimpleValueType();
13993 if (!VT.isVector())
13994 break;
13995
13996 SDValue NarrowElt = OrigOperand.getOperand(i: 0);
13997 MVT NarrowVT = NarrowElt.getSimpleValueType();
13998 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
13999 if (NarrowVT.getVectorElementType() == MVT::i1)
14000 break;
14001
14002 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14003 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14004 break;
14005 }
14006 case RISCVISD::VZEXT_VL:
14007 SupportsZExt = true;
14008 break;
14009 case RISCVISD::VSEXT_VL:
14010 SupportsSExt = true;
14011 break;
14012 case RISCVISD::FP_EXTEND_VL:
14013 SupportsFPExt = true;
14014 break;
14015 case ISD::SPLAT_VECTOR:
14016 case RISCVISD::VMV_V_X_VL:
14017 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14018 break;
14019 default:
14020 break;
14021 }
14022 }
14023
14024 /// Check if \p Root supports any extension folding combines.
14025 static bool isSupportedRoot(const SDNode *Root,
14026 const RISCVSubtarget &Subtarget) {
14027 switch (Root->getOpcode()) {
14028 case ISD::ADD:
14029 case ISD::SUB:
14030 case ISD::MUL: {
14031 return Root->getValueType(ResNo: 0).isScalableVector();
14032 }
14033 case ISD::OR: {
14034 return Root->getValueType(ResNo: 0).isScalableVector() &&
14035 Root->getFlags().hasDisjoint();
14036 }
14037 // Vector Widening Integer Add/Sub/Mul Instructions
14038 case RISCVISD::ADD_VL:
14039 case RISCVISD::MUL_VL:
14040 case RISCVISD::VWADD_W_VL:
14041 case RISCVISD::VWADDU_W_VL:
14042 case RISCVISD::SUB_VL:
14043 case RISCVISD::VWSUB_W_VL:
14044 case RISCVISD::VWSUBU_W_VL:
14045 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14046 case RISCVISD::FADD_VL:
14047 case RISCVISD::FSUB_VL:
14048 case RISCVISD::FMUL_VL:
14049 case RISCVISD::VFWADD_W_VL:
14050 case RISCVISD::VFWSUB_W_VL:
14051 return true;
14052 case ISD::SHL:
14053 return Root->getValueType(ResNo: 0).isScalableVector() &&
14054 Subtarget.hasStdExtZvbb();
14055 case RISCVISD::SHL_VL:
14056 return Subtarget.hasStdExtZvbb();
14057 default:
14058 return false;
14059 }
14060 }
14061
14062 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14063 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14064 const RISCVSubtarget &Subtarget) {
14065 assert(isSupportedRoot(Root, Subtarget) &&
14066 "Trying to build an helper with an "
14067 "unsupported root");
14068 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14069 assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
14070 OrigOperand = Root->getOperand(Num: OperandIdx);
14071
14072 unsigned Opc = Root->getOpcode();
14073 switch (Opc) {
14074 // We consider
14075 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14076 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14077 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14078 case RISCVISD::VWADD_W_VL:
14079 case RISCVISD::VWADDU_W_VL:
14080 case RISCVISD::VWSUB_W_VL:
14081 case RISCVISD::VWSUBU_W_VL:
14082 case RISCVISD::VFWADD_W_VL:
14083 case RISCVISD::VFWSUB_W_VL:
14084 if (OperandIdx == 1) {
14085 SupportsZExt =
14086 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14087 SupportsSExt =
14088 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14089 SupportsFPExt =
14090 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14091 // There's no existing extension here, so we don't have to worry about
14092 // making sure it gets removed.
14093 EnforceOneUse = false;
14094 break;
14095 }
14096 [[fallthrough]];
14097 default:
14098 fillUpExtensionSupport(Root, DAG, Subtarget);
14099 break;
14100 }
14101 }
14102
14103 /// Helper function to get the Mask and VL from \p Root.
14104 static std::pair<SDValue, SDValue>
14105 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14106 const RISCVSubtarget &Subtarget) {
14107 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14108 switch (Root->getOpcode()) {
14109 case ISD::ADD:
14110 case ISD::SUB:
14111 case ISD::MUL:
14112 case ISD::OR:
14113 case ISD::SHL: {
14114 SDLoc DL(Root);
14115 MVT VT = Root->getSimpleValueType(ResNo: 0);
14116 return getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget);
14117 }
14118 default:
14119 return std::make_pair(x: Root->getOperand(Num: 3), y: Root->getOperand(Num: 4));
14120 }
14121 }
14122
14123 /// Helper function to check if \p N is commutative with respect to the
14124 /// foldings that are supported by this class.
14125 static bool isCommutative(const SDNode *N) {
14126 switch (N->getOpcode()) {
14127 case ISD::ADD:
14128 case ISD::MUL:
14129 case ISD::OR:
14130 case RISCVISD::ADD_VL:
14131 case RISCVISD::MUL_VL:
14132 case RISCVISD::VWADD_W_VL:
14133 case RISCVISD::VWADDU_W_VL:
14134 case RISCVISD::FADD_VL:
14135 case RISCVISD::FMUL_VL:
14136 case RISCVISD::VFWADD_W_VL:
14137 return true;
14138 case ISD::SUB:
14139 case RISCVISD::SUB_VL:
14140 case RISCVISD::VWSUB_W_VL:
14141 case RISCVISD::VWSUBU_W_VL:
14142 case RISCVISD::FSUB_VL:
14143 case RISCVISD::VFWSUB_W_VL:
14144 case ISD::SHL:
14145 case RISCVISD::SHL_VL:
14146 return false;
14147 default:
14148 llvm_unreachable("Unexpected opcode");
14149 }
14150 }
14151
14152 /// Get a list of combine to try for folding extensions in \p Root.
14153 /// Note that each returned CombineToTry function doesn't actually modify
14154 /// anything. Instead they produce an optional CombineResult that if not None,
14155 /// need to be materialized for the combine to be applied.
14156 /// \see CombineResult::materialize.
14157 /// If the related CombineToTry function returns std::nullopt, that means the
14158 /// combine didn't match.
14159 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14160};
14161
14162/// Helper structure that holds all the necessary information to materialize a
14163/// combine that does some extension folding.
14164struct CombineResult {
14165 /// Opcode to be generated when materializing the combine.
14166 unsigned TargetOpcode;
14167 // No value means no extension is needed.
14168 std::optional<ExtKind> LHSExt;
14169 std::optional<ExtKind> RHSExt;
14170 /// Root of the combine.
14171 SDNode *Root;
14172 /// LHS of the TargetOpcode.
14173 NodeExtensionHelper LHS;
14174 /// RHS of the TargetOpcode.
14175 NodeExtensionHelper RHS;
14176
14177 CombineResult(unsigned TargetOpcode, SDNode *Root,
14178 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14179 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14180 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14181 LHS(LHS), RHS(RHS) {}
14182
14183 /// Return a value that uses TargetOpcode and that can be used to replace
14184 /// Root.
14185 /// The actual replacement is *not* done in that method.
14186 SDValue materialize(SelectionDAG &DAG,
14187 const RISCVSubtarget &Subtarget) const {
14188 SDValue Mask, VL, Merge;
14189 std::tie(args&: Mask, args&: VL) =
14190 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14191 switch (Root->getOpcode()) {
14192 default:
14193 Merge = Root->getOperand(Num: 2);
14194 break;
14195 case ISD::ADD:
14196 case ISD::SUB:
14197 case ISD::MUL:
14198 case ISD::OR:
14199 case ISD::SHL:
14200 Merge = DAG.getUNDEF(VT: Root->getValueType(ResNo: 0));
14201 break;
14202 }
14203 return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(Root), VT: Root->getValueType(ResNo: 0),
14204 N1: LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: LHSExt),
14205 N2: RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: RHSExt),
14206 N3: Merge, N4: Mask, N5: VL);
14207 }
14208};
14209
14210/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14211/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14212/// are zext) and LHS and RHS can be folded into Root.
14213/// AllowExtMask defines which form `ext` can take in this pattern.
14214///
14215/// \note If the pattern can match with both zext and sext, the returned
14216/// CombineResult will feature the zext result.
14217///
14218/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14219/// can be used to apply the pattern.
14220static std::optional<CombineResult>
14221canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14222 const NodeExtensionHelper &RHS,
14223 uint8_t AllowExtMask, SelectionDAG &DAG,
14224 const RISCVSubtarget &Subtarget) {
14225 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14226 return CombineResult(NodeExtensionHelper::getZExtOpcode(Opcode: Root->getOpcode()),
14227 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14228 /*RHSExt=*/{ExtKind::ZExt});
14229 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14230 return CombineResult(NodeExtensionHelper::getSExtOpcode(Opcode: Root->getOpcode()),
14231 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14232 /*RHSExt=*/{ExtKind::SExt});
14233 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14234 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Opcode: Root->getOpcode()),
14235 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14236 /*RHSExt=*/{ExtKind::FPExt});
14237 return std::nullopt;
14238}
14239
14240/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14241/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14242/// are zext) and LHS and RHS can be folded into Root.
14243///
14244/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14245/// can be used to apply the pattern.
14246static std::optional<CombineResult>
14247canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14248 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14249 const RISCVSubtarget &Subtarget) {
14250 return canFoldToVWWithSameExtensionImpl(
14251 Root, LHS, RHS, AllowExtMask: ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14252 Subtarget);
14253}
14254
14255/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14256///
14257/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14258/// can be used to apply the pattern.
14259static std::optional<CombineResult>
14260canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14261 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14262 const RISCVSubtarget &Subtarget) {
14263 if (RHS.SupportsFPExt)
14264 return CombineResult(
14265 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::FPExt),
14266 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14267
14268 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14269 // sext/zext?
14270 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14271 // purposes.
14272 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14273 return CombineResult(
14274 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::ZExt), Root,
14275 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14276 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14277 return CombineResult(
14278 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::SExt), Root,
14279 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14280 return std::nullopt;
14281}
14282
14283/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14284///
14285/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14286/// can be used to apply the pattern.
14287static std::optional<CombineResult>
14288canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14289 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14290 const RISCVSubtarget &Subtarget) {
14291 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::SExt, DAG,
14292 Subtarget);
14293}
14294
14295/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14296///
14297/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14298/// can be used to apply the pattern.
14299static std::optional<CombineResult>
14300canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14301 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14302 const RISCVSubtarget &Subtarget) {
14303 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::ZExt, DAG,
14304 Subtarget);
14305}
14306
14307/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14308///
14309/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14310/// can be used to apply the pattern.
14311static std::optional<CombineResult>
14312canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14313 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14314 const RISCVSubtarget &Subtarget) {
14315 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::FPExt, DAG,
14316 Subtarget);
14317}
14318
14319/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14320///
14321/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14322/// can be used to apply the pattern.
14323static std::optional<CombineResult>
14324canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14325 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14326 const RISCVSubtarget &Subtarget) {
14327
14328 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14329 return std::nullopt;
14330 return CombineResult(NodeExtensionHelper::getSUOpcode(Opcode: Root->getOpcode()),
14331 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14332 /*RHSExt=*/{ExtKind::ZExt});
14333}
14334
14335SmallVector<NodeExtensionHelper::CombineToTry>
14336NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14337 SmallVector<CombineToTry> Strategies;
14338 switch (Root->getOpcode()) {
14339 case ISD::ADD:
14340 case ISD::SUB:
14341 case ISD::OR:
14342 case RISCVISD::ADD_VL:
14343 case RISCVISD::SUB_VL:
14344 case RISCVISD::FADD_VL:
14345 case RISCVISD::FSUB_VL:
14346 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14347 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
14348 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14349 Strategies.push_back(Elt: canFoldToVW_W);
14350 break;
14351 case RISCVISD::FMUL_VL:
14352 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
14353 break;
14354 case ISD::MUL:
14355 case RISCVISD::MUL_VL:
14356 // mul -> vwmul(u)
14357 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
14358 // mul -> vwmulsu
14359 Strategies.push_back(Elt: canFoldToVW_SU);
14360 break;
14361 case ISD::SHL:
14362 case RISCVISD::SHL_VL:
14363 // shl -> vwsll
14364 Strategies.push_back(Elt: canFoldToVWWithZEXT);
14365 break;
14366 case RISCVISD::VWADD_W_VL:
14367 case RISCVISD::VWSUB_W_VL:
14368 // vwadd_w|vwsub_w -> vwadd|vwsub
14369 Strategies.push_back(Elt: canFoldToVWWithSEXT);
14370 break;
14371 case RISCVISD::VWADDU_W_VL:
14372 case RISCVISD::VWSUBU_W_VL:
14373 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14374 Strategies.push_back(Elt: canFoldToVWWithZEXT);
14375 break;
14376 case RISCVISD::VFWADD_W_VL:
14377 case RISCVISD::VFWSUB_W_VL:
14378 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14379 Strategies.push_back(Elt: canFoldToVWWithFPEXT);
14380 break;
14381 default:
14382 llvm_unreachable("Unexpected opcode");
14383 }
14384 return Strategies;
14385}
14386} // End anonymous namespace.
14387
14388/// Combine a binary operation to its equivalent VW or VW_W form.
14389/// The supported combines are:
14390/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14391/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14392/// mul | mul_vl -> vwmul(u) | vwmul_su
14393/// shl | shl_vl -> vwsll
14394/// fadd_vl -> vfwadd | vfwadd_w
14395/// fsub_vl -> vfwsub | vfwsub_w
14396/// fmul_vl -> vfwmul
14397/// vwadd_w(u) -> vwadd(u)
14398/// vwsub_w(u) -> vwsub(u)
14399/// vfwadd_w -> vfwadd
14400/// vfwsub_w -> vfwsub
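/// The fold is applied over a small "web" of nodes: when a matched extension
/// has other users, those users are added to the worklist and must all be
/// combinable as well, otherwise nothing is rewritten. E.g. (illustrative) if
/// (vsext_vl a) feeds both an add_vl and a mul_vl, folding only the add would
/// keep the vsext alive and save nothing, so both must form widening ops.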
14401static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14402 TargetLowering::DAGCombinerInfo &DCI,
14403 const RISCVSubtarget &Subtarget) {
14404 SelectionDAG &DAG = DCI.DAG;
14405 if (DCI.isBeforeLegalize())
14406 return SDValue();
14407
14408 if (!NodeExtensionHelper::isSupportedRoot(Root: N, Subtarget))
14409 return SDValue();
14410
14411 SmallVector<SDNode *> Worklist;
14412 SmallSet<SDNode *, 8> Inserted;
14413 Worklist.push_back(Elt: N);
14414 Inserted.insert(Ptr: N);
14415 SmallVector<CombineResult> CombinesToApply;
14416
14417 while (!Worklist.empty()) {
14418 SDNode *Root = Worklist.pop_back_val();
14419 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14420 return SDValue();
14421
14422 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14423 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14424 auto AppendUsersIfNeeded = [&Worklist,
14425 &Inserted](const NodeExtensionHelper &Op) {
14426 if (Op.needToPromoteOtherUsers()) {
14427 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14428 if (Inserted.insert(Ptr: TheUse).second)
14429 Worklist.push_back(Elt: TheUse);
14430 }
14431 }
14432 };
14433
14434 // Control the compile time by limiting the number of nodes we look at in
14435 // total.
14436 if (Inserted.size() > ExtensionMaxWebSize)
14437 return SDValue();
14438
14439 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14440 NodeExtensionHelper::getSupportedFoldings(Root: N);
14441
14442 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14443 bool Matched = false;
14444 for (int Attempt = 0;
14445 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14446 ++Attempt) {
14447
14448 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14449 FoldingStrategies) {
14450 std::optional<CombineResult> Res =
14451 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14452 if (Res) {
14453 Matched = true;
14454 CombinesToApply.push_back(Elt: *Res);
14455 // All the inputs that are extended need to be folded, otherwise
14456 // we would be leaving the old input (since it may still be used),
14457 // and the new one.
14458 if (Res->LHSExt.has_value())
14459 AppendUsersIfNeeded(LHS);
14460 if (Res->RHSExt.has_value())
14461 AppendUsersIfNeeded(RHS);
14462 break;
14463 }
14464 }
14465 std::swap(a&: LHS, b&: RHS);
14466 }
14467 // Right now we do an all or nothing approach.
14468 if (!Matched)
14469 return SDValue();
14470 }
14471 // Store the value for the replacement of the input node separately.
14472 SDValue InputRootReplacement;
14473 // We do the RAUW after we materialize all the combines, because some replaced
14474 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14475 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14476 // yet-to-be-visited CombinesToApply roots.
14477 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14478 ValuesToReplace.reserve(N: CombinesToApply.size());
14479 for (CombineResult Res : CombinesToApply) {
14480 SDValue NewValue = Res.materialize(DAG, Subtarget);
14481 if (!InputRootReplacement) {
14482 assert(Res.Root == N &&
14483 "First element is expected to be the current node");
14484 InputRootReplacement = NewValue;
14485 } else {
14486 ValuesToReplace.emplace_back(Args: SDValue(Res.Root, 0), Args&: NewValue);
14487 }
14488 }
14489 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14490 DAG.ReplaceAllUsesOfValueWith(From: OldNewValues.first, To: OldNewValues.second);
14491 DCI.AddToWorklist(N: OldNewValues.second.getNode());
14492 }
14493 return InputRootReplacement;
14494}
14495
14496// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14497// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14498// y will be the Passthru and cond will be the Mask.
14499static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14500 unsigned Opc = N->getOpcode();
14501 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14502 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14503
14504 SDValue Y = N->getOperand(Num: 0);
14505 SDValue MergeOp = N->getOperand(Num: 1);
14506 unsigned MergeOpc = MergeOp.getOpcode();
14507
14508 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14509 return SDValue();
14510
14511 SDValue X = MergeOp->getOperand(Num: 1);
14512
14513 if (!MergeOp.hasOneUse())
14514 return SDValue();
14515
14516 // Passthru should be undef
14517 SDValue Passthru = N->getOperand(Num: 2);
14518 if (!Passthru.isUndef())
14519 return SDValue();
14520
14521 // Mask should be all ones
14522 SDValue Mask = N->getOperand(Num: 3);
14523 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14524 return SDValue();
14525
14526 // False value of MergeOp should be all zeros
14527 SDValue Z = MergeOp->getOperand(Num: 2);
14528
14529 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14530 (isNullOrNullSplat(V: Z.getOperand(i: 0)) || Z.getOperand(i: 0).isUndef()))
14531 Z = Z.getOperand(i: 1);
14532
14533 if (!ISD::isConstantSplatVectorAllZeros(N: Z.getNode()))
14534 return SDValue();
14535
14536 return DAG.getNode(Opcode: Opc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
14537 Ops: {Y, X, Y, MergeOp->getOperand(Num: 0), N->getOperand(Num: 4)},
14538 Flags: N->getFlags());
14539}
14540
14541static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14542 TargetLowering::DAGCombinerInfo &DCI,
14543 const RISCVSubtarget &Subtarget) {
14544 [[maybe_unused]] unsigned Opc = N->getOpcode();
14545 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14546 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14547
14548 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14549 return V;
14550
14551 return combineVWADDSUBWSelect(N, DAG&: DCI.DAG);
14552}
14553
14554// Helper function for performMemPairCombine.
14555// Try to combine the memory loads/stores LSNode1 and LSNode2
14556// into a single memory pair operation.
14557static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14558 LSBaseSDNode *LSNode2, SDValue BasePtr,
14559 uint64_t Imm) {
14560 SmallPtrSet<const SDNode *, 32> Visited;
14561 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14562
14563 if (SDNode::hasPredecessorHelper(N: LSNode1, Visited, Worklist) ||
14564 SDNode::hasPredecessorHelper(N: LSNode2, Visited, Worklist))
14565 return SDValue();
14566
14567 MachineFunction &MF = DAG.getMachineFunction();
14568 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14569
14570 // The new operation has twice the width.
14571 MVT XLenVT = Subtarget.getXLenVT();
14572 EVT MemVT = LSNode1->getMemoryVT();
14573 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14574 MachineMemOperand *MMO = LSNode1->getMemOperand();
14575 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14576 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14577
14578 if (LSNode1->getOpcode() == ISD::LOAD) {
14579 auto Ext = cast<LoadSDNode>(Val: LSNode1)->getExtensionType();
14580 unsigned Opcode;
14581 if (MemVT == MVT::i32)
14582 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14583 else
14584 Opcode = RISCVISD::TH_LDD;
14585
14586 SDValue Res = DAG.getMemIntrinsicNode(
14587 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14588 {LSNode1->getChain(), BasePtr,
14589 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14590 NewMemVT, NewMMO);
14591
14592 SDValue Node1 =
14593 DAG.getMergeValues(Ops: {Res.getValue(R: 0), Res.getValue(R: 2)}, dl: SDLoc(LSNode1));
14594 SDValue Node2 =
14595 DAG.getMergeValues(Ops: {Res.getValue(R: 1), Res.getValue(R: 2)}, dl: SDLoc(LSNode2));
14596
14597 DAG.ReplaceAllUsesWith(From: LSNode2, To: Node2.getNode());
14598 return Node1;
14599 } else {
14600 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14601
14602 SDValue Res = DAG.getMemIntrinsicNode(
14603 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14604 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14605 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14606 NewMemVT, NewMMO);
14607
14608 DAG.ReplaceAllUsesWith(From: LSNode2, To: Res.getNode());
14609 return Res;
14610 }
14611}
14612
14613// Try to combine two adjacent loads/stores to a single pair instruction from
14614// the XTHeadMemPair vendor extension.
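// Illustrative: two simple i64 loads from (add a0, 16) and (add a0, 24) that
// share the same chain can be merged into one TH_LDD node at offset 16; its
// two results replace the original loads. The first offset must be a multiple
// of the pair size (8 bytes for i32, 16 for i64) and fit a 2-bit index.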
14615static SDValue performMemPairCombine(SDNode *N,
14616 TargetLowering::DAGCombinerInfo &DCI) {
14617 SelectionDAG &DAG = DCI.DAG;
14618 MachineFunction &MF = DAG.getMachineFunction();
14619 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14620
14621 // Target does not support load/store pair.
14622 if (!Subtarget.hasVendorXTHeadMemPair())
14623 return SDValue();
14624
14625 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(Val: N);
14626 EVT MemVT = LSNode1->getMemoryVT();
14627 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14628
14629 // No volatile, indexed or atomic loads/stores.
14630 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14631 return SDValue();
14632
14633 // Function to get a base + constant representation from a memory value.
14634 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14635 if (Ptr->getOpcode() == ISD::ADD)
14636 if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Ptr->getOperand(Num: 1)))
14637 return {Ptr->getOperand(Num: 0), C1->getZExtValue()};
14638 return {Ptr, 0};
14639 };
14640
14641 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(Num: OpNum));
14642
14643 SDValue Chain = N->getOperand(Num: 0);
14644 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14645 UI != UE; ++UI) {
14646 SDUse &Use = UI.getUse();
14647 if (Use.getUser() != N && Use.getResNo() == 0 &&
14648 Use.getUser()->getOpcode() == N->getOpcode()) {
14649 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Val: Use.getUser());
14650
14651 // No volatile, indexed or atomic loads/stores.
14652 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14653 continue;
14654
14655 // Check if LSNode1 and LSNode2 have the same type and extension.
14656 if (LSNode1->getOpcode() == ISD::LOAD)
14657 if (cast<LoadSDNode>(Val: LSNode2)->getExtensionType() !=
14658 cast<LoadSDNode>(Val: LSNode1)->getExtensionType())
14659 continue;
14660
14661 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14662 continue;
14663
14664 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(Num: OpNum));
14665
14666 // Check if the base pointer is the same for both instructions.
14667 if (Base1 != Base2)
14668 continue;
14669
14670 // Check if the offsets match the XTHeadMemPair encoding constraints.
14671 bool Valid = false;
14672 if (MemVT == MVT::i32) {
14673 // Check for adjacent i32 values and a 2-bit index.
14674 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(x: Offset1))
14675 Valid = true;
14676 } else if (MemVT == MVT::i64) {
14677 // Check for adjacent i64 values and a 2-bit index.
14678 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(x: Offset1))
14679 Valid = true;
14680 }
14681
14682 if (!Valid)
14683 continue;
14684
14685 // Try to combine.
14686 if (SDValue Res =
14687 tryMemPairCombine(DAG, LSNode1, LSNode2, BasePtr: Base1, Imm: Offset1))
14688 return Res;
14689 }
14690 }
14691
14692 return SDValue();
14693}
14694
14695// Fold
14696// (fp_to_int (froundeven X)) -> fcvt X, rne
14697// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14698// (fp_to_int (ffloor X)) -> fcvt X, rdn
14699// (fp_to_int (fceil X)) -> fcvt X, rup
14700// (fp_to_int (fround X)) -> fcvt X, rmm
14701// (fp_to_int (frint X)) -> fcvt X
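// e.g. on RV64, (i64 (fp_to_sint (ffloor X:f64))) can become a single
// "fcvt.l.d rd, rs, rdn", avoiding a separate rounding of X before the
// conversion (illustrative).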
14702static SDValue performFP_TO_INTCombine(SDNode *N,
14703 TargetLowering::DAGCombinerInfo &DCI,
14704 const RISCVSubtarget &Subtarget) {
14705 SelectionDAG &DAG = DCI.DAG;
14706 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14707 MVT XLenVT = Subtarget.getXLenVT();
14708
14709 SDValue Src = N->getOperand(Num: 0);
14710
14711 // Don't do this for strict-fp Src.
14712 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14713 return SDValue();
14714
14715 // Ensure the FP type is legal.
14716 if (!TLI.isTypeLegal(VT: Src.getValueType()))
14717 return SDValue();
14718
14719 // Don't do this for f16 with Zfhmin and not Zfh.
14720 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14721 return SDValue();
14722
14723 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode());
14724 // If the result is invalid, we didn't find a foldable instruction.
14725 if (FRM == RISCVFPRndMode::Invalid)
14726 return SDValue();
14727
14728 SDLoc DL(N);
14729 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14730 EVT VT = N->getValueType(ResNo: 0);
14731
14732 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14733 MVT SrcVT = Src.getSimpleValueType();
14734 MVT SrcContainerVT = SrcVT;
14735 MVT ContainerVT = VT.getSimpleVT();
14736 SDValue XVal = Src.getOperand(i: 0);
14737
14738 // For widening and narrowing conversions we just combine it into a
14739 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14740 // end up getting lowered to their appropriate pseudo instructions based on
14741 // their operand types.
14742 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14743 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14744 return SDValue();
14745
14746 // Make fixed-length vectors scalable first
14747 if (SrcVT.isFixedLengthVector()) {
14748 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
14749 XVal = convertToScalableVector(VT: SrcContainerVT, V: XVal, DAG, Subtarget);
14750 ContainerVT =
14751 getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget);
14752 }
14753
14754 auto [Mask, VL] =
14755 getDefaultVLOps(VecVT: SrcVT, ContainerVT: SrcContainerVT, DL, DAG, Subtarget);
14756
14757 SDValue FpToInt;
14758 if (FRM == RISCVFPRndMode::RTZ) {
14759 // Use the dedicated trunc static rounding mode if we're truncating so we
14760 // don't need to generate fsrmi/fsrm instructions.
14761 unsigned Opc =
14762 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14763 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL);
14764 } else if (FRM == RISCVFPRndMode::DYN) {
14765 unsigned Opc =
14766 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14767 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL);
14768 } else {
14769 unsigned Opc =
14770 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14771 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask,
14772 N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL);
14773 }
14774
14775 // If converted from fixed-length to scalable, convert back
14776 if (VT.isFixedLengthVector())
14777 FpToInt = convertFromScalableVector(VT, V: FpToInt, DAG, Subtarget);
14778
14779 return FpToInt;
14780 }
14781
14782 // Only handle XLen or i32 types. Other types narrower than XLen will
14783 // eventually be legalized to XLenVT.
14784 if (VT != MVT::i32 && VT != XLenVT)
14785 return SDValue();
14786
14787 unsigned Opc;
14788 if (VT == XLenVT)
14789 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14790 else
14791 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14792
14793 SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src.getOperand(i: 0),
14794 N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT));
14795 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FpToInt);
14796}
14797
14798// Fold
14799// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14800// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14801// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14802// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14803// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14804// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14805static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14806 TargetLowering::DAGCombinerInfo &DCI,
14807 const RISCVSubtarget &Subtarget) {
14808 SelectionDAG &DAG = DCI.DAG;
14809 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14810 MVT XLenVT = Subtarget.getXLenVT();
14811
14812 // Only handle XLen types. Other types narrower than XLen will eventually be
14813 // legalized to XLenVT.
14814 EVT DstVT = N->getValueType(ResNo: 0);
14815 if (DstVT != XLenVT)
14816 return SDValue();
14817
14818 SDValue Src = N->getOperand(Num: 0);
14819
14820 // Don't do this for strict-fp Src.
14821 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14822 return SDValue();
14823
14824 // Ensure the FP type is also legal.
14825 if (!TLI.isTypeLegal(VT: Src.getValueType()))
14826 return SDValue();
14827
14828 // Don't do this for f16 with Zfhmin and not Zfh.
14829 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14830 return SDValue();
14831
14832 EVT SatVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT();
14833
14834 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode());
14835 if (FRM == RISCVFPRndMode::Invalid)
14836 return SDValue();
14837
14838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14839
14840 unsigned Opc;
14841 if (SatVT == DstVT)
14842 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14843 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14844 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14845 else
14846 return SDValue();
14847 // FIXME: Support other SatVTs by clamping before or after the conversion.
14848
14849 Src = Src.getOperand(i: 0);
14850
14851 SDLoc DL(N);
14852 SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src,
14853 N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT));
14854
14855 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14856 // extend.
14857 if (Opc == RISCVISD::FCVT_WU_RV64)
14858 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14859
14860 // RISC-V FP-to-int conversions saturate to the destination register size, but
14861 // don't produce 0 for nan.
14862 SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT);
14863 return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt, Cond: ISD::CondCode::SETUO);
14864}
14865
14866// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14867// smaller than XLenVT.
14868static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14869 const RISCVSubtarget &Subtarget) {
14870 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14871
14872 SDValue Src = N->getOperand(Num: 0);
14873 if (Src.getOpcode() != ISD::BSWAP)
14874 return SDValue();
14875
14876 EVT VT = N->getValueType(ResNo: 0);
14877 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14878 !llvm::has_single_bit<uint32_t>(Value: VT.getSizeInBits()))
14879 return SDValue();
14880
14881 SDLoc DL(N);
14882 return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: Src.getOperand(i: 0));
14883}
14884
14885// Convert from one FMA opcode to another based on whether we are negating the
14886// multiply result and/or the accumulator.
14887// NOTE: Only supports RVV operations with VL.
14888static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14889 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
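  // For example, VFMADD computes (A*B)+C; negating the product gives
  // -(A*B)+C, which is VFNMSUB, and additionally negating the accumulator
  // gives -(A*B)-C, which is VFNMADD.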
14890 if (NegMul) {
14891 // clang-format off
14892 switch (Opcode) {
14893 default: llvm_unreachable("Unexpected opcode");
14894 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14895 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14896 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14897 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14898 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14899 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14900 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14901 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14902 }
14903 // clang-format on
14904 }
14905
14906 // Negating the accumulator changes ADD<->SUB.
14907 if (NegAcc) {
14908 // clang-format off
14909 switch (Opcode) {
14910 default: llvm_unreachable("Unexpected opcode");
14911 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14912 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14913 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14914 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14915 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14916 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14917 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14918 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14919 }
14920 // clang-format on
14921 }
14922
14923 return Opcode;
14924}
14925
14926static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14927 // Fold FNEG_VL into FMA opcodes.
14928 // The first operand of strict-fp is chain.
14929 unsigned Offset = N->isTargetStrictFPOpcode();
14930 SDValue A = N->getOperand(Num: 0 + Offset);
14931 SDValue B = N->getOperand(Num: 1 + Offset);
14932 SDValue C = N->getOperand(Num: 2 + Offset);
14933 SDValue Mask = N->getOperand(Num: 3 + Offset);
14934 SDValue VL = N->getOperand(Num: 4 + Offset);
14935
14936 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14937 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(i: 1) == Mask &&
14938 V.getOperand(i: 2) == VL) {
14939 // Return the negated input.
14940 V = V.getOperand(i: 0);
14941 return true;
14942 }
14943
14944 return false;
14945 };
14946
14947 bool NegA = invertIfNegative(A);
14948 bool NegB = invertIfNegative(B);
14949 bool NegC = invertIfNegative(C);
14950
14951 // If no operands are negated, we're done.
14952 if (!NegA && !NegB && !NegC)
14953 return SDValue();
14954
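  // Negations of both multiplicands cancel each other, so the product is
  // effectively negated only when exactly one of A and B was negated; the
  // accumulator negation is tracked separately by NegC.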
14955 unsigned NewOpcode = negateFMAOpcode(Opcode: N->getOpcode(), NegMul: NegA != NegB, NegAcc: NegC);
14956 if (N->isTargetStrictFPOpcode())
14957 return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VTList: N->getVTList(),
14958 Ops: {N->getOperand(Num: 0), A, B, C, Mask, VL});
14959 return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: A, N2: B, N3: C, N4: Mask,
14960 N5: VL);
14961}
14962
14963static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14964 const RISCVSubtarget &Subtarget) {
14965 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14966 return V;
14967
14968 if (N->getValueType(0).isScalableVector() &&
14969 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14970 (Subtarget.hasVInstructionsF16Minimal() &&
14971 !Subtarget.hasVInstructionsF16())) {
14972 return SDValue();
14973 }
14974
14975 // FIXME: Ignore strict opcodes for now.
14976 if (N->isTargetStrictFPOpcode())
14977 return SDValue();
14978
14979 // Try to form widening FMA.
14980 SDValue Op0 = N->getOperand(Num: 0);
14981 SDValue Op1 = N->getOperand(Num: 1);
14982 SDValue Mask = N->getOperand(Num: 3);
14983 SDValue VL = N->getOperand(Num: 4);
14984
14985 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14986 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14987 return SDValue();
14988
14989 // TODO: Refactor to handle more complex cases similar to
14990 // combineBinOp_VLToVWBinOp_VL.
14991 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14992 (Op0 != Op1 || !Op0->hasNUsesOfValue(NUses: 2, Value: 0)))
14993 return SDValue();
14994
14995 // Check the mask and VL are the same.
14996 if (Op0.getOperand(i: 1) != Mask || Op0.getOperand(i: 2) != VL ||
14997 Op1.getOperand(i: 1) != Mask || Op1.getOperand(i: 2) != VL)
14998 return SDValue();
14999
15000 unsigned NewOpc;
15001 switch (N->getOpcode()) {
15002 default:
15003 llvm_unreachable("Unexpected opcode");
15004 case RISCVISD::VFMADD_VL:
15005 NewOpc = RISCVISD::VFWMADD_VL;
15006 break;
15007 case RISCVISD::VFNMSUB_VL:
15008 NewOpc = RISCVISD::VFWNMSUB_VL;
15009 break;
15010 case RISCVISD::VFNMADD_VL:
15011 NewOpc = RISCVISD::VFWNMADD_VL;
15012 break;
15013 case RISCVISD::VFMSUB_VL:
15014 NewOpc = RISCVISD::VFWMSUB_VL;
15015 break;
15016 }
15017
15018 Op0 = Op0.getOperand(i: 0);
15019 Op1 = Op1.getOperand(i: 0);
15020
15021 return DAG.getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Op0, N2: Op1,
15022 N3: N->getOperand(Num: 2), N4: Mask, N5: VL);
15023}
15024
15025static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15026 const RISCVSubtarget &Subtarget) {
15027 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15028
15029 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15030 return SDValue();
15031
15032 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
15033 return SDValue();
15034 uint64_t ShAmt = N->getConstantOperandVal(Num: 1);
15035 if (ShAmt > 32)
15036 return SDValue();
15037
15038 SDValue N0 = N->getOperand(Num: 0);
15039
15040 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15041 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15042 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
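  // e.g. (sra (sext_inreg (shl X, 3), i32), 5) becomes (sra (shl X, 35), 37).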
15043 if (ShAmt < 32 &&
15044 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15045 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15046 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15047 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15048 uint64_t LShAmt = N0.getOperand(i: 0).getConstantOperandVal(i: 1);
15049 if (LShAmt < 32) {
15050 SDLoc ShlDL(N0.getOperand(i: 0));
15051 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15052 N0.getOperand(0).getOperand(0),
15053 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15054 SDLoc DL(N);
15055 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15056 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15057 }
15058 }
15059
15060 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15061 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15062 //
15063 // Also try these folds where an add or sub is in the middle.
15064  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
15065  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
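  // e.g. with C == 8: (sra (shl X, 32), 24) -> (shl (sext_inreg X, i32), 8).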
15066 SDValue Shl;
15067 ConstantSDNode *AddC = nullptr;
15068
15069 // We might have an ADD or SUB between the SRA and SHL.
15070 bool IsAdd = N0.getOpcode() == ISD::ADD;
15071 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15072 // Other operand needs to be a constant we can modify.
15073 AddC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: IsAdd ? 1 : 0));
15074 if (!AddC)
15075 return SDValue();
15076
15077 // AddC needs to have at least 32 trailing zeros.
15078 if (AddC->getAPIntValue().countr_zero() < 32)
15079 return SDValue();
15080
15081    // All users should be shifts by a constant less than or equal to 32. This
15082 // ensures we'll do this optimization for each of them to produce an
15083 // add/sub+sext_inreg they can all share.
15084 for (SDNode *U : N0->uses()) {
15085 if (U->getOpcode() != ISD::SRA ||
15086 !isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) ||
15087 U->getConstantOperandVal(Num: 1) > 32)
15088 return SDValue();
15089 }
15090
15091 Shl = N0.getOperand(i: IsAdd ? 0 : 1);
15092 } else {
15093 // Not an ADD or SUB.
15094 Shl = N0;
15095 }
15096
15097 // Look for a shift left by 32.
15098 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: Shl.getOperand(i: 1)) ||
15099 Shl.getConstantOperandVal(i: 1) != 32)
15100 return SDValue();
15101
15102  // If we didn't look through an add/sub, then the shl should have one use.
15103 // If we did look through an add/sub, the sext_inreg we create is free so
15104 // we're only creating 2 new instructions. It's enough to only remove the
15105 // original sra+add/sub.
15106 if (!AddC && !Shl.hasOneUse())
15107 return SDValue();
15108
15109 SDLoc DL(N);
15110 SDValue In = Shl.getOperand(i: 0);
15111
15112 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15113 // constant.
15114 if (AddC) {
15115 SDValue ShiftedAddC =
15116 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15117 if (IsAdd)
15118 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15119 else
15120 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15121 }
15122
15123 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15124 DAG.getValueType(MVT::i32));
15125 if (ShAmt == 32)
15126 return SExt;
15127
15128 return DAG.getNode(
15129 ISD::SHL, DL, MVT::i64, SExt,
15130 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15131}
15132
15133// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15134// the result is used as the condition of a br_cc or select_cc we can invert,
15135// inverting the setcc is free, and Z is 0/1. Caller will invert the
15136// br_cc/select_cc.
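// e.g. (and (setcc X, Y, eq), (xor Z, 1)) becomes (or (setcc X, Y, ne), Z);
// after the caller inverts the br_cc/select_cc condition, the overall test is
// unchanged.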
15137static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15138 bool IsAnd = Cond.getOpcode() == ISD::AND;
15139 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15140 return SDValue();
15141
15142 if (!Cond.hasOneUse())
15143 return SDValue();
15144
15145 SDValue Setcc = Cond.getOperand(i: 0);
15146 SDValue Xor = Cond.getOperand(i: 1);
15147 // Canonicalize setcc to LHS.
15148 if (Setcc.getOpcode() != ISD::SETCC)
15149 std::swap(a&: Setcc, b&: Xor);
15150 // LHS should be a setcc and RHS should be an xor.
15151 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15152 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15153 return SDValue();
15154
15155 // If the condition is an And, SimplifyDemandedBits may have changed
15156 // (xor Z, 1) to (not Z).
15157 SDValue Xor1 = Xor.getOperand(i: 1);
15158 if (!isOneConstant(V: Xor1) && !(IsAnd && isAllOnesConstant(V: Xor1)))
15159 return SDValue();
15160
15161 EVT VT = Cond.getValueType();
15162 SDValue Xor0 = Xor.getOperand(i: 0);
15163
15164 // The LHS of the xor needs to be 0/1.
15165 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
15166 if (!DAG.MaskedValueIsZero(Op: Xor0, Mask))
15167 return SDValue();
15168
15169 // We can only invert integer setccs.
15170 EVT SetCCOpVT = Setcc.getOperand(i: 0).getValueType();
15171 if (!SetCCOpVT.isScalarInteger())
15172 return SDValue();
15173
15174 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Setcc.getOperand(i: 2))->get();
15175 if (ISD::isIntEqualitySetCC(Code: CCVal)) {
15176 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT);
15177 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 0),
15178 RHS: Setcc.getOperand(i: 1), Cond: CCVal);
15179 } else if (CCVal == ISD::SETLT && isNullConstant(V: Setcc.getOperand(i: 0))) {
15180 // Invert (setlt 0, X) by converting to (setlt X, 1).
15181 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 1),
15182 RHS: DAG.getConstant(Val: 1, DL: SDLoc(Setcc), VT), Cond: CCVal);
15183 } else if (CCVal == ISD::SETLT && isOneConstant(V: Setcc.getOperand(i: 1))) {
15184    // Invert (setlt X, 1) by converting to (setlt 0, X).
15185 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT,
15186 LHS: DAG.getConstant(Val: 0, DL: SDLoc(Setcc), VT),
15187 RHS: Setcc.getOperand(i: 0), Cond: CCVal);
15188 } else
15189 return SDValue();
15190
15191 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15192 return DAG.getNode(Opcode: Opc, DL: SDLoc(Cond), VT, N1: Setcc, N2: Xor.getOperand(i: 0));
15193}
15194
15195// Perform common combines for BR_CC and SELECT_CC conditions.
15196static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15197 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15198 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
15199
15200  // An arithmetic right shift preserves the sign, so the shift can be omitted
15201  // when the result is only compared against zero.
15202 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15203 // setge (sra X, N), 0 -> setge X, 0
15204 if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15205 LHS.getOpcode() == ISD::SRA) {
15206 LHS = LHS.getOperand(i: 0);
15207 return true;
15208 }
15209
15210 if (!ISD::isIntEqualitySetCC(Code: CCVal))
15211 return false;
15212
15213 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15214 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15215 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) &&
15216 LHS.getOperand(i: 0).getValueType() == Subtarget.getXLenVT()) {
15217 // If we're looking for eq 0 instead of ne 0, we need to invert the
15218 // condition.
15219 bool Invert = CCVal == ISD::SETEQ;
15220 CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get();
15221 if (Invert)
15222 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
15223
15224 RHS = LHS.getOperand(i: 1);
15225 LHS = LHS.getOperand(i: 0);
15226 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
15227
15228 CC = DAG.getCondCode(Cond: CCVal);
15229 return true;
15230 }
15231
15232 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15233 if (LHS.getOpcode() == ISD::XOR && isNullConstant(V: RHS)) {
15234 RHS = LHS.getOperand(i: 1);
15235 LHS = LHS.getOperand(i: 0);
15236 return true;
15237 }
15238
15239 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
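  // Shifting the tested bit into the sign position lets the equality test
  // against zero be replaced by a signed comparison with zero: ge when the
  // bit is clear, lt when it is set.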
15240 if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15241 LHS.getOperand(i: 1).getOpcode() == ISD::Constant) {
15242 SDValue LHS0 = LHS.getOperand(i: 0);
15243 if (LHS0.getOpcode() == ISD::AND &&
15244 LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) {
15245 uint64_t Mask = LHS0.getConstantOperandVal(i: 1);
15246 uint64_t ShAmt = LHS.getConstantOperandVal(i: 1);
15247 if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) {
15248 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15249 CC = DAG.getCondCode(Cond: CCVal);
15250
15251 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15252 LHS = LHS0.getOperand(i: 0);
15253 if (ShAmt != 0)
15254 LHS =
15255 DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0),
15256 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
15257 return true;
15258 }
15259 }
15260 }
15261
15262  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15263 // This can occur when legalizing some floating point comparisons.
15264 APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1);
15265 if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) {
15266 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
15267 CC = DAG.getCondCode(Cond: CCVal);
15268 RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType());
15269 return true;
15270 }
15271
15272 if (isNullConstant(V: RHS)) {
15273 if (SDValue NewCond = tryDemorganOfBooleanCondition(Cond: LHS, DAG)) {
15274 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
15275 CC = DAG.getCondCode(Cond: CCVal);
15276 LHS = NewCond;
15277 return true;
15278 }
15279 }
15280
15281 return false;
15282}
15283
15284// Fold
15285// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15286// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15287// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15288// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
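// The shift opcodes handled below fold the same way, e.g.
// (select C, (shl Y, X), Y) -> (shl Y, (select C, X, 0)); for these
// non-commutative operations only the form with Y as the first operand of the
// binop is matched.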
15289static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15290 SDValue TrueVal, SDValue FalseVal,
15291 bool Swapped) {
15292 bool Commutative = true;
15293 unsigned Opc = TrueVal.getOpcode();
15294 switch (Opc) {
15295 default:
15296 return SDValue();
15297 case ISD::SHL:
15298 case ISD::SRA:
15299 case ISD::SRL:
15300 case ISD::SUB:
15301 Commutative = false;
15302 break;
15303 case ISD::ADD:
15304 case ISD::OR:
15305 case ISD::XOR:
15306 break;
15307 }
15308
15309 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(Val: FalseVal))
15310 return SDValue();
15311
15312 unsigned OpToFold;
15313 if (FalseVal == TrueVal.getOperand(i: 0))
15314 OpToFold = 0;
15315 else if (Commutative && FalseVal == TrueVal.getOperand(i: 1))
15316 OpToFold = 1;
15317 else
15318 return SDValue();
15319
15320 EVT VT = N->getValueType(ResNo: 0);
15321 SDLoc DL(N);
15322 SDValue OtherOp = TrueVal.getOperand(i: 1 - OpToFold);
15323 EVT OtherOpVT = OtherOp->getValueType(ResNo: 0);
15324 SDValue IdentityOperand =
15325 DAG.getNeutralElement(Opcode: Opc, DL, VT: OtherOpVT, Flags: N->getFlags());
15326 if (!Commutative)
15327 IdentityOperand = DAG.getConstant(Val: 0, DL, VT: OtherOpVT);
15328 assert(IdentityOperand && "No identity operand!");
15329
15330 if (Swapped)
15331 std::swap(a&: OtherOp, b&: IdentityOperand);
15332 SDValue NewSel =
15333 DAG.getSelect(DL, VT: OtherOpVT, Cond: N->getOperand(Num: 0), LHS: OtherOp, RHS: IdentityOperand);
15334 return DAG.getNode(Opcode: TrueVal.getOpcode(), DL, VT, N1: FalseVal, N2: NewSel);
15335}
15336
15337// This tries to get rid of the `select` and `icmp` that are being used to
15338// handle targets that do not support `cttz(0)`/`ctlz(0)`.
15339static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15340 SDValue Cond = N->getOperand(Num: 0);
15341
15342 // This represents either CTTZ or CTLZ instruction.
15343 SDValue CountZeroes;
15344
15345 SDValue ValOnZero;
15346
15347 if (Cond.getOpcode() != ISD::SETCC)
15348 return SDValue();
15349
15350 if (!isNullConstant(V: Cond->getOperand(Num: 1)))
15351 return SDValue();
15352
15353 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond->getOperand(Num: 2))->get();
15354 if (CCVal == ISD::CondCode::SETEQ) {
15355 CountZeroes = N->getOperand(Num: 2);
15356 ValOnZero = N->getOperand(Num: 1);
15357 } else if (CCVal == ISD::CondCode::SETNE) {
15358 CountZeroes = N->getOperand(Num: 1);
15359 ValOnZero = N->getOperand(Num: 2);
15360 } else {
15361 return SDValue();
15362 }
15363
15364 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15365 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15366 CountZeroes = CountZeroes.getOperand(i: 0);
15367
15368 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15369 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15370 CountZeroes.getOpcode() != ISD::CTLZ &&
15371 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15372 return SDValue();
15373
15374 if (!isNullConstant(V: ValOnZero))
15375 return SDValue();
15376
15377 SDValue CountZeroesArgument = CountZeroes->getOperand(Num: 0);
15378 if (Cond->getOperand(Num: 0) != CountZeroesArgument)
15379 return SDValue();
15380
15381 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15382 CountZeroes = DAG.getNode(Opcode: ISD::CTTZ, DL: SDLoc(CountZeroes),
15383 VT: CountZeroes.getValueType(), Operand: CountZeroesArgument);
15384 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15385 CountZeroes = DAG.getNode(Opcode: ISD::CTLZ, DL: SDLoc(CountZeroes),
15386 VT: CountZeroes.getValueType(), Operand: CountZeroesArgument);
15387 }
15388
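  // After replacing the zero-undef variants, cttz/ctlz of 0 returns the bit
  // width. Masking with BitWidth - 1 maps that result to 0 (the bit widths
  // seen here are powers of two) while leaving all smaller results unchanged,
  // which matches the original select.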
15389 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15390 SDValue BitWidthMinusOne =
15391 DAG.getConstant(Val: BitWidth - 1, DL: SDLoc(N), VT: CountZeroes.getValueType());
15392
15393 auto AndNode = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: CountZeroes.getValueType(),
15394 N1: CountZeroes, N2: BitWidthMinusOne);
15395 return DAG.getZExtOrTrunc(Op: AndNode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0));
15396}
15397
15398static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15399 const RISCVSubtarget &Subtarget) {
15400 SDValue Cond = N->getOperand(Num: 0);
15401 SDValue True = N->getOperand(Num: 1);
15402 SDValue False = N->getOperand(Num: 2);
15403 SDLoc DL(N);
15404 EVT VT = N->getValueType(ResNo: 0);
15405 EVT CondVT = Cond.getValueType();
15406
15407 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15408 return SDValue();
15409
15410  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
15411  // BEXTI, where C is a power of 2.
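  // e.g. (select (seteq (and X, 0x1000), 0), T, F) becomes
  // (select (setne (and X, 0x1000), 0), F, T); 0x1000 does not fit in an ANDI
  // immediate, but the and+setne pair selects to a single BEXTI.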
15412 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15413 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15414 SDValue LHS = Cond.getOperand(i: 0);
15415 SDValue RHS = Cond.getOperand(i: 1);
15416 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
15417 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15418 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) && isNullConstant(V: RHS)) {
15419 const APInt &MaskVal = LHS.getConstantOperandAPInt(i: 1);
15420 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(N: 12))
15421 return DAG.getSelect(DL, VT,
15422 Cond: DAG.getSetCC(DL, VT: CondVT, LHS, RHS, Cond: ISD::SETNE),
15423 LHS: False, RHS: True);
15424 }
15425 }
15426 return SDValue();
15427}
15428
15429static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15430 const RISCVSubtarget &Subtarget) {
15431 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15432 return Folded;
15433
15434 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15435 return V;
15436
15437 if (Subtarget.hasConditionalMoveFusion())
15438 return SDValue();
15439
15440 SDValue TrueVal = N->getOperand(Num: 1);
15441 SDValue FalseVal = N->getOperand(Num: 2);
15442 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15443 return V;
15444 return tryFoldSelectIntoOp(N, DAG, TrueVal: FalseVal, FalseVal: TrueVal, /*Swapped*/true);
15445}
15446
15447/// If we have a build_vector where each lane is binop X, C, where C
15448/// is a constant (but not necessarily the same constant on all lanes),
15449/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15450/// We assume that materializing a constant build vector will be no more
15451/// expensive than performing O(n) binops.
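/// e.g. (build_vector (add X1, 1), (add X2, 2)) becomes
/// (add (build_vector X1, X2), (build_vector 1, 2)).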
15452static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15453 const RISCVSubtarget &Subtarget,
15454 const RISCVTargetLowering &TLI) {
15455 SDLoc DL(N);
15456 EVT VT = N->getValueType(ResNo: 0);
15457
15458 assert(!VT.isScalableVector() && "unexpected build vector");
15459
15460 if (VT.getVectorNumElements() == 1)
15461 return SDValue();
15462
15463 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15464 if (!TLI.isBinOp(Opcode))
15465 return SDValue();
15466
15467 if (!TLI.isOperationLegalOrCustom(Op: Opcode, VT) || !TLI.isTypeLegal(VT))
15468 return SDValue();
15469
15470 // This BUILD_VECTOR involves an implicit truncation, and sinking
15471 // truncates through binops is non-trivial.
15472 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15473 return SDValue();
15474
15475 SmallVector<SDValue> LHSOps;
15476 SmallVector<SDValue> RHSOps;
15477 for (SDValue Op : N->ops()) {
15478 if (Op.isUndef()) {
15479 // We can't form a divide or remainder from undef.
15480 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15481 return SDValue();
15482
15483 LHSOps.push_back(Elt: Op);
15484 RHSOps.push_back(Elt: Op);
15485 continue;
15486 }
15487
15488    // TODO: We can handle operations which have a neutral rhs value
15489 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15490 // of profit in a more explicit manner.
15491 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15492 return SDValue();
15493
15494 LHSOps.push_back(Elt: Op.getOperand(i: 0));
15495 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)) &&
15496 !isa<ConstantFPSDNode>(Val: Op.getOperand(i: 1)))
15497 return SDValue();
15498 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15499 // have different LHS and RHS types.
15500 if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType())
15501 return SDValue();
15502
15503 RHSOps.push_back(Elt: Op.getOperand(i: 1));
15504 }
15505
15506 return DAG.getNode(Opcode, DL, VT, N1: DAG.getBuildVector(VT, DL, Ops: LHSOps),
15507 N2: DAG.getBuildVector(VT, DL, Ops: RHSOps));
15508}
15509
15510static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15511 const RISCVSubtarget &Subtarget,
15512 const RISCVTargetLowering &TLI) {
15513 SDValue InVec = N->getOperand(Num: 0);
15514 SDValue InVal = N->getOperand(Num: 1);
15515 SDValue EltNo = N->getOperand(Num: 2);
15516 SDLoc DL(N);
15517
15518 EVT VT = InVec.getValueType();
15519 if (VT.isScalableVector())
15520 return SDValue();
15521
15522 if (!InVec.hasOneUse())
15523 return SDValue();
15524
15525 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15526 // move the insert_vector_elts into the arms of the binop. Note that
15527 // the new RHS must be a constant.
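  // e.g. (insert_vector_elt (add V, VecC), (add S, C2), Idx) becomes
  // (add (insert_vector_elt V, S, Idx), (insert_vector_elt VecC, C2, Idx)).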
15528 const unsigned InVecOpcode = InVec->getOpcode();
15529 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(Opcode: InVecOpcode) &&
15530 InVal.hasOneUse()) {
15531 SDValue InVecLHS = InVec->getOperand(Num: 0);
15532 SDValue InVecRHS = InVec->getOperand(Num: 1);
15533 SDValue InValLHS = InVal->getOperand(Num: 0);
15534 SDValue InValRHS = InVal->getOperand(Num: 1);
15535
15536 if (!ISD::isBuildVectorOfConstantSDNodes(N: InVecRHS.getNode()))
15537 return SDValue();
15538 if (!isa<ConstantSDNode>(Val: InValRHS) && !isa<ConstantFPSDNode>(Val: InValRHS))
15539 return SDValue();
15540 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15541 // have different LHS and RHS types.
15542 if (InVec.getOperand(i: 0).getValueType() != InVec.getOperand(i: 1).getValueType())
15543 return SDValue();
15544 SDValue LHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT,
15545 N1: InVecLHS, N2: InValLHS, N3: EltNo);
15546 SDValue RHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT,
15547 N1: InVecRHS, N2: InValRHS, N3: EltNo);
15548 return DAG.getNode(Opcode: InVecOpcode, DL, VT, N1: LHS, N2: RHS);
15549 }
15550
15551 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15552 // move the insert_vector_elt to the source operand of the concat_vector.
15553 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15554 return SDValue();
15555
15556 auto *IndexC = dyn_cast<ConstantSDNode>(Val&: EltNo);
15557 if (!IndexC)
15558 return SDValue();
15559 unsigned Elt = IndexC->getZExtValue();
15560
15561 EVT ConcatVT = InVec.getOperand(i: 0).getValueType();
15562 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15563 return SDValue();
15564 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15565 SDValue NewIdx = DAG.getVectorIdxConstant(Val: Elt % ConcatNumElts, DL);
15566
15567 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15568 SDValue ConcatOp = InVec.getOperand(i: ConcatOpIdx);
15569 ConcatOp = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ConcatVT,
15570 N1: ConcatOp, N2: InVal, N3: NewIdx);
15571
15572 SmallVector<SDValue> ConcatOps;
15573 ConcatOps.append(in_start: InVec->op_begin(), in_end: InVec->op_end());
15574 ConcatOps[ConcatOpIdx] = ConcatOp;
15575 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps);
15576}
15577
15578// If we're concatenating a series of vector loads like
15579// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15580// Then we can turn this into a strided load by widening the vector elements
15581// vlse32 p, stride=n
15582static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15583 const RISCVSubtarget &Subtarget,
15584 const RISCVTargetLowering &TLI) {
15585 SDLoc DL(N);
15586 EVT VT = N->getValueType(ResNo: 0);
15587
15588 // Only perform this combine on legal MVTs.
15589 if (!TLI.isTypeLegal(VT))
15590 return SDValue();
15591
15592 // TODO: Potentially extend this to scalable vectors
15593 if (VT.isScalableVector())
15594 return SDValue();
15595
15596 auto *BaseLd = dyn_cast<LoadSDNode>(Val: N->getOperand(Num: 0));
15597 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(N: BaseLd) ||
15598 !SDValue(BaseLd, 0).hasOneUse())
15599 return SDValue();
15600
15601 EVT BaseLdVT = BaseLd->getValueType(ResNo: 0);
15602
15603 // Go through the loads and check that they're strided
15604 SmallVector<LoadSDNode *> Lds;
15605 Lds.push_back(Elt: BaseLd);
15606 Align Align = BaseLd->getAlign();
15607 for (SDValue Op : N->ops().drop_front()) {
15608 auto *Ld = dyn_cast<LoadSDNode>(Val&: Op);
15609 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15610 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(N: Ld) ||
15611 Ld->getValueType(ResNo: 0) != BaseLdVT)
15612 return SDValue();
15613
15614 Lds.push_back(Elt: Ld);
15615
15616 // The common alignment is the most restrictive (smallest) of all the loads
15617 Align = std::min(a: Align, b: Ld->getAlign());
15618 }
15619
15620 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15621 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15622 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15623 // If the load ptrs can be decomposed into a common (Base + Index) with a
15624 // common constant stride, then return the constant stride.
15625 BaseIndexOffset BIO1 = BaseIndexOffset::match(N: Ld1, DAG);
15626 BaseIndexOffset BIO2 = BaseIndexOffset::match(N: Ld2, DAG);
15627 if (BIO1.equalBaseIndex(Other: BIO2, DAG))
15628 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15629
15630 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15631 SDValue P1 = Ld1->getBasePtr();
15632 SDValue P2 = Ld2->getBasePtr();
15633 if (P2.getOpcode() == ISD::ADD && P2.getOperand(i: 0) == P1)
15634 return {{P2.getOperand(i: 1), false}};
15635 if (P1.getOpcode() == ISD::ADD && P1.getOperand(i: 0) == P2)
15636 return {{P1.getOperand(i: 1), true}};
15637
15638 return std::nullopt;
15639 };
15640
15641 // Get the distance between the first and second loads
15642 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15643 if (!BaseDiff)
15644 return SDValue();
15645
15646 // Check all the loads are the same distance apart
15647 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15648 if (GetPtrDiff(*It, *std::next(x: It)) != BaseDiff)
15649 return SDValue();
15650
15651 // TODO: At this point, we've successfully matched a generalized gather
15652 // load. Maybe we should emit that, and then move the specialized
15653 // matchers above and below into a DAG combine?
15654
15655 // Get the widened scalar type, e.g. v4i8 -> i64
15656 unsigned WideScalarBitWidth =
15657 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15658 MVT WideScalarVT = MVT::getIntegerVT(BitWidth: WideScalarBitWidth);
15659
15660 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
15661 MVT WideVecVT = MVT::getVectorVT(VT: WideScalarVT, NumElements: N->getNumOperands());
15662 if (!TLI.isTypeLegal(VT: WideVecVT))
15663 return SDValue();
15664
15665 // Check that the operation is legal
15666 if (!TLI.isLegalStridedLoadStore(DataType: WideVecVT, Alignment: Align))
15667 return SDValue();
15668
15669 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15670 SDValue Stride = std::holds_alternative<SDValue>(v: StrideVariant)
15671 ? std::get<SDValue>(v&: StrideVariant)
15672 : DAG.getConstant(Val: std::get<int64_t>(v&: StrideVariant), DL,
15673 VT: Lds[0]->getOffset().getValueType());
15674 if (MustNegateStride)
15675 Stride = DAG.getNegative(Val: Stride, DL, VT: Stride.getValueType());
15676
15677 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15678 SDValue IntID =
15679 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15680 Subtarget.getXLenVT());
15681
15682 SDValue AllOneMask =
15683 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15684 DAG.getConstant(1, DL, MVT::i1));
15685
15686 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(VT: WideVecVT),
15687 BaseLd->getBasePtr(), Stride, AllOneMask};
15688
15689 uint64_t MemSize;
15690 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Val&: Stride);
15691 ConstStride && ConstStride->getSExtValue() >= 0)
15692 // total size = (elsize * n) + (stride - elsize) * (n-1)
15693 // = elsize + stride * (n-1)
15694 MemSize = WideScalarVT.getSizeInBits() +
15695 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15696 else
15697 // If Stride isn't constant, then we can't know how much it will load
15698 MemSize = MemoryLocation::UnknownSize;
15699
15700 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15701 PtrInfo: BaseLd->getPointerInfo(), F: BaseLd->getMemOperand()->getFlags(), Size: MemSize,
15702 BaseAlignment: Align);
15703
15704 SDValue StridedLoad = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs,
15705 Ops, MemVT: WideVecVT, MMO);
15706 for (SDValue Ld : N->ops())
15707 DAG.makeEquivalentMemoryOrdering(OldLoad: cast<LoadSDNode>(Val&: Ld), NewMemOp: StridedLoad);
15708
15709 return DAG.getBitcast(VT: VT.getSimpleVT(), V: StridedLoad);
15710}
15711
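// Try to fold the addend of an ADD/ADD_VL into a widening multiply
// (VWMUL/VWMULU/VWMULSU) to form the corresponding VWMACC/VWMACCU/VWMACCSU
// node.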
15712static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15713 const RISCVSubtarget &Subtarget) {
15714
15715 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15716
15717 if (N->getValueType(ResNo: 0).isFixedLengthVector())
15718 return SDValue();
15719
15720 SDValue Addend = N->getOperand(Num: 0);
15721 SDValue MulOp = N->getOperand(Num: 1);
15722
15723 if (N->getOpcode() == RISCVISD::ADD_VL) {
15724 SDValue AddMergeOp = N->getOperand(Num: 2);
15725 if (!AddMergeOp.isUndef())
15726 return SDValue();
15727 }
15728
15729 auto IsVWMulOpc = [](unsigned Opc) {
15730 switch (Opc) {
15731 case RISCVISD::VWMUL_VL:
15732 case RISCVISD::VWMULU_VL:
15733 case RISCVISD::VWMULSU_VL:
15734 return true;
15735 default:
15736 return false;
15737 }
15738 };
15739
15740 if (!IsVWMulOpc(MulOp.getOpcode()))
15741 std::swap(a&: Addend, b&: MulOp);
15742
15743 if (!IsVWMulOpc(MulOp.getOpcode()))
15744 return SDValue();
15745
15746 SDValue MulMergeOp = MulOp.getOperand(i: 2);
15747
15748 if (!MulMergeOp.isUndef())
15749 return SDValue();
15750
15751 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15752 const RISCVSubtarget &Subtarget) {
15753 if (N->getOpcode() == ISD::ADD) {
15754 SDLoc DL(N);
15755 return getDefaultScalableVLOps(VecVT: N->getSimpleValueType(ResNo: 0), DL, DAG,
15756 Subtarget);
15757 }
15758 return std::make_pair(x: N->getOperand(Num: 3), y: N->getOperand(Num: 4));
15759 }(N, DAG, Subtarget);
15760
15761 SDValue MulMask = MulOp.getOperand(i: 3);
15762 SDValue MulVL = MulOp.getOperand(i: 4);
15763
15764 if (AddMask != MulMask || AddVL != MulVL)
15765 return SDValue();
15766
15767 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15768 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15769 "Unexpected opcode after VWMACC_VL");
15770 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15771 "Unexpected opcode after VWMACC_VL!");
15772 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15773 "Unexpected opcode after VWMUL_VL!");
15774 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15775 "Unexpected opcode after VWMUL_VL!");
15776
15777 SDLoc DL(N);
15778 EVT VT = N->getValueType(ResNo: 0);
15779 SDValue Ops[] = {MulOp.getOperand(i: 0), MulOp.getOperand(i: 1), Addend, AddMask,
15780 AddVL};
15781 return DAG.getNode(Opcode: Opc, DL, VT, Ops);
15782}
15783
15784static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15785 ISD::MemIndexType &IndexType,
15786 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15787 if (!DCI.isBeforeLegalize())
15788 return false;
15789
15790 SelectionDAG &DAG = DCI.DAG;
15791 const MVT XLenVT =
15792 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15793
15794 const EVT IndexVT = Index.getValueType();
15795
15796  // RISC-V indexed loads and stores only support the "unsigned unscaled"
15797  // addressing mode, so anything else must be manually legalized.
15798 if (!isIndexTypeSigned(IndexType))
15799 return false;
15800
15801 if (IndexVT.getVectorElementType().bitsLT(VT: XLenVT)) {
15802 // Any index legalization should first promote to XLenVT, so we don't lose
15803 // bits when scaling. This may create an illegal index type so we let
15804 // LLVM's legalization take care of the splitting.
15805 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15806 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL,
15807 VT: IndexVT.changeVectorElementType(EltVT: XLenVT), Operand: Index);
15808 }
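  // Sign-extending the index to XLenVT first makes it safe to reinterpret it
  // as unsigned: once the elements are XLEN bits wide, the address addition
  // produces the same result for either interpretation.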
15809 IndexType = ISD::UNSIGNED_SCALED;
15810 return true;
15811}
15812
15813/// Match the index vector of a scatter or gather node as the shuffle mask
15814/// which performs the rearrangement if possible. Will only match if
15815/// all lanes are touched, and thus replacing the scatter or gather with
15816/// a unit strided access and shuffle is legal.
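/// e.g. for i32 elements, byte indices (4, 0, 12, 8) touch every lane and
/// correspond to the shuffle mask <1, 0, 3, 2>.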
15817static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15818 SmallVector<int> &ShuffleMask) {
15819 if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()))
15820 return false;
15821 if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode()))
15822 return false;
15823
15824 const unsigned ElementSize = VT.getScalarStoreSize();
15825 const unsigned NumElems = VT.getVectorNumElements();
15826
15827 // Create the shuffle mask and check all bits active
15828 assert(ShuffleMask.empty());
15829 BitVector ActiveLanes(NumElems);
15830 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15831 // TODO: We've found an active bit of UB, and could be
15832 // more aggressive here if desired.
15833 if (Index->getOperand(Num: i)->isUndef())
15834 return false;
15835 uint64_t C = Index->getConstantOperandVal(Num: i);
15836 if (C % ElementSize != 0)
15837 return false;
15838 C = C / ElementSize;
15839 if (C >= NumElems)
15840 return false;
15841 ShuffleMask.push_back(Elt: C);
15842 ActiveLanes.set(C);
15843 }
15844 return ActiveLanes.all();
15845}
15846
15847/// Match the index of a gather or scatter operation as an operation
15848/// with twice the element width and half the number of elements. This is
15849/// generally profitable (if legal) because these operations are linear
15850/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15851/// come out ahead.
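/// e.g. a gather of 8 x i16 with byte indices (0, 2, 8, 10, 4, 6, 12, 14)
/// reads adjacent element pairs and can instead be done as a gather of
/// 4 x i32 with byte indices (0, 8, 4, 12).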
15852static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15853 Align BaseAlign, const RISCVSubtarget &ST) {
15854 if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()))
15855 return false;
15856 if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode()))
15857 return false;
15858
15859  // Attempt a doubling. If we can use an element type 4x or 8x in
15860  // size, this will happen via multiple iterations of the transform.
15861 const unsigned NumElems = VT.getVectorNumElements();
15862 if (NumElems % 2 != 0)
15863 return false;
15864
15865 const unsigned ElementSize = VT.getScalarStoreSize();
15866 const unsigned WiderElementSize = ElementSize * 2;
15867 if (WiderElementSize > ST.getELen()/8)
15868 return false;
15869
15870 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
15871 return false;
15872
15873 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15874 // TODO: We've found an active bit of UB, and could be
15875 // more aggressive here if desired.
15876 if (Index->getOperand(Num: i)->isUndef())
15877 return false;
15878 // TODO: This offset check is too strict if we support fully
15879 // misaligned memory operations.
15880 uint64_t C = Index->getConstantOperandVal(Num: i);
15881 if (i % 2 == 0) {
15882 if (C % WiderElementSize != 0)
15883 return false;
15884 continue;
15885 }
15886 uint64_t Last = Index->getConstantOperandVal(Num: i-1);
15887 if (C != Last + ElementSize)
15888 return false;
15889 }
15890 return true;
15891}
15892
15893
15894SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15895 DAGCombinerInfo &DCI) const {
15896 SelectionDAG &DAG = DCI.DAG;
15897 const MVT XLenVT = Subtarget.getXLenVT();
15898 SDLoc DL(N);
15899
15900 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15901 // bits are demanded. N will be added to the Worklist if it was not deleted.
15902 // Caller should return SDValue(N, 0) if this returns true.
15903 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15904 SDValue Op = N->getOperand(Num: OpNo);
15905 APInt Mask = APInt::getLowBitsSet(numBits: Op.getValueSizeInBits(), loBitsSet: LowBits);
15906 if (!SimplifyDemandedBits(Op, DemandedBits: Mask, DCI))
15907 return false;
15908
15909 if (N->getOpcode() != ISD::DELETED_NODE)
15910 DCI.AddToWorklist(N);
15911 return true;
15912 };
15913
15914 switch (N->getOpcode()) {
15915 default:
15916 break;
15917 case RISCVISD::SplitF64: {
15918 SDValue Op0 = N->getOperand(Num: 0);
15919 // If the input to SplitF64 is just BuildPairF64 then the operation is
15920 // redundant. Instead, use BuildPairF64's operands directly.
15921 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15922 return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1));
15923
15924 if (Op0->isUndef()) {
15925 SDValue Lo = DAG.getUNDEF(MVT::i32);
15926 SDValue Hi = DAG.getUNDEF(MVT::i32);
15927 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
15928 }
15929
15930 // It's cheaper to materialise two 32-bit integers than to load a double
15931 // from the constant pool and transfer it to integer registers through the
15932 // stack.
15933 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
15934 APInt V = C->getValueAPF().bitcastToAPInt();
15935 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15936 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15937 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
15938 }
15939
15940 // This is a target-specific version of a DAGCombine performed in
15941 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15942 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15943 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15944 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15945 !Op0.getNode()->hasOneUse())
15946 break;
15947 SDValue NewSplitF64 =
15948 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15949 Op0.getOperand(0));
15950 SDValue Lo = NewSplitF64.getValue(R: 0);
15951 SDValue Hi = NewSplitF64.getValue(R: 1);
15952 APInt SignBit = APInt::getSignMask(BitWidth: 32);
15953 if (Op0.getOpcode() == ISD::FNEG) {
15954 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15955 DAG.getConstant(SignBit, DL, MVT::i32));
15956 return DCI.CombineTo(N, Res0: Lo, Res1: NewHi);
15957 }
15958 assert(Op0.getOpcode() == ISD::FABS);
15959 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15960 DAG.getConstant(~SignBit, DL, MVT::i32));
15961 return DCI.CombineTo(N, Res0: Lo, Res1: NewHi);
15962 }
15963 case RISCVISD::SLLW:
15964 case RISCVISD::SRAW:
15965 case RISCVISD::SRLW:
15966 case RISCVISD::RORW:
15967 case RISCVISD::ROLW: {
15968 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15969 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15970 SimplifyDemandedLowBitsHelper(1, 5))
15971 return SDValue(N, 0);
15972
15973 break;
15974 }
15975 case RISCVISD::CLZW:
15976 case RISCVISD::CTZW: {
15977 // Only the lower 32 bits of the first operand are read
15978 if (SimplifyDemandedLowBitsHelper(0, 32))
15979 return SDValue(N, 0);
15980 break;
15981 }
15982 case RISCVISD::FMV_W_X_RV64: {
15983    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15984 // conversion is unnecessary and can be replaced with the
15985 // FMV_X_ANYEXTW_RV64 operand.
15986 SDValue Op0 = N->getOperand(Num: 0);
15987 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15988 return Op0.getOperand(i: 0);
15989 break;
15990 }
15991 case RISCVISD::FMV_X_ANYEXTH:
15992 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15993 SDLoc DL(N);
15994 SDValue Op0 = N->getOperand(Num: 0);
15995 MVT VT = N->getSimpleValueType(ResNo: 0);
15996 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
15997 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
15998 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
15999 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16000 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16001 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16002 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16003 assert(Op0.getOperand(0).getValueType() == VT &&
16004 "Unexpected value type!");
16005 return Op0.getOperand(i: 0);
16006 }
16007
16008 // This is a target-specific version of a DAGCombine performed in
16009 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16010 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16011 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16012 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16013 !Op0.getNode()->hasOneUse())
16014 break;
16015 SDValue NewFMV = DAG.getNode(Opcode: N->getOpcode(), DL, VT, Operand: Op0.getOperand(i: 0));
16016 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16017 APInt SignBit = APInt::getSignMask(BitWidth: FPBits).sext(width: VT.getSizeInBits());
16018 if (Op0.getOpcode() == ISD::FNEG)
16019 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewFMV,
16020 N2: DAG.getConstant(Val: SignBit, DL, VT));
16021
16022 assert(Op0.getOpcode() == ISD::FABS);
16023 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NewFMV,
16024 N2: DAG.getConstant(Val: ~SignBit, DL, VT));
16025 }
16026 case ISD::ABS: {
16027 EVT VT = N->getValueType(ResNo: 0);
16028 SDValue N0 = N->getOperand(Num: 0);
16029 // abs (sext) -> zext (abs)
16030 // abs (zext) -> zext (handled elsewhere)
16031 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16032 SDValue Src = N0.getOperand(i: 0);
16033 SDLoc DL(N);
16034 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT,
16035 Operand: DAG.getNode(Opcode: ISD::ABS, DL, VT: Src.getValueType(), Operand: Src));
16036 }
16037 break;
16038 }
16039 case ISD::ADD: {
16040 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16041 return V;
16042 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16043 return V;
16044 return performADDCombine(N, DAG, Subtarget);
16045 }
16046 case ISD::SUB: {
16047 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16048 return V;
16049 return performSUBCombine(N, DAG, Subtarget);
16050 }
16051 case ISD::AND:
16052 return performANDCombine(N, DCI, Subtarget);
16053 case ISD::OR: {
16054 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16055 return V;
16056 return performORCombine(N, DCI, Subtarget);
16057 }
16058 case ISD::XOR:
16059 return performXORCombine(N, DAG, Subtarget);
16060 case ISD::MUL:
16061 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16062 return V;
16063 return performMULCombine(N, DAG, DCI, Subtarget);
16064 case ISD::SDIV:
16065 case ISD::UDIV:
16066 case ISD::SREM:
16067 case ISD::UREM:
16068 if (SDValue V = combineBinOpOfZExt(N, DAG))
16069 return V;
16070 break;
16071 case ISD::FADD:
16072 case ISD::UMAX:
16073 case ISD::UMIN:
16074 case ISD::SMAX:
16075 case ISD::SMIN:
16076 case ISD::FMAXNUM:
16077 case ISD::FMINNUM: {
16078 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16079 return V;
16080 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16081 return V;
16082 return SDValue();
16083 }
16084 case ISD::SETCC:
16085 return performSETCCCombine(N, DAG, Subtarget);
16086 case ISD::SIGN_EXTEND_INREG:
16087 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16088 case ISD::ZERO_EXTEND:
16089 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16090 // type legalization. This is safe because fp_to_uint produces poison if
16091 // it overflows.
16092 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16093 SDValue Src = N->getOperand(Num: 0);
16094 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16095 isTypeLegal(Src.getOperand(0).getValueType()))
16096 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16097 Src.getOperand(0));
16098 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16099 isTypeLegal(VT: Src.getOperand(i: 1).getValueType())) {
16100 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16101 SDValue Res = DAG.getNode(Opcode: ISD::STRICT_FP_TO_UINT, DL: SDLoc(N), VTList: VTs,
16102 N1: Src.getOperand(i: 0), N2: Src.getOperand(i: 1));
16103 DCI.CombineTo(N, Res);
16104 DAG.ReplaceAllUsesOfValueWith(From: Src.getValue(R: 1), To: Res.getValue(R: 1));
16105 DCI.recursivelyDeleteUnusedNodes(N: Src.getNode());
16106 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16107 }
16108 }
16109 return SDValue();
16110 case RISCVISD::TRUNCATE_VECTOR_VL: {
16111 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16112    // This benefits the cases where X and Y are both vectors of the same
16113    // low-precision type. Without this fold, the truncate is lowered into
16114    // n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16115    // restriction, and that pattern is later expanded into a series of
16116    // "vsetvli" and "vnsrl" instructions before reaching this point.
16117 auto IsTruncNode = [](SDValue V) {
16118 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16119 return false;
16120 SDValue VL = V.getOperand(i: 2);
16121 auto *C = dyn_cast<ConstantSDNode>(Val&: VL);
16122      // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for the VMSET_VL operand
16123 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16124 (isa<RegisterSDNode>(VL) &&
16125 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16126 return V.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL &&
16127 IsVLMAXForVMSET;
16128 };
16129
16130 SDValue Op = N->getOperand(Num: 0);
16131
16132 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16133 // to distinguish such pattern.
16134 while (IsTruncNode(Op)) {
16135 if (!Op.hasOneUse())
16136 return SDValue();
16137 Op = Op.getOperand(i: 0);
16138 }
16139
16140 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16141 SDValue N0 = Op.getOperand(i: 0);
16142 SDValue N1 = Op.getOperand(i: 1);
16143 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16144 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16145 SDValue N00 = N0.getOperand(i: 0);
16146 SDValue N10 = N1.getOperand(i: 0);
16147 if (N00.getValueType().isVector() &&
16148 N00.getValueType() == N10.getValueType() &&
16149 N->getValueType(ResNo: 0) == N10.getValueType()) {
16150 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16151 SDValue SMin = DAG.getNode(
16152 Opcode: ISD::SMIN, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0), N1: N10,
16153 N2: DAG.getConstant(Val: MaxShAmt, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0)));
16154 return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: N00, N2: SMin);
16155 }
16156 }
16157 }
16158 break;
16159 }
16160 case ISD::TRUNCATE:
16161 return performTRUNCATECombine(N, DAG, Subtarget);
16162 case ISD::SELECT:
16163 return performSELECTCombine(N, DAG, Subtarget);
16164 case RISCVISD::CZERO_EQZ:
16165 case RISCVISD::CZERO_NEZ:
16166 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
16167 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
16168 if (N->getOperand(Num: 1).getOpcode() == ISD::XOR &&
16169 isOneConstant(V: N->getOperand(Num: 1).getOperand(i: 1))) {
16170 SDValue Cond = N->getOperand(Num: 1).getOperand(i: 0);
16171 APInt Mask = APInt::getBitsSetFrom(numBits: Cond.getValueSizeInBits(), loBit: 1);
16172 if (DAG.MaskedValueIsZero(Op: Cond, Mask)) {
16173 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
16174 ? RISCVISD::CZERO_NEZ
16175 : RISCVISD::CZERO_EQZ;
16176 return DAG.getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
16177 N1: N->getOperand(Num: 0), N2: Cond);
16178 }
16179 }
16180 return SDValue();
16181
16182 case RISCVISD::SELECT_CC: {
16183    // Try to transform this SELECT_CC into simpler operations.
16184 SDValue LHS = N->getOperand(Num: 0);
16185 SDValue RHS = N->getOperand(Num: 1);
16186 SDValue CC = N->getOperand(Num: 2);
16187 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
16188 SDValue TrueV = N->getOperand(Num: 3);
16189 SDValue FalseV = N->getOperand(Num: 4);
16190 SDLoc DL(N);
16191 EVT VT = N->getValueType(ResNo: 0);
16192
16193 // If the True and False values are the same, we don't need a select_cc.
16194 if (TrueV == FalseV)
16195 return TrueV;
16196
16197 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16198 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
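    // Worked example (illustrative): on RV64, x >> 63 is all-ones when x < 0
    // and zero otherwise, so (x >> 63) & (y - z) is either (y - z) or 0, and
    // adding z yields y or z respectively.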
16199 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(Val: TrueV) &&
16200 isa<ConstantSDNode>(Val: FalseV) && isNullConstant(V: RHS) &&
16201 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16202 if (CCVal == ISD::CondCode::SETGE)
16203 std::swap(a&: TrueV, b&: FalseV);
16204
16205 int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue();
16206 int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue();
      // Only handle simm12; a constant outside that range would have to be
      // materialized in a register anyway.
16209 if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) &&
16210 isInt<12>(x: TrueSImm - FalseSImm)) {
16211 SDValue SRA =
16212 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS,
16213 N2: DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT));
16214 SDValue AND =
16215 DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
16216 N2: DAG.getConstant(Val: TrueSImm - FalseSImm, DL, VT));
16217 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV);
16218 }
16219
16220 if (CCVal == ISD::CondCode::SETGE)
16221 std::swap(a&: TrueV, b&: FalseV);
16222 }
16223
16224 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16225 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0),
16226 Ops: {LHS, RHS, CC, TrueV, FalseV});
16227
16228 if (!Subtarget.hasConditionalMoveFusion()) {
16229 // (select c, -1, y) -> -c | y
16230 if (isAllOnesConstant(V: TrueV)) {
16231 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal);
16232 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16233 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: FalseV);
16234 }
16235 // (select c, y, -1) -> -!c | y
16236 if (isAllOnesConstant(V: FalseV)) {
16237 SDValue C =
16238 DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT));
16239 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16240 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: TrueV);
16241 }
16242
16243 // (select c, 0, y) -> -!c & y
16244 if (isNullConstant(V: TrueV)) {
16245 SDValue C =
16246 DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT));
16247 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16248 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: FalseV);
16249 }
16250 // (select c, y, 0) -> -c & y
16251 if (isNullConstant(V: FalseV)) {
16252 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal);
16253 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16254 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: TrueV);
16255 }
16256 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16257 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16258 if (((isOneConstant(V: FalseV) && LHS == TrueV &&
16259 CCVal == ISD::CondCode::SETNE) ||
16260 (isOneConstant(V: TrueV) && LHS == FalseV &&
16261 CCVal == ISD::CondCode::SETEQ)) &&
16262 isNullConstant(V: RHS)) {
16263 // freeze it to be safe.
16264 LHS = DAG.getFreeze(V: LHS);
16265 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::CondCode::SETEQ);
16266 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: LHS, N2: C);
16267 }
16268 }
16269
16270 // If both true/false are an xor with 1, pull through the select.
16271 // This can occur after op legalization if both operands are setccs that
16272 // require an xor to invert.
16273 // FIXME: Generalize to other binary ops with identical operand?
16274 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16275 TrueV.getOperand(i: 1) == FalseV.getOperand(i: 1) &&
16276 isOneConstant(V: TrueV.getOperand(i: 1)) &&
16277 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16278 SDValue NewSel = DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, N1: LHS, N2: RHS, N3: CC,
16279 N4: TrueV.getOperand(i: 0), N5: FalseV.getOperand(i: 0));
16280 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewSel, N2: TrueV.getOperand(i: 1));
16281 }
16282
16283 return SDValue();
16284 }
16285 case RISCVISD::BR_CC: {
16286 SDValue LHS = N->getOperand(Num: 1);
16287 SDValue RHS = N->getOperand(Num: 2);
16288 SDValue CC = N->getOperand(Num: 3);
16289 SDLoc DL(N);
16290
16291 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16292 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: N->getValueType(ResNo: 0),
16293 N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4));
16294
16295 return SDValue();
16296 }
16297 case ISD::BITREVERSE:
16298 return performBITREVERSECombine(N, DAG, Subtarget);
16299 case ISD::FP_TO_SINT:
16300 case ISD::FP_TO_UINT:
16301 return performFP_TO_INTCombine(N, DCI, Subtarget);
16302 case ISD::FP_TO_SINT_SAT:
16303 case ISD::FP_TO_UINT_SAT:
16304 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16305 case ISD::FCOPYSIGN: {
16306 EVT VT = N->getValueType(ResNo: 0);
16307 if (!VT.isVector())
16308 break;
    // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where the FP_ROUND is
    // truncating (TRUNC=1).
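    // For example (illustrative):
    //   (fcopysign X, (fpext (fneg Y))) -> (fcopysign X, (fneg (fpext Y)))
    // so that the negated sign can later be matched as vfsgnjn.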
16313 SDValue In2 = N->getOperand(Num: 1);
16314 // Avoid cases where the extend/round has multiple uses, as duplicating
16315 // those is typically more expensive than removing a fneg.
16316 if (!In2.hasOneUse())
16317 break;
16318 if (In2.getOpcode() != ISD::FP_EXTEND &&
16319 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(i: 1) != 0))
16320 break;
16321 In2 = In2.getOperand(i: 0);
16322 if (In2.getOpcode() != ISD::FNEG)
16323 break;
16324 SDLoc DL(N);
16325 SDValue NewFPExtRound = DAG.getFPExtendOrRound(Op: In2.getOperand(i: 0), DL, VT);
16326 return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: N->getOperand(Num: 0),
16327 N2: DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: NewFPExtRound));
16328 }
16329 case ISD::MGATHER: {
16330 const auto *MGN = dyn_cast<MaskedGatherSDNode>(Val: N);
16331 const EVT VT = N->getValueType(ResNo: 0);
16332 SDValue Index = MGN->getIndex();
16333 SDValue ScaleOp = MGN->getScale();
16334 ISD::MemIndexType IndexType = MGN->getIndexType();
16335 assert(!MGN->isIndexScaled() &&
16336 "Scaled gather/scatter should not be formed");
16337
16338 SDLoc DL(N);
16339 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16340 return DAG.getMaskedGather(
16341 VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL,
16342 Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16343 MGN->getBasePtr(), Index, ScaleOp},
16344 MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType());
16345
16346 if (narrowIndex(N&: Index, IndexType, DAG))
16347 return DAG.getMaskedGather(
16348 VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL,
16349 Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16350 MGN->getBasePtr(), Index, ScaleOp},
16351 MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType());
16352
16353 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16354 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
      // The VID sequence is computed in XLenVT, not in the type of Index.
      // Tell isSimpleVIDSequence this so we avoid overflow.
16357 if (std::optional<VIDSequence> SimpleVID =
16358 isSimpleVIDSequence(Op: Index, EltSizeInBits: Subtarget.getXLen());
16359 SimpleVID && SimpleVID->StepDenominator == 1) {
16360 const int64_t StepNumerator = SimpleVID->StepNumerator;
16361 const int64_t Addend = SimpleVID->Addend;
16362
        // Note: We don't need to check alignment here since (by assumption
        // from the existence of the gather), our offsets must be sufficiently
        // aligned.
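        // Illustrative example (values chosen for exposition): an index
        // vector <2, 6, 10, 14> has StepNumerator 4 and Addend 2, so the
        // gather becomes a strided load from BasePtr + 2 with stride 4.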
16366
16367 const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
16368 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16369 assert(IndexType == ISD::UNSIGNED_SCALED);
16370 SDValue BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: MGN->getBasePtr(),
16371 N2: DAG.getConstant(Val: Addend, DL, VT: PtrVT));
16372
16373 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16374 SDValue IntID =
16375 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16376 XLenVT);
16377 SDValue Ops[] =
16378 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16379 DAG.getConstant(Val: StepNumerator, DL, VT: XLenVT), MGN->getMask()};
16380 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs,
16381 Ops, MemVT: VT, MMO: MGN->getMemOperand());
16382 }
16383 }
16384
16385 SmallVector<int> ShuffleMask;
16386 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16387 matchIndexAsShuffle(VT, Index, Mask: MGN->getMask(), ShuffleMask)) {
16388 SDValue Load = DAG.getMaskedLoad(VT, dl: DL, Chain: MGN->getChain(),
16389 Base: MGN->getBasePtr(), Offset: DAG.getUNDEF(VT: XLenVT),
16390 Mask: MGN->getMask(), Src0: DAG.getUNDEF(VT),
16391 MemVT: MGN->getMemoryVT(), MMO: MGN->getMemOperand(),
16392 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD);
16393 SDValue Shuffle =
16394 DAG.getVectorShuffle(VT, dl: DL, N1: Load, N2: DAG.getUNDEF(VT), Mask: ShuffleMask);
16395 return DAG.getMergeValues(Ops: {Shuffle, Load.getValue(R: 1)}, dl: DL);
16396 }
16397
16398 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16399 matchIndexAsWiderOp(VT, Index, Mask: MGN->getMask(),
16400 BaseAlign: MGN->getMemOperand()->getBaseAlign(), ST: Subtarget)) {
16401 SmallVector<SDValue> NewIndices;
16402 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16403 NewIndices.push_back(Elt: Index.getOperand(i));
16404 EVT IndexVT = Index.getValueType()
16405 .getHalfNumVectorElementsVT(Context&: *DAG.getContext());
16406 Index = DAG.getBuildVector(VT: IndexVT, DL, Ops: NewIndices);
16407
16408 unsigned ElementSize = VT.getScalarStoreSize();
16409 EVT WideScalarVT = MVT::getIntegerVT(BitWidth: ElementSize * 8 * 2);
16410 auto EltCnt = VT.getVectorElementCount();
16411 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16412 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideScalarVT,
16413 EC: EltCnt.divideCoefficientBy(RHS: 2));
16414 SDValue Passthru = DAG.getBitcast(VT: WideVT, V: MGN->getPassThru());
16415 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16416 EltCnt.divideCoefficientBy(2));
16417 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16418
16419 SDValue Gather =
16420 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16421 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16422 Index, ScaleOp},
16423 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16424 SDValue Result = DAG.getBitcast(VT, V: Gather.getValue(R: 0));
16425 return DAG.getMergeValues(Ops: {Result, Gather.getValue(R: 1)}, dl: DL);
16426 }
16427 break;
16428 }
  case ISD::MSCATTER: {
16430 const auto *MSN = dyn_cast<MaskedScatterSDNode>(Val: N);
16431 SDValue Index = MSN->getIndex();
16432 SDValue ScaleOp = MSN->getScale();
16433 ISD::MemIndexType IndexType = MSN->getIndexType();
16434 assert(!MSN->isIndexScaled() &&
16435 "Scaled gather/scatter should not be formed");
16436
16437 SDLoc DL(N);
16438 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16439 return DAG.getMaskedScatter(
16440 VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL,
16441 Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16442 Index, ScaleOp},
16443 MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore());
16444
16445 if (narrowIndex(N&: Index, IndexType, DAG))
16446 return DAG.getMaskedScatter(
16447 VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL,
16448 Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16449 Index, ScaleOp},
16450 MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore());
16451
16452 EVT VT = MSN->getValue()->getValueType(ResNo: 0);
16453 SmallVector<int> ShuffleMask;
16454 if (!MSN->isTruncatingStore() &&
16455 matchIndexAsShuffle(VT, Index, Mask: MSN->getMask(), ShuffleMask)) {
16456 SDValue Shuffle = DAG.getVectorShuffle(VT, dl: DL, N1: MSN->getValue(),
16457 N2: DAG.getUNDEF(VT), Mask: ShuffleMask);
16458 return DAG.getMaskedStore(Chain: MSN->getChain(), dl: DL, Val: Shuffle, Base: MSN->getBasePtr(),
16459 Offset: DAG.getUNDEF(VT: XLenVT), Mask: MSN->getMask(),
16460 MemVT: MSN->getMemoryVT(), MMO: MSN->getMemOperand(),
16461 AM: ISD::UNINDEXED, IsTruncating: false);
16462 }
16463 break;
16464 }
16465 case ISD::VP_GATHER: {
16466 const auto *VPGN = dyn_cast<VPGatherSDNode>(Val: N);
16467 SDValue Index = VPGN->getIndex();
16468 SDValue ScaleOp = VPGN->getScale();
16469 ISD::MemIndexType IndexType = VPGN->getIndexType();
16470 assert(!VPGN->isIndexScaled() &&
16471 "Scaled gather/scatter should not be formed");
16472
16473 SDLoc DL(N);
16474 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16475 return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL,
16476 Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index,
16477 ScaleOp, VPGN->getMask(),
16478 VPGN->getVectorLength()},
16479 MMO: VPGN->getMemOperand(), IndexType);
16480
16481 if (narrowIndex(N&: Index, IndexType, DAG))
16482 return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL,
16483 Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index,
16484 ScaleOp, VPGN->getMask(),
16485 VPGN->getVectorLength()},
16486 MMO: VPGN->getMemOperand(), IndexType);
16487
16488 break;
16489 }
16490 case ISD::VP_SCATTER: {
16491 const auto *VPSN = dyn_cast<VPScatterSDNode>(Val: N);
16492 SDValue Index = VPSN->getIndex();
16493 SDValue ScaleOp = VPSN->getScale();
16494 ISD::MemIndexType IndexType = VPSN->getIndexType();
16495 assert(!VPSN->isIndexScaled() &&
16496 "Scaled gather/scatter should not be formed");
16497
16498 SDLoc DL(N);
16499 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16500 return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL,
16501 Ops: {VPSN->getChain(), VPSN->getValue(),
16502 VPSN->getBasePtr(), Index, ScaleOp,
16503 VPSN->getMask(), VPSN->getVectorLength()},
16504 MMO: VPSN->getMemOperand(), IndexType);
16505
16506 if (narrowIndex(N&: Index, IndexType, DAG))
16507 return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL,
16508 Ops: {VPSN->getChain(), VPSN->getValue(),
16509 VPSN->getBasePtr(), Index, ScaleOp,
16510 VPSN->getMask(), VPSN->getVectorLength()},
16511 MMO: VPSN->getMemOperand(), IndexType);
16512 break;
16513 }
16514 case RISCVISD::SHL_VL:
16515 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16516 return V;
16517 [[fallthrough]];
16518 case RISCVISD::SRA_VL:
16519 case RISCVISD::SRL_VL: {
16520 SDValue ShAmt = N->getOperand(Num: 1);
16521 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16522 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16523 SDLoc DL(N);
16524 SDValue VL = N->getOperand(Num: 4);
16525 EVT VT = N->getValueType(ResNo: 0);
16526 ShAmt = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
16527 N2: ShAmt.getOperand(i: 1), N3: VL);
16528 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt,
16529 N3: N->getOperand(Num: 2), N4: N->getOperand(Num: 3), N5: N->getOperand(Num: 4));
16530 }
16531 break;
16532 }
16533 case ISD::SRA:
16534 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16535 return V;
16536 [[fallthrough]];
16537 case ISD::SRL:
16538 case ISD::SHL: {
16539 if (N->getOpcode() == ISD::SHL) {
16540 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16541 return V;
16542 }
16543 SDValue ShAmt = N->getOperand(Num: 1);
16544 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16545 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16546 SDLoc DL(N);
16547 EVT VT = N->getValueType(ResNo: 0);
16548 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16549 ShAmt.getOperand(1),
16550 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16551 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt);
16552 }
16553 break;
16554 }
16555 case RISCVISD::ADD_VL:
16556 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16557 return V;
16558 return combineToVWMACC(N, DAG, Subtarget);
16559 case RISCVISD::VWADD_W_VL:
16560 case RISCVISD::VWADDU_W_VL:
16561 case RISCVISD::VWSUB_W_VL:
16562 case RISCVISD::VWSUBU_W_VL:
16563 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16564 case RISCVISD::SUB_VL:
16565 case RISCVISD::MUL_VL:
16566 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16567 case RISCVISD::VFMADD_VL:
16568 case RISCVISD::VFNMADD_VL:
16569 case RISCVISD::VFMSUB_VL:
16570 case RISCVISD::VFNMSUB_VL:
16571 case RISCVISD::STRICT_VFMADD_VL:
16572 case RISCVISD::STRICT_VFNMADD_VL:
16573 case RISCVISD::STRICT_VFMSUB_VL:
16574 case RISCVISD::STRICT_VFNMSUB_VL:
16575 return performVFMADD_VLCombine(N, DAG, Subtarget);
16576 case RISCVISD::FADD_VL:
16577 case RISCVISD::FSUB_VL:
16578 case RISCVISD::FMUL_VL:
16579 case RISCVISD::VFWADD_W_VL:
16580 case RISCVISD::VFWSUB_W_VL: {
16581 if (N->getValueType(0).isScalableVector() &&
16582 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16583 (Subtarget.hasVInstructionsF16Minimal() &&
16584 !Subtarget.hasVInstructionsF16()))
16585 return SDValue();
16586 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16587 }
16588 case ISD::LOAD:
16589 case ISD::STORE: {
16590 if (DCI.isAfterLegalizeDAG())
16591 if (SDValue V = performMemPairCombine(N, DCI))
16592 return V;
16593
16594 if (N->getOpcode() != ISD::STORE)
16595 break;
16596
16597 auto *Store = cast<StoreSDNode>(Val: N);
16598 SDValue Chain = Store->getChain();
16599 EVT MemVT = Store->getMemoryVT();
16600 SDValue Val = Store->getValue();
16601 SDLoc DL(N);
16602
16603 bool IsScalarizable =
16604 MemVT.isFixedLengthVector() && ISD::isNormalStore(N: Store) &&
16605 Store->isSimple() &&
16606 MemVT.getVectorElementType().bitsLE(VT: Subtarget.getXLenVT()) &&
16607 isPowerOf2_64(Value: MemVT.getSizeInBits()) &&
16608 MemVT.getSizeInBits() <= Subtarget.getXLen();
16609
16610 // If sufficiently aligned we can scalarize stores of constant vectors of
16611 // any power-of-two size up to XLen bits, provided that they aren't too
16612 // expensive to materialize.
16613 // vsetivli zero, 2, e8, m1, ta, ma
16614 // vmv.v.i v8, 4
    //   vse8.v v8, (a0)
16616 // ->
16617 // li a1, 1028
16618 // sh a1, 0(a0)
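    // (Here 1028 == 0x0404, i.e. the two i8 elements of value 4 packed into
    // the halfword that sh stores.)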
16619 if (DCI.isBeforeLegalize() && IsScalarizable &&
16620 ISD::isBuildVectorOfConstantSDNodes(N: Val.getNode())) {
16621 // Get the constant vector bits
16622 APInt NewC(Val.getValueSizeInBits(), 0);
16623 uint64_t EltSize = Val.getScalarValueSizeInBits();
16624 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16625 if (Val.getOperand(i).isUndef())
16626 continue;
16627 NewC.insertBits(SubBits: Val.getConstantOperandAPInt(i).trunc(width: EltSize),
16628 bitPosition: i * EltSize);
16629 }
16630 MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits());
16631
16632 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16633 true) <= 2 &&
16634 allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
16635 VT: NewVT, MMO: *Store->getMemOperand())) {
16636 SDValue NewV = DAG.getConstant(Val: NewC, DL, VT: NewVT);
16637 return DAG.getStore(Chain, dl: DL, Val: NewV, Ptr: Store->getBasePtr(),
16638 PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(),
16639 MMOFlags: Store->getMemOperand()->getFlags());
16640 }
16641 }
16642
16643 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16644 // vsetivli zero, 2, e16, m1, ta, ma
16645 // vle16.v v8, (a0)
16646 // vse16.v v8, (a1)
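    // ->
    //   lw a2, 0(a0)
    //   sw a2, 0(a1)
    // (illustrative scalarized form; the 2 x e16 copy is a 32-bit copy)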
16647 if (auto *L = dyn_cast<LoadSDNode>(Val);
16648 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16649 L->hasNUsesOfValue(NUses: 1, Value: 0) && L->hasNUsesOfValue(NUses: 1, Value: 1) &&
16650 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(N: L) &&
16651 L->getMemoryVT() == MemVT) {
16652 MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits());
16653 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
16654 VT: NewVT, MMO: *Store->getMemOperand()) &&
16655 allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
16656 VT: NewVT, MMO: *L->getMemOperand())) {
16657 SDValue NewL = DAG.getLoad(VT: NewVT, dl: DL, Chain: L->getChain(), Ptr: L->getBasePtr(),
16658 PtrInfo: L->getPointerInfo(), Alignment: L->getOriginalAlign(),
16659 MMOFlags: L->getMemOperand()->getFlags());
16660 return DAG.getStore(Chain, dl: DL, Val: NewL, Ptr: Store->getBasePtr(),
16661 PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(),
16662 MMOFlags: Store->getMemOperand()->getFlags());
16663 }
16664 }
16665
16666 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16667 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16668 // any illegal types.
16669 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16670 (DCI.isAfterLegalizeDAG() &&
16671 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16672 isNullConstant(V: Val.getOperand(i: 1)))) {
16673 SDValue Src = Val.getOperand(i: 0);
16674 MVT VecVT = Src.getSimpleValueType();
16675 // VecVT should be scalable and memory VT should match the element type.
16676 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16677 MemVT == VecVT.getVectorElementType()) {
16678 SDLoc DL(N);
16679 MVT MaskVT = getMaskTypeFor(VecVT);
16680 return DAG.getStoreVP(
16681 Chain: Store->getChain(), dl: DL, Val: Src, Ptr: Store->getBasePtr(), Offset: Store->getOffset(),
16682 Mask: DAG.getConstant(Val: 1, DL, VT: MaskVT),
16683 EVL: DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT()), MemVT,
16684 MMO: Store->getMemOperand(), AM: Store->getAddressingMode(),
16685 IsTruncating: Store->isTruncatingStore(), /*IsCompress*/ IsCompressing: false);
16686 }
16687 }
16688
16689 break;
16690 }
16691 case ISD::SPLAT_VECTOR: {
16692 EVT VT = N->getValueType(ResNo: 0);
16693 // Only perform this combine on legal MVT types.
16694 if (!isTypeLegal(VT))
16695 break;
16696 if (auto Gather = matchSplatAsGather(SplatVal: N->getOperand(Num: 0), VT: VT.getSimpleVT(), DL: N,
16697 DAG, Subtarget))
16698 return Gather;
16699 break;
16700 }
16701 case ISD::BUILD_VECTOR:
16702 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, TLI: *this))
16703 return V;
16704 break;
16705 case ISD::CONCAT_VECTORS:
16706 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, TLI: *this))
16707 return V;
16708 break;
16709 case ISD::INSERT_VECTOR_ELT:
16710 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, TLI: *this))
16711 return V;
16712 break;
16713 case RISCVISD::VFMV_V_F_VL: {
16714 const MVT VT = N->getSimpleValueType(ResNo: 0);
16715 SDValue Passthru = N->getOperand(Num: 0);
16716 SDValue Scalar = N->getOperand(Num: 1);
16717 SDValue VL = N->getOperand(Num: 2);
16718
16719 // If VL is 1, we can use vfmv.s.f.
16720 if (isOneConstant(V: VL))
16721 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
16722 break;
16723 }
16724 case RISCVISD::VMV_V_X_VL: {
16725 const MVT VT = N->getSimpleValueType(ResNo: 0);
16726 SDValue Passthru = N->getOperand(Num: 0);
16727 SDValue Scalar = N->getOperand(Num: 1);
16728 SDValue VL = N->getOperand(Num: 2);
16729
16730 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16731 // scalar input.
16732 unsigned ScalarSize = Scalar.getValueSizeInBits();
16733 unsigned EltWidth = VT.getScalarSizeInBits();
16734 if (ScalarSize > EltWidth && Passthru.isUndef())
16735 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16736 return SDValue(N, 0);
16737
    // If VL is 1 and the scalar value won't benefit from being an immediate,
    // we can use vmv.s.x instead.
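    // For example (illustrative): a VL=1 splat of 1234 becomes vmv.s.x,
    // whereas a VL=1 splat of 3 is left alone since 3 fits a 5-bit immediate
    // and can be emitted as vmv.v.i.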
16740 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar);
16741 if (isOneConstant(V: VL) &&
16742 (!Const || Const->isZero() ||
16743 !Const->getAPIntValue().sextOrTrunc(width: EltWidth).isSignedIntN(N: 5)))
16744 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
16745
16746 break;
16747 }
16748 case RISCVISD::VFMV_S_F_VL: {
16749 SDValue Src = N->getOperand(Num: 1);
16750 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16751 // into an undef vector.
16752 // TODO: Could use a vslide or vmv.v.v for non-undef.
16753 if (N->getOperand(Num: 0).isUndef() &&
16754 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16755 isNullConstant(V: Src.getOperand(i: 1)) &&
16756 Src.getOperand(i: 0).getValueType().isScalableVector()) {
16757 EVT VT = N->getValueType(ResNo: 0);
16758 EVT SrcVT = Src.getOperand(i: 0).getValueType();
16759 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
16760 // Widths match, just return the original vector.
16761 if (SrcVT == VT)
16762 return Src.getOperand(i: 0);
16763 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16764 }
16765 [[fallthrough]];
16766 }
16767 case RISCVISD::VMV_S_X_VL: {
16768 const MVT VT = N->getSimpleValueType(ResNo: 0);
16769 SDValue Passthru = N->getOperand(Num: 0);
16770 SDValue Scalar = N->getOperand(Num: 1);
16771 SDValue VL = N->getOperand(Num: 2);
16772
    // Use M1 or smaller to avoid over-constraining register allocation.
16774 const MVT M1VT = getLMUL1VT(VT);
16775 if (M1VT.bitsLT(VT)) {
16776 SDValue M1Passthru =
16777 DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Passthru,
16778 N2: DAG.getVectorIdxConstant(Val: 0, DL));
16779 SDValue Result =
16780 DAG.getNode(Opcode: N->getOpcode(), DL, VT: M1VT, N1: M1Passthru, N2: Scalar, N3: VL);
16781 Result = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru, N2: Result,
16782 N3: DAG.getVectorIdxConstant(Val: 0, DL));
16783 return Result;
16784 }
16785
16786 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16787 // higher would involve overly constraining the register allocator for
16788 // no purpose.
16789 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar);
16790 Const && !Const->isZero() && isInt<5>(x: Const->getSExtValue()) &&
16791 VT.bitsLE(VT: getLMUL1VT(VT)) && Passthru.isUndef())
16792 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
16793
16794 break;
16795 }
16796 case RISCVISD::VMV_X_S: {
16797 SDValue Vec = N->getOperand(Num: 0);
16798 MVT VecVT = N->getOperand(Num: 0).getSimpleValueType();
16799 const MVT M1VT = getLMUL1VT(VT: VecVT);
16800 if (M1VT.bitsLT(VT: VecVT)) {
16801 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec,
16802 N2: DAG.getVectorIdxConstant(Val: 0, DL));
16803 return DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: N->getSimpleValueType(ResNo: 0), Operand: Vec);
16804 }
16805 break;
16806 }
16807 case ISD::INTRINSIC_VOID:
16808 case ISD::INTRINSIC_W_CHAIN:
16809 case ISD::INTRINSIC_WO_CHAIN: {
16810 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16811 unsigned IntNo = N->getConstantOperandVal(Num: IntOpNo);
16812 switch (IntNo) {
16813 // By default we do not combine any intrinsic.
16814 default:
16815 return SDValue();
16816 case Intrinsic::riscv_masked_strided_load: {
16817 MVT VT = N->getSimpleValueType(ResNo: 0);
16818 auto *Load = cast<MemIntrinsicSDNode>(Val: N);
16819 SDValue PassThru = N->getOperand(Num: 2);
16820 SDValue Base = N->getOperand(Num: 3);
16821 SDValue Stride = N->getOperand(Num: 4);
16822 SDValue Mask = N->getOperand(Num: 5);
16823
16824 // If the stride is equal to the element size in bytes, we can use
16825 // a masked.load.
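      // For example (illustrative): a strided load of e32 elements with a
      // byte stride of 4 reads consecutive words, so it is equivalent to a
      // unit-stride masked.load.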
16826 const unsigned ElementSize = VT.getScalarStoreSize();
16827 if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride);
16828 StrideC && StrideC->getZExtValue() == ElementSize)
16829 return DAG.getMaskedLoad(VT, dl: DL, Chain: Load->getChain(), Base,
16830 Offset: DAG.getUNDEF(VT: XLenVT), Mask, Src0: PassThru,
16831 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand(),
16832 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD);
16833 return SDValue();
16834 }
16835 case Intrinsic::riscv_masked_strided_store: {
16836 auto *Store = cast<MemIntrinsicSDNode>(Val: N);
16837 SDValue Value = N->getOperand(Num: 2);
16838 SDValue Base = N->getOperand(Num: 3);
16839 SDValue Stride = N->getOperand(Num: 4);
16840 SDValue Mask = N->getOperand(Num: 5);
16841
16842 // If the stride is equal to the element size in bytes, we can use
16843 // a masked.store.
16844 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16845 if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride);
16846 StrideC && StrideC->getZExtValue() == ElementSize)
16847 return DAG.getMaskedStore(Chain: Store->getChain(), dl: DL, Val: Value, Base,
16848 Offset: DAG.getUNDEF(VT: XLenVT), Mask,
16849 MemVT: Value.getValueType(), MMO: Store->getMemOperand(),
16850 AM: ISD::UNINDEXED, IsTruncating: false);
16851 return SDValue();
16852 }
16853 case Intrinsic::riscv_vcpop:
16854 case Intrinsic::riscv_vcpop_mask:
16855 case Intrinsic::riscv_vfirst:
16856 case Intrinsic::riscv_vfirst_mask: {
16857 SDValue VL = N->getOperand(Num: 2);
16858 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16859 IntNo == Intrinsic::riscv_vfirst_mask)
16860 VL = N->getOperand(Num: 3);
16861 if (!isNullConstant(V: VL))
16862 return SDValue();
16863 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16864 SDLoc DL(N);
16865 EVT VT = N->getValueType(ResNo: 0);
16866 if (IntNo == Intrinsic::riscv_vfirst ||
16867 IntNo == Intrinsic::riscv_vfirst_mask)
16868 return DAG.getConstant(Val: -1, DL, VT);
16869 return DAG.getConstant(Val: 0, DL, VT);
16870 }
16871 }
16872 }
16873 case ISD::BITCAST: {
16874 assert(Subtarget.useRVVForFixedLengthVectors());
16875 SDValue N0 = N->getOperand(Num: 0);
16876 EVT VT = N->getValueType(ResNo: 0);
16877 EVT SrcVT = N0.getValueType();
16878 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16879 // type, widen both sides to avoid a trip through memory.
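    // For example (illustrative): (i4 (bitcast (v4i1 V))) becomes a concat
    // of V with one undef v4i1 to form v8i1, a bitcast of that to i8, and a
    // truncate of the i8 back to i4.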
16880 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16881 VT.isScalarInteger()) {
16882 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16883 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT: SrcVT));
16884 Ops[0] = N0;
16885 SDLoc DL(N);
16886 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16887 N0 = DAG.getBitcast(MVT::i8, N0);
16888 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0);
16889 }
16890
16891 return SDValue();
16892 }
16893 }
16894
16895 return SDValue();
16896}
16897
16898bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16899 EVT XVT, unsigned KeptBits) const {
  // For vectors, we don't have a preference.
16901 if (XVT.isVector())
16902 return false;
16903
16904 if (XVT != MVT::i32 && XVT != MVT::i64)
16905 return false;
16906
16907 // We can use sext.w for RV64 or an srai 31 on RV32.
16908 if (KeptBits == 32 || KeptBits == 64)
16909 return true;
16910
16911 // With Zbb we can use sext.h/sext.b.
16912 return Subtarget.hasStdExtZbb() &&
16913 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16914 KeptBits == 16);
16915}
16916
16917bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16918 const SDNode *N, CombineLevel Level) const {
16919 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16920 N->getOpcode() == ISD::SRL) &&
16921 "Expected shift op");
16922
16923 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16924 // materialised in fewer instructions than `(OP _, c1)`:
16925 //
16926 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16927 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
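  // For example (illustrative): with c1 = 0x7ff and c2 = 12, c1 fits an
  // addi immediate but c1 << c2 = 0x7ff000 does not, so the fold is
  // prevented; with c1 = 1 and c2 = 3, the shifted constant 8 is still a
  // legal add immediate, so the fold is allowed.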
16928 SDValue N0 = N->getOperand(Num: 0);
16929 EVT Ty = N0.getValueType();
16930 if (Ty.isScalarInteger() &&
16931 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16932 auto *C1 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
16933 auto *C2 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
16934 if (C1 && C2) {
16935 const APInt &C1Int = C1->getAPIntValue();
16936 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16937
16938 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16939 // and the combine should happen, to potentially allow further combines
16940 // later.
16941 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16942 isLegalAddImmediate(Imm: ShiftedC1Int.getSExtValue()))
16943 return true;
16944
16945 // We can materialise `c1` in an add immediate, so it's "free", and the
16946 // combine should be prevented.
16947 if (C1Int.getSignificantBits() <= 64 &&
16948 isLegalAddImmediate(Imm: C1Int.getSExtValue()))
16949 return false;
16950
16951 // Neither constant will fit into an immediate, so find materialisation
16952 // costs.
16953 int C1Cost =
16954 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16955 /*CompressionCost*/ true);
16956 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16957 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16958 /*CompressionCost*/ true);
16959
16960 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16961 // combine should be prevented.
16962 if (C1Cost < ShiftedC1Cost)
16963 return false;
16964 }
16965 }
16966 return true;
16967}
16968
16969bool RISCVTargetLowering::targetShrinkDemandedConstant(
16970 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16971 TargetLoweringOpt &TLO) const {
16972 // Delay this optimization as late as possible.
16973 if (!TLO.LegalOps)
16974 return false;
16975
16976 EVT VT = Op.getValueType();
16977 if (VT.isVector())
16978 return false;
16979
16980 unsigned Opcode = Op.getOpcode();
16981 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
16982 return false;
16983
16984 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
16985 if (!C)
16986 return false;
16987
16988 const APInt &Mask = C->getAPIntValue();
16989
16990 // Clear all non-demanded bits initially.
16991 APInt ShrunkMask = Mask & DemandedBits;
16992
16993 // Try to make a smaller immediate by setting undemanded bits.
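  // Worked example (illustrative): on RV64, for (and X, 0xFFFFFF00) where
  // only the low 32 bits are demanded, ShrunkMask = 0xFFFFFF00 does not fit
  // simm12, but ExpandedMask = 0xFFFFFFFFFFFFFF00 is the 9-bit signed value
  // -256, so the constant can be replaced and the AND becomes andi X, -256.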
16994
16995 APInt ExpandedMask = Mask | ~DemandedBits;
16996
16997 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
16998 return ShrunkMask.isSubsetOf(RHS: Mask) && Mask.isSubsetOf(RHS: ExpandedMask);
16999 };
17000 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17001 if (NewMask == Mask)
17002 return true;
17003 SDLoc DL(Op);
17004 SDValue NewC = TLO.DAG.getConstant(Val: NewMask, DL, VT: Op.getValueType());
17005 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
17006 N1: Op.getOperand(i: 0), N2: NewC);
17007 return TLO.CombineTo(O: Op, N: NewOp);
17008 };
17009
17010 // If the shrunk mask fits in sign extended 12 bits, let the target
17011 // independent code apply it.
17012 if (ShrunkMask.isSignedIntN(N: 12))
17013 return false;
17014
  // AND has a few special cases for zero extension.
17016 if (Opcode == ISD::AND) {
    // Preserve (and X, 0xffff); if zext.h exists use zext.h, otherwise
    // use SLLI + SRLI.
17019 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17020 if (IsLegalMask(NewMask))
17021 return UseMask(NewMask);
17022
17023 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17024 if (VT == MVT::i64) {
17025 APInt NewMask = APInt(64, 0xffffffff);
17026 if (IsLegalMask(NewMask))
17027 return UseMask(NewMask);
17028 }
17029 }
17030
17031 // For the remaining optimizations, we need to be able to make a negative
17032 // number through a combination of mask and undemanded bits.
17033 if (!ExpandedMask.isNegative())
17034 return false;
17035
  // Compute the fewest number of bits needed to represent the negative
  // number.
17037 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17038
17039 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17040 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17041 // If we can't create a simm12, we shouldn't change opaque constants.
17042 APInt NewMask = ShrunkMask;
17043 if (MinSignedBits <= 12)
17044 NewMask.setBitsFrom(11);
17045 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(N: 32))
17046 NewMask.setBitsFrom(31);
17047 else
17048 return false;
17049
17050 // Check that our new mask is a subset of the demanded mask.
17051 assert(IsLegalMask(NewMask));
17052 return UseMask(NewMask);
17053}
17054
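// Emulate the generalized bit-reverse (GREV) / or-combine (GORC) permutation
// network for control value ShAmt. A control value of 7 corresponds to
// brev8 / orc.b. For example (illustrative):
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80
//     (brev8 reverses the bits within each byte)
//   computeGREVOrGORC(0x00010200, 7, /*IsGORC=*/true) == 0x00FFFF00
//     (orc.b smears any set bit across its byte)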
17055static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17056 static const uint64_t GREVMasks[] = {
17057 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17058 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17059
17060 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17061 unsigned Shift = 1 << Stage;
17062 if (ShAmt & Shift) {
17063 uint64_t Mask = GREVMasks[Stage];
17064 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17065 if (IsGORC)
17066 Res |= x;
17067 x = Res;
17068 }
17069 }
17070
17071 return x;
17072}
17073
17074void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17075 KnownBits &Known,
17076 const APInt &DemandedElts,
17077 const SelectionDAG &DAG,
17078 unsigned Depth) const {
17079 unsigned BitWidth = Known.getBitWidth();
17080 unsigned Opc = Op.getOpcode();
17081 assert((Opc >= ISD::BUILTIN_OP_END ||
17082 Opc == ISD::INTRINSIC_WO_CHAIN ||
17083 Opc == ISD::INTRINSIC_W_CHAIN ||
17084 Opc == ISD::INTRINSIC_VOID) &&
17085 "Should use MaskedValueIsZero if you don't know whether Op"
17086 " is a target node!");
17087
17088 Known.resetAll();
17089 switch (Opc) {
17090 default: break;
17091 case RISCVISD::SELECT_CC: {
17092 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 4), Depth: Depth + 1);
17093 // If we don't know any bits, early out.
17094 if (Known.isUnknown())
17095 break;
17096 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 3), Depth: Depth + 1);
17097
17098 // Only known if known in both the LHS and RHS.
17099 Known = Known.intersectWith(RHS: Known2);
17100 break;
17101 }
17102 case RISCVISD::CZERO_EQZ:
17103 case RISCVISD::CZERO_NEZ:
17104 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17105 // Result is either all zero or operand 0. We can propagate zeros, but not
17106 // ones.
17107 Known.One.clearAllBits();
17108 break;
17109 case RISCVISD::REMUW: {
17110 KnownBits Known2;
17111 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17112 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
17113 // We only care about the lower 32 bits.
17114 Known = KnownBits::urem(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32));
17115 // Restore the original width by sign extending.
17116 Known = Known.sext(BitWidth);
17117 break;
17118 }
17119 case RISCVISD::DIVUW: {
17120 KnownBits Known2;
17121 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17122 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
17123 // We only care about the lower 32 bits.
17124 Known = KnownBits::udiv(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32));
17125 // Restore the original width by sign extending.
17126 Known = Known.sext(BitWidth);
17127 break;
17128 }
17129 case RISCVISD::SLLW: {
17130 KnownBits Known2;
17131 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17132 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
17133 Known = KnownBits::shl(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 5).zext(BitWidth: 32));
17134 // Restore the original width by sign extending.
17135 Known = Known.sext(BitWidth);
17136 break;
17137 }
17138 case RISCVISD::CTZW: {
17139 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17140 unsigned PossibleTZ = Known2.trunc(BitWidth: 32).countMaxTrailingZeros();
17141 unsigned LowBits = llvm::bit_width(Value: PossibleTZ);
17142 Known.Zero.setBitsFrom(LowBits);
17143 break;
17144 }
17145 case RISCVISD::CLZW: {
17146 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17147 unsigned PossibleLZ = Known2.trunc(BitWidth: 32).countMaxLeadingZeros();
17148 unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
17149 Known.Zero.setBitsFrom(LowBits);
17150 break;
17151 }
17152 case RISCVISD::BREV8:
17153 case RISCVISD::ORC_B: {
17154 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17155 // control value of 7 is equivalent to brev8 and orc.b.
17156 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17157 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17158 // To compute zeros, we need to invert the value and invert it back after.
17159 Known.Zero =
17160 ~computeGREVOrGORC(x: ~Known.Zero.getZExtValue(), ShAmt: 7, IsGORC);
17161 Known.One = computeGREVOrGORC(x: Known.One.getZExtValue(), ShAmt: 7, IsGORC);
17162 break;
17163 }
17164 case RISCVISD::READ_VLENB: {
17165 // We can use the minimum and maximum VLEN values to bound VLENB. We
17166 // know VLEN must be a power of two.
17167 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17168 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17169 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17170 Known.Zero.setLowBits(Log2_32(Value: MinVLenB));
17171 Known.Zero.setBitsFrom(Log2_32(Value: MaxVLenB)+1);
17172 if (MaxVLenB == MinVLenB)
17173 Known.One.setBit(Log2_32(Value: MinVLenB));
17174 break;
17175 }
17176 case RISCVISD::FCLASS: {
17177 // fclass will only set one of the low 10 bits.
17178 Known.Zero.setBitsFrom(10);
17179 break;
17180 }
17181 case ISD::INTRINSIC_W_CHAIN:
17182 case ISD::INTRINSIC_WO_CHAIN: {
17183 unsigned IntNo =
17184 Op.getConstantOperandVal(i: Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17185 switch (IntNo) {
17186 default:
17187 // We can't do anything for most intrinsics.
17188 break;
17189 case Intrinsic::riscv_vsetvli:
17190 case Intrinsic::riscv_vsetvlimax: {
17191 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17192 unsigned VSEW = Op.getConstantOperandVal(i: HasAVL + 1);
17193 RISCVII::VLMUL VLMUL =
17194 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(i: HasAVL + 2));
17195 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17196 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17197 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17198 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
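      // For example (illustrative): with SEW=32, LMUL=2 and a 512-bit
      // maximum VLEN, MaxVL = (512 / 32) * 2 = 32, so bits 6 and above of
      // the result are known to be zero.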
17199
      // The result of vsetvli must not be larger than AVL.
17201 if (HasAVL && isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
17202 MaxVL = std::min(a: MaxVL, b: Op.getConstantOperandVal(i: 1));
17203
17204 unsigned KnownZeroFirstBit = Log2_32(Value: MaxVL) + 1;
17205 if (BitWidth > KnownZeroFirstBit)
17206 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17207 break;
17208 }
17209 }
17210 break;
17211 }
17212 }
17213}
17214
17215unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17216 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17217 unsigned Depth) const {
17218 switch (Op.getOpcode()) {
17219 default:
17220 break;
17221 case RISCVISD::SELECT_CC: {
17222 unsigned Tmp =
17223 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 3), DemandedElts, Depth: Depth + 1);
17224 if (Tmp == 1) return 1; // Early out.
17225 unsigned Tmp2 =
17226 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 4), DemandedElts, Depth: Depth + 1);
17227 return std::min(a: Tmp, b: Tmp2);
17228 }
17229 case RISCVISD::CZERO_EQZ:
17230 case RISCVISD::CZERO_NEZ:
17231 // Output is either all zero or operand 0. We can propagate sign bit count
17232 // from operand 0.
17233 return DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17234 case RISCVISD::ABSW: {
17235 // We expand this at isel to negw+max. The result will have 33 sign bits
17236 // if the input has at least 33 sign bits.
17237 unsigned Tmp =
17238 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17239 if (Tmp < 33) return 1;
17240 return 33;
17241 }
17242 case RISCVISD::SLLW:
17243 case RISCVISD::SRAW:
17244 case RISCVISD::SRLW:
17245 case RISCVISD::DIVW:
17246 case RISCVISD::DIVUW:
17247 case RISCVISD::REMUW:
17248 case RISCVISD::ROLW:
17249 case RISCVISD::RORW:
17250 case RISCVISD::FCVT_W_RV64:
17251 case RISCVISD::FCVT_WU_RV64:
17252 case RISCVISD::STRICT_FCVT_W_RV64:
17253 case RISCVISD::STRICT_FCVT_WU_RV64:
17254 // TODO: As the result is sign-extended, this is conservatively correct. A
17255 // more precise answer could be calculated for SRAW depending on known
17256 // bits in the shift amount.
17257 return 33;
17258 case RISCVISD::VMV_X_S: {
17259 // The number of sign bits of the scalar result is computed by obtaining the
17260 // element type of the input vector operand, subtracting its width from the
17261 // XLEN, and then adding one (sign bit within the element type). If the
17262 // element type is wider than XLen, the least-significant XLEN bits are
17263 // taken.
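    // For example (illustrative): on RV64, reading an i8 element gives
    // 64 - 8 + 1 = 57 known sign bits.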
17264 unsigned XLen = Subtarget.getXLen();
17265 unsigned EltBits = Op.getOperand(i: 0).getScalarValueSizeInBits();
17266 if (EltBits <= XLen)
17267 return XLen - EltBits + 1;
17268 break;
17269 }
17270 case ISD::INTRINSIC_W_CHAIN: {
17271 unsigned IntNo = Op.getConstantOperandVal(i: 1);
17272 switch (IntNo) {
17273 default:
17274 break;
17275 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17276 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17277 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17278 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17279 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17280 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17281 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17282 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17283 case Intrinsic::riscv_masked_cmpxchg_i64:
17284 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17285 // narrow atomic operation. These are implemented using atomic
17286 // operations at the minimum supported atomicrmw/cmpxchg width whose
17287 // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both RV64 and RV32.
17289 assert(Subtarget.getXLen() == 64);
17290 assert(getMinCmpXchgSizeInBits() == 32);
17291 assert(Subtarget.hasStdExtA());
17292 return 33;
17293 }
17294 break;
17295 }
17296 }
17297
17298 return 1;
17299}
17300
17301bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17302 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17303 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17304
17305 // TODO: Add more target nodes.
17306 switch (Op.getOpcode()) {
17307 case RISCVISD::SELECT_CC:
17308 // Integer select_cc cannot create poison.
17309 // TODO: What are the FP poison semantics?
17310 // TODO: This instruction blocks poison from the unselected operand, can
17311 // we do anything with that?
17312 return !Op.getValueType().isInteger();
17313 }
17314 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17315 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17316}
17317
17318const Constant *
17319RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17320 assert(Ld && "Unexpected null LoadSDNode");
17321 if (!ISD::isNormalLoad(N: Ld))
17322 return nullptr;
17323
17324 SDValue Ptr = Ld->getBasePtr();
17325
17326 // Only constant pools with no offset are supported.
17327 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17328 auto *CNode = dyn_cast<ConstantPoolSDNode>(Val&: Ptr);
17329 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17330 CNode->getOffset() != 0)
17331 return nullptr;
17332
17333 return CNode;
17334 };
17335
17336 // Simple case, LLA.
17337 if (Ptr.getOpcode() == RISCVISD::LLA) {
17338 auto *CNode = GetSupportedConstantPool(Ptr);
17339 if (!CNode || CNode->getTargetFlags() != 0)
17340 return nullptr;
17341
17342 return CNode->getConstVal();
17343 }
17344
17345 // Look for a HI and ADD_LO pair.
17346 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17347 Ptr.getOperand(i: 0).getOpcode() != RISCVISD::HI)
17348 return nullptr;
17349
17350 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(i: 1));
17351 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(i: 0).getOperand(i: 0));
17352
17353 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17354 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17355 return nullptr;
17356
17357 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17358 return nullptr;
17359
17360 return CNodeLo->getConstVal();
17361}
17362
17363static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17364 MachineBasicBlock *BB) {
17365 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17366
17367 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17368 // Should the count have wrapped while it was being read, we need to try
17369 // again.
17370 // For example:
17371 // ```
17372 // read:
17373 // csrrs x3, counterh # load high word of counter
17374 // csrrs x2, counter # load low word of counter
17375 // csrrs x4, counterh # load high word of counter
17376 // bne x3, x4, read # check if high word reads match, otherwise try again
17377 // ```
17378
17379 MachineFunction &MF = *BB->getParent();
17380 const BasicBlock *LLVMBB = BB->getBasicBlock();
17381 MachineFunction::iterator It = ++BB->getIterator();
17382
17383 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVMBB);
17384 MF.insert(MBBI: It, MBB: LoopMBB);
17385
17386 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(BB: LLVMBB);
17387 MF.insert(MBBI: It, MBB: DoneMBB);
17388
17389 // Transfer the remainder of BB and its successor edges to DoneMBB.
17390 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB,
17391 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
17392 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
17393
17394 BB->addSuccessor(Succ: LoopMBB);
17395
17396 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17397 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17398 Register LoReg = MI.getOperand(i: 0).getReg();
17399 Register HiReg = MI.getOperand(i: 1).getReg();
17400 int64_t LoCounter = MI.getOperand(i: 2).getImm();
17401 int64_t HiCounter = MI.getOperand(i: 3).getImm();
17402 DebugLoc DL = MI.getDebugLoc();
17403
17404 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17405 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17406 .addImm(HiCounter)
17407 .addReg(RISCV::X0);
17408 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17409 .addImm(LoCounter)
17410 .addReg(RISCV::X0);
17411 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17412 .addImm(HiCounter)
17413 .addReg(RISCV::X0);
17414
17415 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17416 .addReg(HiReg)
17417 .addReg(ReadAgainReg)
17418 .addMBB(LoopMBB);
17419
17420 LoopMBB->addSuccessor(Succ: LoopMBB);
17421 LoopMBB->addSuccessor(Succ: DoneMBB);
17422
17423 MI.eraseFromParent();
17424
17425 return DoneMBB;
17426}
17427
17428static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17429 MachineBasicBlock *BB,
17430 const RISCVSubtarget &Subtarget) {
17431 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17432
17433 MachineFunction &MF = *BB->getParent();
17434 DebugLoc DL = MI.getDebugLoc();
17435 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
17436 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
17437 Register LoReg = MI.getOperand(i: 0).getReg();
17438 Register HiReg = MI.getOperand(i: 1).getReg();
17439 Register SrcReg = MI.getOperand(i: 2).getReg();
17440
17441 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17442 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17443
17444 TII.storeRegToStackSlot(MBB&: *BB, MI, SrcReg, isKill: MI.getOperand(i: 2).isKill(), FrameIndex: FI, RC: SrcRC,
17445 TRI: RI, VReg: Register());
17446 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17447 MachineMemOperand *MMOLo =
17448 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8));
17449 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17450 PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8));
17451 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17452 .addFrameIndex(FI)
17453 .addImm(0)
17454 .addMemOperand(MMOLo);
17455 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17456 .addFrameIndex(FI)
17457 .addImm(4)
17458 .addMemOperand(MMOHi);
17459 MI.eraseFromParent(); // The pseudo instruction is gone now.
17460 return BB;
17461}
17462
17463static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17464 MachineBasicBlock *BB,
17465 const RISCVSubtarget &Subtarget) {
17466 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17467 "Unexpected instruction");
17468
17469 MachineFunction &MF = *BB->getParent();
17470 DebugLoc DL = MI.getDebugLoc();
17471 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
17472 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
17473 Register DstReg = MI.getOperand(i: 0).getReg();
17474 Register LoReg = MI.getOperand(i: 1).getReg();
17475 Register HiReg = MI.getOperand(i: 2).getReg();
17476
17477 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17478 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17479
17480 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17481 MachineMemOperand *MMOLo =
17482 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8));
17483 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17484 PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8));
17485 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17486 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17487 .addFrameIndex(FI)
17488 .addImm(0)
17489 .addMemOperand(MMOLo);
17490 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17491 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17492 .addFrameIndex(FI)
17493 .addImm(4)
17494 .addMemOperand(MMOHi);
17495 TII.loadRegFromStackSlot(MBB&: *BB, MI, DestReg: DstReg, FrameIndex: FI, RC: DstRC, TRI: RI, VReg: Register());
17496 MI.eraseFromParent(); // The pseudo instruction is gone now.
17497 return BB;
17498}
17499
17500static bool isSelectPseudo(MachineInstr &MI) {
17501 switch (MI.getOpcode()) {
17502 default:
17503 return false;
17504 case RISCV::Select_GPR_Using_CC_GPR:
17505 case RISCV::Select_FPR16_Using_CC_GPR:
17506 case RISCV::Select_FPR16INX_Using_CC_GPR:
17507 case RISCV::Select_FPR32_Using_CC_GPR:
17508 case RISCV::Select_FPR32INX_Using_CC_GPR:
17509 case RISCV::Select_FPR64_Using_CC_GPR:
17510 case RISCV::Select_FPR64INX_Using_CC_GPR:
17511 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17512 return true;
17513 }
17514}
17515
17516static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17517 unsigned RelOpcode, unsigned EqOpcode,
17518 const RISCVSubtarget &Subtarget) {
17519 DebugLoc DL = MI.getDebugLoc();
17520 Register DstReg = MI.getOperand(i: 0).getReg();
17521 Register Src1Reg = MI.getOperand(i: 1).getReg();
17522 Register Src2Reg = MI.getOperand(i: 2).getReg();
17523 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17524 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17525 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17526
17527 // Save the current FFLAGS.
17528 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17529
17530 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RelOpcode), DestReg: DstReg)
17531 .addReg(RegNo: Src1Reg)
17532 .addReg(RegNo: Src2Reg);
17533 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17534 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17535
17536 // Restore the FFLAGS.
17537 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17538 .addReg(SavedFFlags, RegState::Kill);
17539
17540 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17541 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17542 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17543 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17544 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17545 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
17546
17547 // Erase the pseudoinstruction.
17548 MI.eraseFromParent();
17549 return BB;
17550}
17551
17552static MachineBasicBlock *
17553EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17554 MachineBasicBlock *ThisMBB,
17555 const RISCVSubtarget &Subtarget) {
17556 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17557 // Without this, custom-inserter would have generated:
17558 //
17559 // A
17560 // | \
17561 // | B
17562 // | /
17563 // C
17564 // | \
17565 // | D
17566 // | /
17567 // E
17568 //
17569 // A: X = ...; Y = ...
17570 // B: empty
17571 // C: Z = PHI [X, A], [Y, B]
17572 // D: empty
17573 // E: PHI [X, C], [Z, D]
17574 //
17575 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17576 //
17577 // A
17578 // | \
17579 // | C
17580 // | /|
17581 // |/ |
17582 // | |
17583 // | D
17584 // | /
17585 // E
17586 //
17587 // A: X = ...; Y = ...
17588 // D: empty
17589 // E: PHI [X, A], [X, C], [Y, D]
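  //
  // For illustration only (the virtual register names here are hypothetical),
  // a cascade such as
  //   %z = Select_FPRX_ %a, %b, cc, %t1, %f1
  //   %w = Select_FPRX_ %a, %b, cc, %t2, %z
  // (the second select's false operand is the first select's result) is
  // lowered to the second CFG above, with a single three-input PHI for %w in
  // block E built at the end of this function.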
17590
17591 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17592 const DebugLoc &DL = First.getDebugLoc();
17593 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17594 MachineFunction *F = ThisMBB->getParent();
17595 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17596 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17597 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17598 MachineFunction::iterator It = ++ThisMBB->getIterator();
17599 F->insert(MBBI: It, MBB: FirstMBB);
17600 F->insert(MBBI: It, MBB: SecondMBB);
17601 F->insert(MBBI: It, MBB: SinkMBB);
17602
17603 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17604 SinkMBB->splice(Where: SinkMBB->begin(), Other: ThisMBB,
17605 From: std::next(x: MachineBasicBlock::iterator(First)),
17606 To: ThisMBB->end());
17607 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: ThisMBB);
17608
17609 // Fallthrough block for ThisMBB.
17610 ThisMBB->addSuccessor(Succ: FirstMBB);
17611 // Fallthrough block for FirstMBB.
17612 FirstMBB->addSuccessor(Succ: SecondMBB);
17613 ThisMBB->addSuccessor(Succ: SinkMBB);
17614 FirstMBB->addSuccessor(Succ: SinkMBB);
17615 // SecondMBB falls through to SinkMBB.
17616 SecondMBB->addSuccessor(Succ: SinkMBB);
17617
17618 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(i: 3).getImm());
17619 Register FLHS = First.getOperand(i: 1).getReg();
17620 Register FRHS = First.getOperand(i: 2).getReg();
17621 // Insert appropriate branch.
17622 BuildMI(BB: FirstMBB, MIMD: DL, MCID: TII.getBrCond(CC: FirstCC))
17623 .addReg(RegNo: FLHS)
17624 .addReg(RegNo: FRHS)
17625 .addMBB(MBB: SinkMBB);
17626
17627 Register SLHS = Second.getOperand(i: 1).getReg();
17628 Register SRHS = Second.getOperand(i: 2).getReg();
17629 Register Op1Reg4 = First.getOperand(i: 4).getReg();
17630 Register Op1Reg5 = First.getOperand(i: 5).getReg();
17631
17632 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(i: 3).getImm());
17633 // Insert appropriate branch.
17634 BuildMI(BB: ThisMBB, MIMD: DL, MCID: TII.getBrCond(CC: SecondCC))
17635 .addReg(RegNo: SLHS)
17636 .addReg(RegNo: SRHS)
17637 .addMBB(MBB: SinkMBB);
17638
17639 Register DestReg = Second.getOperand(i: 0).getReg();
17640 Register Op2Reg4 = Second.getOperand(i: 4).getReg();
17641 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17642 .addReg(Op2Reg4)
17643 .addMBB(ThisMBB)
17644 .addReg(Op1Reg4)
17645 .addMBB(FirstMBB)
17646 .addReg(Op1Reg5)
17647 .addMBB(SecondMBB);
17648
17649 // Now remove the Select_FPRX_s.
17650 First.eraseFromParent();
17651 Second.eraseFromParent();
17652 return SinkMBB;
17653}
17654
17655static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17656 MachineBasicBlock *BB,
17657 const RISCVSubtarget &Subtarget) {
17658 // To "insert" Select_* instructions, we actually have to insert the triangle
17659 // control-flow pattern. The incoming instructions know the destination vreg
17660 // to set, the condition code register to branch on, the true/false values to
17661 // select between, and the condcode to use to select the appropriate branch.
17662 //
17663 // We produce the following control flow:
17664 // HeadMBB
17665 // | \
17666 // | IfFalseMBB
17667 // | /
17668 // TailMBB
17669 //
17670 // When we find a sequence of selects we attempt to optimize their emission
17671 // by sharing the control flow. Currently we only handle cases where we have
17672 // multiple selects with the exact same condition (same LHS, RHS and CC).
17673 // The selects may be interleaved with other instructions if the other
17674 // instructions meet some requirements we deem safe:
17675 // - They are not pseudo instructions.
17676 // - They are debug instructions; otherwise,
17677 // - they do not have side-effects, do not access memory, and their inputs do
17678 // not depend on the results of the select pseudo-instructions.
17679 // The TrueV/FalseV operands of the selects cannot depend on the result of
17680 // previous selects in the sequence.
17681 // These conditions could be further relaxed. See the X86 target for a
17682 // related approach and more information.
17683 //
17684 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17685 // is checked here and handled by a separate function -
17686 // EmitLoweredCascadedSelect.
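  //
  // Operand layout of the Select_* pseudos handled here (as used below):
  // operand 0 = destination, 1 = compare LHS, 2 = compare RHS, 3 = condition
  // code immediate, 4 = true value, 5 = false value.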
17687 Register LHS = MI.getOperand(i: 1).getReg();
17688 Register RHS = MI.getOperand(i: 2).getReg();
17689 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
17690
17691 SmallVector<MachineInstr *, 4> SelectDebugValues;
17692 SmallSet<Register, 4> SelectDests;
17693 SelectDests.insert(V: MI.getOperand(i: 0).getReg());
17694
17695 MachineInstr *LastSelectPseudo = &MI;
17696 auto Next = next_nodbg(It: MI.getIterator(), End: BB->instr_end());
17697 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17698 Next->getOpcode() == MI.getOpcode() &&
17699 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17700 Next->getOperand(5).isKill()) {
17701 return EmitLoweredCascadedSelect(First&: MI, Second&: *Next, ThisMBB: BB, Subtarget);
17702 }
17703
17704 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17705 SequenceMBBI != E; ++SequenceMBBI) {
17706 if (SequenceMBBI->isDebugInstr())
17707 continue;
17708 if (isSelectPseudo(MI&: *SequenceMBBI)) {
17709 if (SequenceMBBI->getOperand(i: 1).getReg() != LHS ||
17710 SequenceMBBI->getOperand(i: 2).getReg() != RHS ||
17711 SequenceMBBI->getOperand(i: 3).getImm() != CC ||
17712 SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) ||
17713 SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg()))
17714 break;
17715 LastSelectPseudo = &*SequenceMBBI;
17716 SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues);
17717 SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg());
17718 continue;
17719 }
17720 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17721 SequenceMBBI->mayLoadOrStore() ||
17722 SequenceMBBI->usesCustomInsertionHook())
17723 break;
17724 if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) {
17725 return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
17726 }))
17727 break;
17728 }
17729
17730 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17731 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17732 DebugLoc DL = MI.getDebugLoc();
17733 MachineFunction::iterator I = ++BB->getIterator();
17734
17735 MachineBasicBlock *HeadMBB = BB;
17736 MachineFunction *F = BB->getParent();
17737 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17738 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17739
17740 F->insert(MBBI: I, MBB: IfFalseMBB);
17741 F->insert(MBBI: I, MBB: TailMBB);
17742
17743 // Transfer debug instructions associated with the selects to TailMBB.
17744 for (MachineInstr *DebugInstr : SelectDebugValues) {
17745 TailMBB->push_back(MI: DebugInstr->removeFromParent());
17746 }
17747
17748 // Move all instructions after the sequence to TailMBB.
17749 TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
17750 From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
17751 // Update machine-CFG edges by transferring all successors of the current
17752 // block to the new block which will contain the Phi nodes for the selects.
17753 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
17754 // Set the successors for HeadMBB.
17755 HeadMBB->addSuccessor(Succ: IfFalseMBB);
17756 HeadMBB->addSuccessor(Succ: TailMBB);
17757
17758 // Insert appropriate branch.
17759 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.getBrCond(CC))
17760 .addReg(RegNo: LHS)
17761 .addReg(RegNo: RHS)
17762 .addMBB(MBB: TailMBB);
17763
17764 // IfFalseMBB just falls through to TailMBB.
17765 IfFalseMBB->addSuccessor(Succ: TailMBB);
17766
17767 // Create PHIs for all of the select pseudo-instructions.
17768 auto SelectMBBI = MI.getIterator();
17769 auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
17770 auto InsertionPoint = TailMBB->begin();
17771 while (SelectMBBI != SelectEnd) {
17772 auto Next = std::next(x: SelectMBBI);
17773 if (isSelectPseudo(MI&: *SelectMBBI)) {
17774 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17775 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17776 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17777 .addReg(SelectMBBI->getOperand(4).getReg())
17778 .addMBB(HeadMBB)
17779 .addReg(SelectMBBI->getOperand(5).getReg())
17780 .addMBB(IfFalseMBB);
17781 SelectMBBI->eraseFromParent();
17782 }
17783 SelectMBBI = Next;
17784 }
17785
17786 F->getProperties().reset(P: MachineFunctionProperties::Property::NoPHIs);
17787 return TailMBB;
17788}
17789
17790// Helper to find the masked pseudo instruction for an MC opcode, LMUL and SEW.
17791static const RISCV::RISCVMaskedPseudoInfo *
17792lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
17793 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
17794 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
17795 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
17796 const RISCV::RISCVMaskedPseudoInfo *Masked =
17797 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
17798 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
17799 return Masked;
17800}
17801
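// Lower the PseudoVFROUND_NOEXCEPT_V_*_MASK pseudos: round each element to an
// integral value in floating-point format without leaving the inexact flag
// set. This is done by saving FFLAGS, converting to integer and back
// (VFCVT_X_F followed by VFCVT_F_X) under the original mask, and then
// restoring FFLAGS.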
17802static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17803 MachineBasicBlock *BB,
17804 unsigned CVTXOpc) {
17805 DebugLoc DL = MI.getDebugLoc();
17806
17807 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17808
17809 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17810 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17811
17812 // Save the old value of FFLAGS.
17813 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17814
17815 assert(MI.getNumOperands() == 7);
17816
17817 // Emit a VFCVT_X_F
17818 const TargetRegisterInfo *TRI =
17819 BB->getParent()->getSubtarget().getRegisterInfo();
17820 const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx: 0, TII: &TII, TRI);
17821 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
17822 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17823 .add(MI.getOperand(1))
17824 .add(MI.getOperand(2))
17825 .add(MI.getOperand(3))
17826 .add(MachineOperand::CreateImm(7)) // frm = DYN
17827 .add(MI.getOperand(4))
17828 .add(MI.getOperand(5))
17829 .add(MI.getOperand(6))
17830 .add(MachineOperand::CreateReg(RISCV::FRM,
17831 /*IsDef*/ false,
17832 /*IsImp*/ true));
17833
17834 // Emit a VFCVT_F_X
17835 RISCVII::VLMUL LMul = RISCVII::getLMul(TSFlags: MI.getDesc().TSFlags);
17836 unsigned Log2SEW = MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm();
17837 // There is no E8 variant for VFCVT_F_X.
17838 assert(Log2SEW >= 4);
17839 unsigned CVTFOpc =
17840 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
17841 ->MaskedPseudo;
17842
17843 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17844 .add(MI.getOperand(0))
17845 .add(MI.getOperand(1))
17846 .addReg(Tmp)
17847 .add(MI.getOperand(3))
17848 .add(MachineOperand::CreateImm(7)) // frm = DYN
17849 .add(MI.getOperand(4))
17850 .add(MI.getOperand(5))
17851 .add(MI.getOperand(6))
17852 .add(MachineOperand::CreateReg(RISCV::FRM,
17853 /*IsDef*/ false,
17854 /*IsImp*/ true));
17855
17856 // Restore FFLAGS.
17857 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17858 .addReg(SavedFFLAGS, RegState::Kill);
17859
17860 // Erase the pseudoinstruction.
17861 MI.eraseFromParent();
17862 return BB;
17863}
17864
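// Lower the PseudoFROUND_* pseudos. The emitted sequence compares |Src|
// against the magnitude limit in operand 2; if |Src| is not smaller, the
// value is already integral (or NaN/inf) and is passed through unchanged.
// Otherwise CvtMBB converts it to an integer and back using the rounding mode
// from operand 3 and restores the original sign with FSGNJ.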
17865static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17866 const RISCVSubtarget &Subtarget) {
17867 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17868 const TargetRegisterClass *RC;
17869 switch (MI.getOpcode()) {
17870 default:
17871 llvm_unreachable("Unexpected opcode");
17872 case RISCV::PseudoFROUND_H:
17873 CmpOpc = RISCV::FLT_H;
17874 F2IOpc = RISCV::FCVT_W_H;
17875 I2FOpc = RISCV::FCVT_H_W;
17876 FSGNJOpc = RISCV::FSGNJ_H;
17877 FSGNJXOpc = RISCV::FSGNJX_H;
17878 RC = &RISCV::FPR16RegClass;
17879 break;
17880 case RISCV::PseudoFROUND_H_INX:
17881 CmpOpc = RISCV::FLT_H_INX;
17882 F2IOpc = RISCV::FCVT_W_H_INX;
17883 I2FOpc = RISCV::FCVT_H_W_INX;
17884 FSGNJOpc = RISCV::FSGNJ_H_INX;
17885 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17886 RC = &RISCV::GPRF16RegClass;
17887 break;
17888 case RISCV::PseudoFROUND_S:
17889 CmpOpc = RISCV::FLT_S;
17890 F2IOpc = RISCV::FCVT_W_S;
17891 I2FOpc = RISCV::FCVT_S_W;
17892 FSGNJOpc = RISCV::FSGNJ_S;
17893 FSGNJXOpc = RISCV::FSGNJX_S;
17894 RC = &RISCV::FPR32RegClass;
17895 break;
17896 case RISCV::PseudoFROUND_S_INX:
17897 CmpOpc = RISCV::FLT_S_INX;
17898 F2IOpc = RISCV::FCVT_W_S_INX;
17899 I2FOpc = RISCV::FCVT_S_W_INX;
17900 FSGNJOpc = RISCV::FSGNJ_S_INX;
17901 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17902 RC = &RISCV::GPRF32RegClass;
17903 break;
17904 case RISCV::PseudoFROUND_D:
17905 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17906 CmpOpc = RISCV::FLT_D;
17907 F2IOpc = RISCV::FCVT_L_D;
17908 I2FOpc = RISCV::FCVT_D_L;
17909 FSGNJOpc = RISCV::FSGNJ_D;
17910 FSGNJXOpc = RISCV::FSGNJX_D;
17911 RC = &RISCV::FPR64RegClass;
17912 break;
17913 case RISCV::PseudoFROUND_D_INX:
17914 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17915 CmpOpc = RISCV::FLT_D_INX;
17916 F2IOpc = RISCV::FCVT_L_D_INX;
17917 I2FOpc = RISCV::FCVT_D_L_INX;
17918 FSGNJOpc = RISCV::FSGNJ_D_INX;
17919 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17920 RC = &RISCV::GPRRegClass;
17921 break;
17922 }
17923
17924 const BasicBlock *BB = MBB->getBasicBlock();
17925 DebugLoc DL = MI.getDebugLoc();
17926 MachineFunction::iterator I = ++MBB->getIterator();
17927
17928 MachineFunction *F = MBB->getParent();
17929 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17930 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17931
17932 F->insert(MBBI: I, MBB: CvtMBB);
17933 F->insert(MBBI: I, MBB: DoneMBB);
17934 // Move all instructions after the sequence to DoneMBB.
17935 DoneMBB->splice(Where: DoneMBB->end(), Other: MBB, From: MachineBasicBlock::iterator(MI),
17936 To: MBB->end());
17937 // Update machine-CFG edges by transferring all successors of the current
17938 // block to the new block which will contain the Phi nodes for the selects.
17939 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
17940 // Set the successors for MBB.
17941 MBB->addSuccessor(Succ: CvtMBB);
17942 MBB->addSuccessor(Succ: DoneMBB);
17943
17944 Register DstReg = MI.getOperand(i: 0).getReg();
17945 Register SrcReg = MI.getOperand(i: 1).getReg();
17946 Register MaxReg = MI.getOperand(i: 2).getReg();
17947 int64_t FRM = MI.getOperand(i: 3).getImm();
17948
17949 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17950 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
17951
17952 Register FabsReg = MRI.createVirtualRegister(RegClass: RC);
17953 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17954
17955 // Compare the FP value to the max value.
17956 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17957 auto MIB =
17958 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
17959 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17960 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17961
17962 // Insert branch.
17963 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
17964 .addReg(CmpReg)
17965 .addReg(RISCV::X0)
17966 .addMBB(DoneMBB);
17967
17968 CvtMBB->addSuccessor(Succ: DoneMBB);
17969
17970 // Convert to integer.
17971 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17972 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
17973 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17974 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17975
17976 // Convert back to FP.
17977 Register I2FReg = MRI.createVirtualRegister(RegClass: RC);
17978 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
17979 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17980 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17981
17982 // Restore the sign bit.
17983 Register CvtReg = MRI.createVirtualRegister(RegClass: RC);
17984 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
17985
17986 // Merge the results.
17987 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
17988 .addReg(SrcReg)
17989 .addMBB(MBB)
17990 .addReg(CvtReg)
17991 .addMBB(CvtMBB);
17992
17993 MI.eraseFromParent();
17994 return DoneMBB;
17995}
17996
17997MachineBasicBlock *
17998RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
17999 MachineBasicBlock *BB) const {
18000 switch (MI.getOpcode()) {
18001 default:
18002 llvm_unreachable("Unexpected instr type to insert");
18003 case RISCV::ReadCounterWide:
18004 assert(!Subtarget.is64Bit() &&
18005 "ReadCounterWide is only to be used on riscv32");
18006 return emitReadCounterWidePseudo(MI, BB);
18007 case RISCV::Select_GPR_Using_CC_GPR:
18008 case RISCV::Select_FPR16_Using_CC_GPR:
18009 case RISCV::Select_FPR16INX_Using_CC_GPR:
18010 case RISCV::Select_FPR32_Using_CC_GPR:
18011 case RISCV::Select_FPR32INX_Using_CC_GPR:
18012 case RISCV::Select_FPR64_Using_CC_GPR:
18013 case RISCV::Select_FPR64INX_Using_CC_GPR:
18014 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18015 return emitSelectPseudo(MI, BB, Subtarget);
18016 case RISCV::BuildPairF64Pseudo:
18017 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18018 case RISCV::SplitF64Pseudo:
18019 return emitSplitF64Pseudo(MI, BB, Subtarget);
18020 case RISCV::PseudoQuietFLE_H:
18021 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18022 case RISCV::PseudoQuietFLE_H_INX:
18023 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18024 case RISCV::PseudoQuietFLT_H:
18025 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18026 case RISCV::PseudoQuietFLT_H_INX:
18027 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18028 case RISCV::PseudoQuietFLE_S:
18029 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18030 case RISCV::PseudoQuietFLE_S_INX:
18031 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18032 case RISCV::PseudoQuietFLT_S:
18033 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18034 case RISCV::PseudoQuietFLT_S_INX:
18035 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18036 case RISCV::PseudoQuietFLE_D:
18037 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18038 case RISCV::PseudoQuietFLE_D_INX:
18039 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18040 case RISCV::PseudoQuietFLE_D_IN32X:
18041 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18042 Subtarget);
18043 case RISCV::PseudoQuietFLT_D:
18044 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18045 case RISCV::PseudoQuietFLT_D_INX:
18046 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18047 case RISCV::PseudoQuietFLT_D_IN32X:
18048 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18049 Subtarget);
18050
18051 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18052 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18053 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18054 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18055 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18056 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18057 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18058 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18059 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18060 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18061 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18062 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18063 case RISCV::PseudoFROUND_H:
18064 case RISCV::PseudoFROUND_H_INX:
18065 case RISCV::PseudoFROUND_S:
18066 case RISCV::PseudoFROUND_S_INX:
18067 case RISCV::PseudoFROUND_D:
18068 case RISCV::PseudoFROUND_D_INX:
18069 case RISCV::PseudoFROUND_D_IN32X:
18070 return emitFROUND(MI, MBB: BB, Subtarget);
18071 case TargetOpcode::STATEPOINT:
18072 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
18073 // while the JAL call instruction (to which the statepoint is eventually
18074 // lowered) has an implicit def of X1/ra. That def is early-clobber, as it
18075 // is written at the moment of the call, before any use is read.
18076 // Add this implicit dead def here as a workaround.
18077 MI.addOperand(*MI.getMF(),
18078 MachineOperand::CreateReg(
18079 RISCV::X1, /*isDef*/ true,
18080 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18081 /*isUndef*/ false, /*isEarlyClobber*/ true));
18082 [[fallthrough]];
18083 case TargetOpcode::STACKMAP:
18084 case TargetOpcode::PATCHPOINT:
18085 if (!Subtarget.is64Bit())
18086 report_fatal_error(reason: "STACKMAP, PATCHPOINT and STATEPOINT are only "
18087 "supported on 64-bit targets");
18088 return emitPatchPoint(MI, MBB: BB);
18089 }
18090}
18091
18092void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18093 SDNode *Node) const {
18094 // Add FRM dependency to any instructions with dynamic rounding mode.
18095 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18096 if (Idx < 0) {
18097 // Vector pseudos have FRM index indicated by TSFlags.
18098 Idx = RISCVII::getFRMOpNum(Desc: MI.getDesc());
18099 if (Idx < 0)
18100 return;
18101 }
18102 if (MI.getOperand(i: Idx).getImm() != RISCVFPRndMode::DYN)
18103 return;
18104 // If the instruction already reads FRM, don't add another read.
18105 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18106 return;
18107 MI.addOperand(
18108 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18109}
18110
18111// Calling Convention Implementation.
18112// The expectations for frontend ABI lowering vary from target to target.
18113// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18114// details, but this is a longer term goal. For now, we simply try to keep the
18115// role of the frontend as simple and well-defined as possible. The rules can
18116// be summarised as:
18117// * Never split up large scalar arguments. We handle them here.
18118// * If a hardfloat calling convention is being used, and the struct may be
18119// passed in a pair of registers (fp+fp, int+fp), and both registers are
18120// available, then pass as two separate arguments. If either the GPRs or FPRs
18121// are exhausted, then pass according to the rule below.
18122// * If a struct could never be passed in registers or directly in a stack
18123// slot (as it is larger than 2*XLEN and the floating point rules don't
18124// apply), then pass it using a pointer with the byval attribute.
18125// * If a struct is less than 2*XLEN, then coerce to either a two-element
18126// word-sized array or a 2*XLEN scalar (depending on alignment).
18127// * The frontend can determine whether a struct is returned by reference or
18128// not based on its size and fields. If it will be returned by reference, the
18129// frontend must modify the prototype so a pointer with the sret annotation is
18130// passed as the first argument. This is not necessary for large scalar
18131// returns.
18132// * Struct return values and varargs should be coerced to structs containing
18133// register-size fields in the same situations they would be for fixed
18134// arguments.
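//
// As a worked example of the rules above (illustrative, assuming the LP64D
// ABI with all argument registers free): a struct of two doubles is passed in
// a pair of FPRs (fa0/fa1), a struct of an int and a double is passed in a
// GPR plus an FPR, and a struct larger than 2*XLEN to which the floating
// point rules do not apply is passed by a pointer with the byval attribute.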
18135
18136static const MCPhysReg ArgFPR16s[] = {
18137 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18138 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18139};
18140static const MCPhysReg ArgFPR32s[] = {
18141 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18142 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18143};
18144static const MCPhysReg ArgFPR64s[] = {
18145 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18146 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18147};
18148// This is an interim calling convention and it may be changed in the future.
18149static const MCPhysReg ArgVRs[] = {
18150 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18151 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18152 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18153static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18154 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18155 RISCV::V20M2, RISCV::V22M2};
18156static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18157 RISCV::V20M4};
18158static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18159
18160ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
18161 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18162 // the ILP32E and LP64E ABIs.
18163 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18164 RISCV::X13, RISCV::X14, RISCV::X15,
18165 RISCV::X16, RISCV::X17};
18166 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18167 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18168 RISCV::X13, RISCV::X14, RISCV::X15};
18169
18170 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18171 return ArrayRef(ArgEGPRs);
18172
18173 return ArrayRef(ArgIGPRs);
18174}
18175
18176static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18177 // The GPRs used for passing arguments in FastCC. X5 and X6 might be used by
18178 // the save/restore libcalls, so we don't use them.
18179 static const MCPhysReg FastCCIGPRs[] = {
18180 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18181 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18182 RISCV::X29, RISCV::X30, RISCV::X31};
18183
18184 // The GPRs used for passing arguments in FastCC when using ILP32E/LP64E.
18185 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18186 RISCV::X13, RISCV::X14, RISCV::X15,
18187 RISCV::X7};
18188
18189 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18190 return ArrayRef(FastCCEGPRs);
18191
18192 return ArrayRef(FastCCIGPRs);
18193}
18194
18195// Pass a 2*XLEN argument that has been split into two XLEN values through
18196// registers or the stack as necessary.
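// For example, on RV32 an i64 that was split into two i32 halves may end up
// in a register pair, in one register plus a stack slot, or entirely on the
// stack, depending on how many argument GPRs are still free.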
18197static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18198 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18199 MVT ValVT2, MVT LocVT2,
18200 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18201 unsigned XLenInBytes = XLen / 8;
18202 const RISCVSubtarget &STI =
18203 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18204 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI: STI.getTargetABI());
18205
18206 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
18207 // At least one half can be passed via register.
18208 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg,
18209 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
18210 } else {
18211 // Both halves must be passed on the stack, with proper alignment.
18212 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18213 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18214 Align StackAlign(XLenInBytes);
18215 if (!EABI || XLen != 32)
18216 StackAlign = std::max(a: StackAlign, b: ArgFlags1.getNonZeroOrigAlign());
18217 State.addLoc(
18218 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
18219 Offset: State.AllocateStack(Size: XLenInBytes, Alignment: StackAlign),
18220 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
18221 State.addLoc(V: CCValAssign::getMem(
18222 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)),
18223 LocVT: LocVT2, HTP: CCValAssign::Full));
18224 return false;
18225 }
18226
18227 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
18228 // The second half can also be passed via register.
18229 State.addLoc(
18230 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
18231 } else {
18232 // The second half is passed via the stack, without additional alignment.
18233 State.addLoc(V: CCValAssign::getMem(
18234 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)),
18235 LocVT: LocVT2, HTP: CCValAssign::Full));
18236 }
18237
18238 return false;
18239}
18240
18241// Implements the RISC-V calling convention. Returns true upon failure.
18242bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18243 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18244 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18245 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18246 RVVArgDispatcher &RVVDispatcher) {
18247 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18248 assert(XLen == 32 || XLen == 64);
18249 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18250
18251 // Static chain parameter must not be passed in normal argument registers,
18252 // so we assign t2 (X7) for it, as done in GCC's __builtin_call_with_static_chain.
18253 if (ArgFlags.isNest()) {
18254 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18255 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18256 return false;
18257 }
18258 }
18259
18260 // Any return value split into more than two values can't be returned
18261 // directly. Vectors are returned via the available vector registers.
18262 if (!LocVT.isVector() && IsRet && ValNo > 1)
18263 return true;
18264
18265 // UseGPRForF16_F32 is true if targeting a soft-float ABI, if passing a
18266 // variadic argument, or if no F16/F32 argument registers are available.
18267 bool UseGPRForF16_F32 = true;
18268 // UseGPRForF64 is true if targeting a soft-float or FLEN=32 ABI, if passing
18269 // a variadic argument, or if no F64 argument registers are available.
18270 bool UseGPRForF64 = true;
18271
18272 switch (ABI) {
18273 default:
18274 llvm_unreachable("Unexpected ABI");
18275 case RISCVABI::ABI_ILP32:
18276 case RISCVABI::ABI_ILP32E:
18277 case RISCVABI::ABI_LP64:
18278 case RISCVABI::ABI_LP64E:
18279 break;
18280 case RISCVABI::ABI_ILP32F:
18281 case RISCVABI::ABI_LP64F:
18282 UseGPRForF16_F32 = !IsFixed;
18283 break;
18284 case RISCVABI::ABI_ILP32D:
18285 case RISCVABI::ABI_LP64D:
18286 UseGPRForF16_F32 = !IsFixed;
18287 UseGPRForF64 = !IsFixed;
18288 break;
18289 }
18290
18291 // FPR16, FPR32, and FPR64 alias each other.
18292 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18293 UseGPRForF16_F32 = true;
18294 UseGPRForF64 = true;
18295 }
18296
18297 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18298 // similar local variables rather than directly checking against the target
18299 // ABI.
18300
18301 if (UseGPRForF16_F32 &&
18302 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18303 LocVT = XLenVT;
18304 LocInfo = CCValAssign::BCvt;
18305 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18306 LocVT = MVT::i64;
18307 LocInfo = CCValAssign::BCvt;
18308 }
18309
18310 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
18311
18312 // If this is a variadic argument, the RISC-V calling convention requires
18313 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18314 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18315 // be used regardless of whether the original argument was split during
18316 // legalisation or not. The argument will not be passed by registers if the
18317 // original type is larger than 2*XLEN, so the register alignment rule does
18318 // not apply.
18319 // TODO: To be compatible with GCC's behaviors, we don't align registers
18320 // currently if we are using ILP32E calling convention. This behavior may be
18321 // changed when RV32E/ILP32E is ratified.
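  // For example (illustrative), a variadic double on RV32 with the ILP32 ABI
  // has 8-byte size and alignment, so if the next free GPR would be a1/a3/a5/a7
  // it is skipped and the value goes into the following aligned pair
  // (e.g. a2/a3).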
18322 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18323 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18324 DL.getTypeAllocSize(Ty: OrigTy) == TwoXLenInBytes &&
18325 ABI != RISCVABI::ABI_ILP32E) {
18326 unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
18327 // Skip 'odd' register if necessary.
18328 if (RegIdx != std::size(cont: ArgGPRs) && RegIdx % 2 == 1)
18329 State.AllocateReg(Regs: ArgGPRs);
18330 }
18331
18332 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18333 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18334 State.getPendingArgFlags();
18335
18336 assert(PendingLocs.size() == PendingArgFlags.size() &&
18337 "PendingLocs and PendingArgFlags out of sync");
18338
18339 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18340 // registers are exhausted.
18341 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18342 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18343 // Depending on available argument GPRs, f64 may be passed in a pair of
18344 // GPRs, split between a GPR and the stack, or passed completely on the
18345 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18346 // cases.
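    // For example, an f64 may land in a0/a1, in the last free GPR plus a
    // 4-byte stack slot, or in a single 8-byte stack slot.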
18347 Register Reg = State.AllocateReg(Regs: ArgGPRs);
18348 if (!Reg) {
18349 unsigned StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8));
18350 State.addLoc(
18351 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18352 return false;
18353 }
18354 LocVT = MVT::i32;
18355 State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18356 Register HiReg = State.AllocateReg(Regs: ArgGPRs);
18357 if (HiReg) {
18358 State.addLoc(
18359 V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: HiReg, LocVT, HTP: LocInfo));
18360 } else {
18361 unsigned StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4));
18362 State.addLoc(
18363 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18364 }
18365 return false;
18366 }
18367
18368 // Fixed-length vectors are located in the corresponding scalable-vector
18369 // container types.
18370 if (ValVT.isFixedLengthVector())
18371 LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT);
18372
18373 // Split arguments might be passed indirectly, so keep track of the pending
18374 // values. Split vectors are passed via a mix of registers and indirectly, so
18375 // treat them as we would any other argument.
18376 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18377 LocVT = XLenVT;
18378 LocInfo = CCValAssign::Indirect;
18379 PendingLocs.push_back(
18380 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
18381 PendingArgFlags.push_back(Elt: ArgFlags);
18382 if (!ArgFlags.isSplitEnd()) {
18383 return false;
18384 }
18385 }
18386
18387 // If the split argument only had two elements, it should be passed directly
18388 // in registers or on the stack.
18389 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18390 PendingLocs.size() <= 2) {
18391 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18392 // Apply the normal calling convention rules to the first half of the
18393 // split argument.
18394 CCValAssign VA = PendingLocs[0];
18395 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18396 PendingLocs.clear();
18397 PendingArgFlags.clear();
18398 return CC_RISCVAssign2XLen(
18399 XLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, ArgFlags2: ArgFlags,
18400 EABI: ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18401 }
18402
18403 // Allocate to a register if possible, or else a stack slot.
18404 Register Reg;
18405 unsigned StoreSizeBytes = XLen / 8;
18406 Align StackAlign = Align(XLen / 8);
18407
18408 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18409 Reg = State.AllocateReg(ArgFPR16s);
18410 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18411 Reg = State.AllocateReg(ArgFPR32s);
18412 else if (ValVT == MVT::f64 && !UseGPRForF64)
18413 Reg = State.AllocateReg(ArgFPR64s);
18414 else if (ValVT.isVector()) {
18415 Reg = RVVDispatcher.getNextPhysReg();
18416 if (!Reg) {
18417 // For return values, the vector must be passed fully via registers or
18418 // via the stack.
18419 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18420 // but we're using all of them.
18421 if (IsRet)
18422 return true;
18423 // Try using a GPR to pass the address.
18424 if ((Reg = State.AllocateReg(Regs: ArgGPRs))) {
18425 LocVT = XLenVT;
18426 LocInfo = CCValAssign::Indirect;
18427 } else if (ValVT.isScalableVector()) {
18428 LocVT = XLenVT;
18429 LocInfo = CCValAssign::Indirect;
18430 } else {
18431 // Pass fixed-length vectors on the stack.
18432 LocVT = ValVT;
18433 StoreSizeBytes = ValVT.getStoreSize();
18434 // Align vectors to their element sizes, being careful for vXi1
18435 // vectors.
18436 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18437 }
18438 }
18439 } else {
18440 Reg = State.AllocateReg(Regs: ArgGPRs);
18441 }
18442
18443 unsigned StackOffset =
18444 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
18445
18446 // If we reach this point and PendingLocs is non-empty, we must be at the
18447 // end of a split argument that must be passed indirectly.
18448 if (!PendingLocs.empty()) {
18449 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18450 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18451
18452 for (auto &It : PendingLocs) {
18453 if (Reg)
18454 It.convertToReg(RegNo: Reg);
18455 else
18456 It.convertToMem(Offset: StackOffset);
18457 State.addLoc(V: It);
18458 }
18459 PendingLocs.clear();
18460 PendingArgFlags.clear();
18461 return false;
18462 }
18463
18464 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18465 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18466 "Expected an XLenVT or vector types at this stage");
18467
18468 if (Reg) {
18469 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18470 return false;
18471 }
18472
18473 // When a scalar floating-point value is passed on the stack, no
18474 // bit-conversion is needed.
18475 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18476 assert(!ValVT.isVector());
18477 LocVT = ValVT;
18478 LocInfo = CCValAssign::Full;
18479 }
18480 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18481 return false;
18482}
18483
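// Return the index of the first argument whose type is a vector with i1
// elements (i.e. a mask operand), if any.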
18484template <typename ArgTy>
18485static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18486 for (const auto &ArgIdx : enumerate(Args)) {
18487 MVT ArgVT = ArgIdx.value().VT;
18488 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18489 return ArgIdx.index();
18490 }
18491 return std::nullopt;
18492}
18493
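// Assign a location to each value in Ins using Fn. When IsRet is true the
// values are treated as return values (the function's return type is used as
// the original type); an RVVArgDispatcher is built so that vector arguments
// are assigned registers consistently.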
18494void RISCVTargetLowering::analyzeInputArgs(
18495 MachineFunction &MF, CCState &CCInfo,
18496 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18497 RISCVCCAssignFn Fn) const {
18498 unsigned NumArgs = Ins.size();
18499 FunctionType *FType = MF.getFunction().getFunctionType();
18500
18501 RVVArgDispatcher Dispatcher;
18502 if (IsRet) {
18503 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18504 } else {
18505 SmallVector<Type *, 4> TypeList;
18506 for (const Argument &Arg : MF.getFunction().args())
18507 TypeList.push_back(Elt: Arg.getType());
18508 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18509 }
18510
18511 for (unsigned i = 0; i != NumArgs; ++i) {
18512 MVT ArgVT = Ins[i].VT;
18513 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18514
18515 Type *ArgTy = nullptr;
18516 if (IsRet)
18517 ArgTy = FType->getReturnType();
18518 else if (Ins[i].isOrigArg())
18519 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
18520
18521 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18522 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18523 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18524 Dispatcher)) {
18525 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18526 << ArgVT << '\n');
18527 llvm_unreachable(nullptr);
18528 }
18529 }
18530}
18531
18532void RISCVTargetLowering::analyzeOutputArgs(
18533 MachineFunction &MF, CCState &CCInfo,
18534 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18535 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18536 unsigned NumArgs = Outs.size();
18537
18538 SmallVector<Type *, 4> TypeList;
18539 if (IsRet)
18540 TypeList.push_back(Elt: MF.getFunction().getReturnType());
18541 else if (CLI)
18542 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18543 TypeList.push_back(Elt: Arg.Ty);
18544 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18545
18546 for (unsigned i = 0; i != NumArgs; i++) {
18547 MVT ArgVT = Outs[i].VT;
18548 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18549 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18550
18551 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18552 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18553 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18554 Dispatcher)) {
18555 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18556 << ArgVT << "\n");
18557 llvm_unreachable(nullptr);
18558 }
18559 }
18560}
18561
18562// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18563// values.
18564static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18565 const CCValAssign &VA, const SDLoc &DL,
18566 const RISCVSubtarget &Subtarget) {
18567 switch (VA.getLocInfo()) {
18568 default:
18569 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18570 case CCValAssign::Full:
18571 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18572 Val = convertFromScalableVector(VT: VA.getValVT(), V: Val, DAG, Subtarget);
18573 break;
18574 case CCValAssign::BCvt:
18575 if (VA.getLocVT().isInteger() &&
18576 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18577 Val = DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: VA.getValVT(), Operand: Val);
18578 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18579 if (RV64LegalI32) {
18580 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18581 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18582 } else {
18583 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18584 }
18585 } else {
18586 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
18587 }
18588 break;
18589 }
18590 return Val;
18591}
18592
18593// The caller is responsible for loading the full value if the argument is
18594// passed with CCValAssign::Indirect.
18595static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18596 const CCValAssign &VA, const SDLoc &DL,
18597 const ISD::InputArg &In,
18598 const RISCVTargetLowering &TLI) {
18599 MachineFunction &MF = DAG.getMachineFunction();
18600 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18601 EVT LocVT = VA.getLocVT();
18602 SDValue Val;
18603 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
18604 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
18605 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
18606 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
18607
18608 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18609 if (In.isOrigArg()) {
18610 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
18611 if (OrigArg->getType()->isIntegerTy()) {
18612 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18613 // An input zero extended from i31 can also be considered sign extended.
18614 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18615 (BitWidth < 32 && In.Flags.isZExt())) {
18616 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18617 RVFI->addSExt32Register(Reg: VReg);
18618 }
18619 }
18620 }
18621
18622 if (VA.getLocInfo() == CCValAssign::Indirect)
18623 return Val;
18624
18625 return convertLocVTToValVT(DAG, Val, VA, DL, Subtarget: TLI.getSubtarget());
18626}
18627
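// Convert Val from VA.getValVT() to the location VT described by VA. This is
// the inverse of convertLocVTToValVT and, like it, should not be called for
// CCValAssign::Indirect values.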
18628static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18629 const CCValAssign &VA, const SDLoc &DL,
18630 const RISCVSubtarget &Subtarget) {
18631 EVT LocVT = VA.getLocVT();
18632
18633 switch (VA.getLocInfo()) {
18634 default:
18635 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18636 case CCValAssign::Full:
18637 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18638 Val = convertToScalableVector(VT: LocVT, V: Val, DAG, Subtarget);
18639 break;
18640 case CCValAssign::BCvt:
18641 if (LocVT.isInteger() &&
18642 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18643 Val = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: LocVT, Operand: Val);
18644 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18645 if (RV64LegalI32) {
18646 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18647 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18648 } else {
18649 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18650 }
18651 } else {
18652 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
18653 }
18654 break;
18655 }
18656 return Val;
18657}
18658
18659// The caller is responsible for loading the full value if the argument is
18660// passed with CCValAssign::Indirect.
18661static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18662 const CCValAssign &VA, const SDLoc &DL) {
18663 MachineFunction &MF = DAG.getMachineFunction();
18664 MachineFrameInfo &MFI = MF.getFrameInfo();
18665 EVT LocVT = VA.getLocVT();
18666 EVT ValVT = VA.getValVT();
18667 EVT PtrVT = MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0));
18668 if (ValVT.isScalableVector()) {
18669 // When the value is a scalable vector, the stack slot holds a pointer to
18670 // the scalable vector value rather than the value itself, so load it with
18671 // the pointer-sized location type instead of the scalable vector type.
18672 ValVT = LocVT;
18673 }
18674 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
18675 /*IsImmutable=*/true);
18676 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
18677 SDValue Val;
18678
18679 ISD::LoadExtType ExtType;
18680 switch (VA.getLocInfo()) {
18681 default:
18682 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18683 case CCValAssign::Full:
18684 case CCValAssign::Indirect:
18685 case CCValAssign::BCvt:
18686 ExtType = ISD::NON_EXTLOAD;
18687 break;
18688 }
18689 Val = DAG.getExtLoad(
18690 ExtType, dl: DL, VT: LocVT, Chain, Ptr: FIN,
18691 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
18692 return Val;
18693}
18694
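// Reassemble an f64 argument that was split for RV32: the low half arrives in
// a GPR and the high half arrives either in a second GPR or on the stack; the
// two i32 halves are recombined with RISCVISD::BuildPairF64.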
18695static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18696 const CCValAssign &VA,
18697 const CCValAssign &HiVA,
18698 const SDLoc &DL) {
18699 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18700 "Unexpected VA");
18701 MachineFunction &MF = DAG.getMachineFunction();
18702 MachineFrameInfo &MFI = MF.getFrameInfo();
18703 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18704
18705 assert(VA.isRegLoc() && "Expected register VA assignment");
18706
18707 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18708 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
18709 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18710 SDValue Hi;
18711 if (HiVA.isMemLoc()) {
18712 // Second half of f64 is passed on the stack.
18713 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(),
18714 /*IsImmutable=*/true);
18715 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18716 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18717 MachinePointerInfo::getFixedStack(MF, FI));
18718 } else {
18719 // Second half of f64 is passed in another GPR.
18720 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18721 RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
18722 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18723 }
18724 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18725}
18726
18727// FastCC shows less than 1% performance improvement on some particular
18728// benchmarks, but it may theoretically benefit other cases.
18729bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18730 unsigned ValNo, MVT ValVT, MVT LocVT,
18731 CCValAssign::LocInfo LocInfo,
18732 ISD::ArgFlagsTy ArgFlags, CCState &State,
18733 bool IsFixed, bool IsRet, Type *OrigTy,
18734 const RISCVTargetLowering &TLI,
18735 RVVArgDispatcher &RVVDispatcher) {
18736 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18737 if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) {
18738 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18739 return false;
18740 }
18741 }
18742
18743 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18744
18745 if (LocVT == MVT::f16 &&
18746 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18747 static const MCPhysReg FPR16List[] = {
18748 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18749 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18750 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18751 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18752 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18753 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18754 return false;
18755 }
18756 }
18757
18758 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18759 static const MCPhysReg FPR32List[] = {
18760 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18761 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18762 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18763 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18764 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18765 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18766 return false;
18767 }
18768 }
18769
18770 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18771 static const MCPhysReg FPR64List[] = {
18772 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18773 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18774 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18775 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18776 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18777 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18778 return false;
18779 }
18780 }
18781
18782 // Check if there is an available GPR before hitting the stack.
18783 if ((LocVT == MVT::f16 &&
18784 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18785 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18786 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18787 Subtarget.hasStdExtZdinx())) {
18788 if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) {
18789 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18790 return false;
18791 }
18792 }
18793
18794 if (LocVT == MVT::f16) {
18795 unsigned Offset2 = State.AllocateStack(Size: 2, Alignment: Align(2));
18796 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset2, LocVT, HTP: LocInfo));
18797 return false;
18798 }
18799
18800 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18801 unsigned Offset4 = State.AllocateStack(Size: 4, Alignment: Align(4));
18802 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset4, LocVT, HTP: LocInfo));
18803 return false;
18804 }
18805
18806 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18807 unsigned Offset5 = State.AllocateStack(Size: 8, Alignment: Align(8));
18808 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset5, LocVT, HTP: LocInfo));
18809 return false;
18810 }
18811
18812 if (LocVT.isVector()) {
18813 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
18814 if (AllocatedVReg) {
18815 // Fixed-length vectors are located in the corresponding scalable-vector
18816 // container types.
18817 if (ValVT.isFixedLengthVector())
18818 LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT);
18819 State.addLoc(
18820 V: CCValAssign::getReg(ValNo, ValVT, RegNo: AllocatedVReg, LocVT, HTP: LocInfo));
18821 } else {
18822 // Try to pass the address via a "fast" GPR.
18823 if (unsigned GPRReg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) {
18824 LocInfo = CCValAssign::Indirect;
18825 LocVT = TLI.getSubtarget().getXLenVT();
18826 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: GPRReg, LocVT, HTP: LocInfo));
18827 } else if (ValVT.isFixedLengthVector()) {
18828 auto StackAlign =
18829 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18830 unsigned StackOffset =
18831 State.AllocateStack(Size: ValVT.getStoreSize(), Alignment: StackAlign);
18832 State.addLoc(
18833 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18834 } else {
18835 // Can't pass scalable vectors on the stack.
18836 return true;
18837 }
18838 }
18839
18840 return false;
18841 }
18842
18843 return true; // CC didn't match.
18844}
18845
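// Implements the GHC calling convention. Arguments are pinned to the fixed
// STG registers listed below and are never passed on the stack; running out
// of registers is a fatal error.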
18846bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18847 CCValAssign::LocInfo LocInfo,
18848 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18849 if (ArgFlags.isNest()) {
18850 report_fatal_error(
18851 reason: "Attribute 'nest' is not supported in GHC calling convention");
18852 }
18853
18854 static const MCPhysReg GPRList[] = {
18855 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18856 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18857
18858 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18859 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18860 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18861 if (unsigned Reg = State.AllocateReg(GPRList)) {
18862 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18863 return false;
18864 }
18865 }
18866
18867 const RISCVSubtarget &Subtarget =
18868 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18869
18870 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18871 // Pass in STG registers: F1, ..., F6
18872 // fs0 ... fs5
18873 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18874 RISCV::F18_F, RISCV::F19_F,
18875 RISCV::F20_F, RISCV::F21_F};
18876 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18877 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18878 return false;
18879 }
18880 }
18881
18882 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18883 // Pass in STG registers: D1, ..., D6
18884 // fs6 ... fs11
18885 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18886 RISCV::F24_D, RISCV::F25_D,
18887 RISCV::F26_D, RISCV::F27_D};
18888 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18889 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18890 return false;
18891 }
18892 }
18893
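  // With Zfinx/Zdinx the floating-point state lives in the GPRs, so fall back
  // to the same GPR list used for the integer STG registers.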
18894 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18895 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18896 Subtarget.is64Bit())) {
18897 if (unsigned Reg = State.AllocateReg(GPRList)) {
18898 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18899 return false;
18900 }
18901 }
18902
18903 report_fatal_error(reason: "No registers left in GHC calling convention");
18904 return true;
18905}
18906
18907// Transform physical registers into virtual registers.
18908SDValue RISCVTargetLowering::LowerFormalArguments(
18909 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18910 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18911 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18912
18913 MachineFunction &MF = DAG.getMachineFunction();
18914
18915 switch (CallConv) {
18916 default:
18917 report_fatal_error(reason: "Unsupported calling convention");
18918 case CallingConv::C:
18919 case CallingConv::Fast:
18920 case CallingConv::SPIR_KERNEL:
18921 case CallingConv::GRAAL:
18922 case CallingConv::RISCV_VectorCall:
18923 break;
18924 case CallingConv::GHC:
18925 if (Subtarget.hasStdExtE())
18926 report_fatal_error(reason: "GHC calling convention is not supported on RVE!");
18927 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18928 report_fatal_error(reason: "GHC calling convention requires the (Zfinx/F) and "
18929 "(Zdinx/D) instruction set extensions");
18930 }
18931
18932 const Function &Func = MF.getFunction();
18933 if (Func.hasFnAttribute(Kind: "interrupt")) {
18934 if (!Func.arg_empty())
18935 report_fatal_error(
18936 reason: "Functions with the interrupt attribute cannot have arguments!");
18937
18938 StringRef Kind =
18939 MF.getFunction().getFnAttribute(Kind: "interrupt").getValueAsString();
18940
18941 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18942 report_fatal_error(
18943 reason: "Function interrupt attribute argument not supported!");
18944 }
18945
18946 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
18947 MVT XLenVT = Subtarget.getXLenVT();
18948 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18949  // Used with varargs to accumulate store chains.
18950 std::vector<SDValue> OutChains;
18951
18952 // Assign locations to all of the incoming arguments.
18953 SmallVector<CCValAssign, 16> ArgLocs;
18954 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18955
18956 if (CallConv == CallingConv::GHC)
18957 CCInfo.AnalyzeFormalArguments(Ins, Fn: RISCV::CC_RISCV_GHC);
18958 else
18959 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
18960 Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
18961 : RISCV::CC_RISCV);
18962
18963 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
18964 CCValAssign &VA = ArgLocs[i];
18965 SDValue ArgValue;
18966 // Passing f64 on RV32D with a soft float ABI must be handled as a special
18967 // case.
18968 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18969 assert(VA.needsCustom());
18970 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL);
18971 } else if (VA.isRegLoc())
18972 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this);
18973 else
18974 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
18975
18976 if (VA.getLocInfo() == CCValAssign::Indirect) {
18977 // If the original argument was split and passed by reference (e.g. i128
18978 // on RV32), we need to load all parts of it here (using the same
18979 // address). Vectors may be partly split to registers and partly to the
18980 // stack, in which case the base address is partly offset and subsequent
18981      // accesses are relative to that.
18982 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
18983 PtrInfo: MachinePointerInfo()));
18984 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
18985 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
18986 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18987 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
18988 CCValAssign &PartVA = ArgLocs[i + 1];
18989 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
18990 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
18991 if (PartVA.getValVT().isScalableVector())
18992 Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset);
18993 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
18994 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
18995 PtrInfo: MachinePointerInfo()));
18996 ++i;
18997 ++InsIdx;
18998 }
18999 continue;
19000 }
19001 InVals.push_back(Elt: ArgValue);
19002 }
19003
19004 if (any_of(Range&: ArgLocs,
19005 P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19006 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19007
19008 if (IsVarArg) {
19009 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(ABI: Subtarget.getTargetABI());
19010 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
19011 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19012 MachineFrameInfo &MFI = MF.getFrameInfo();
19013 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19014 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19015
19016 // Size of the vararg save area. For now, the varargs save area is either
19017 // zero or large enough to hold a0-a7.
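    // For example, with the lp64 ABI (argument registers a0-a7), if the named
    // arguments consumed a0-a2 then Idx == 3 and the save area covers a3-a7:
    // 5 * XLenInBytes == 40 bytes.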
19018 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19019 int FI;
19020
19021 // If all registers are allocated, then all varargs must be passed on the
19022 // stack and we don't need to save any argregs.
19023 if (VarArgsSaveSize == 0) {
19024 int VaArgOffset = CCInfo.getStackSize();
19025 FI = MFI.CreateFixedObject(Size: XLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
19026 } else {
19027 int VaArgOffset = -VarArgsSaveSize;
19028 FI = MFI.CreateFixedObject(Size: VarArgsSaveSize, SPOffset: VaArgOffset, IsImmutable: true);
19029
19030      // If saving an odd number of registers, create an extra stack slot to
19031      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19032      // offsets to even-numbered registers remain 2*XLEN-aligned.
19033 if (Idx % 2) {
19034 MFI.CreateFixedObject(
19035 Size: XLenInBytes, SPOffset: VaArgOffset - static_cast<int>(XLenInBytes), IsImmutable: true);
19036 VarArgsSaveSize += XLenInBytes;
19037 }
19038
19039 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
19040
19041 // Copy the integer registers that may have been used for passing varargs
19042 // to the vararg save area.
19043 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19044 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
19045 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
19046 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: XLenVT);
19047 SDValue Store = DAG.getStore(
19048 Chain, dl: DL, Val: ArgValue, Ptr: FIN,
19049 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset: (I - Idx) * XLenInBytes));
19050 OutChains.push_back(x: Store);
19051 FIN =
19052 DAG.getMemBasePlusOffset(Base: FIN, Offset: TypeSize::getFixed(ExactSize: XLenInBytes), DL);
19053 }
19054 }
19055
19056    // Record the frame index of the first variable argument,
19057    // which is needed by VASTART.
19058 RVFI->setVarArgsFrameIndex(FI);
19059 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19060 }
19061
19062  // Group all the stores into one token factor node so that the sizes of
19063  // Ins and InVals stay matched. This only happens for vararg functions.
19064 if (!OutChains.empty()) {
19065 OutChains.push_back(x: Chain);
19066 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19067 }
19068
19069 return Chain;
19070}
19071
19072/// isEligibleForTailCallOptimization - Check whether the call is eligible
19073/// for tail call optimization.
19074/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19075bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19076 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19077 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19078
19079 auto CalleeCC = CLI.CallConv;
19080 auto &Outs = CLI.Outs;
19081 auto &Caller = MF.getFunction();
19082 auto CallerCC = Caller.getCallingConv();
19083
19084 // Exception-handling functions need a special set of instructions to
19085 // indicate a return to the hardware. Tail-calling another function would
19086 // probably break this.
19087 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19088 // should be expanded as new function attributes are introduced.
19089 if (Caller.hasFnAttribute(Kind: "interrupt"))
19090 return false;
19091
19092 // Do not tail call opt if the stack is used to pass parameters.
19093 if (CCInfo.getStackSize() != 0)
19094 return false;
19095
19096  // Do not tail call opt if any parameters need to be passed indirectly.
19097  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19098  // passed indirectly. So the address of the value will be passed in a
19099  // register, or if not available, then the address is put on the stack.
19100  // Passing indirectly often requires allocating stack space to hold the
19101  // value, so the CCInfo.getStackSize() != 0 check above is not enough and we
19102  // also need to check whether any CCValAssign in ArgLocs is passed
19103  // CCValAssign::Indirect.
19104 for (auto &VA : ArgLocs)
19105 if (VA.getLocInfo() == CCValAssign::Indirect)
19106 return false;
19107
19108 // Do not tail call opt if either caller or callee uses struct return
19109 // semantics.
19110 auto IsCallerStructRet = Caller.hasStructRetAttr();
19111 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19112 if (IsCallerStructRet || IsCalleeStructRet)
19113 return false;
19114
19115 // The callee has to preserve all registers the caller needs to preserve.
19116 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19117 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19118 if (CalleeCC != CallerCC) {
19119 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19120 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19121 return false;
19122 }
19123
19124 // Byval parameters hand the function a pointer directly into the stack area
19125 // we want to reuse during a tail call. Working around this *is* possible
19126 // but less efficient and uglier in LowerCall.
19127 for (auto &Arg : Outs)
19128 if (Arg.Flags.isByVal())
19129 return false;
19130
19131 return true;
19132}
19133
19134static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19135 return DAG.getDataLayout().getPrefTypeAlign(
19136 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
19137}
19138
19139// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19140// and output parameter nodes.
19141SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19142 SmallVectorImpl<SDValue> &InVals) const {
19143 SelectionDAG &DAG = CLI.DAG;
19144 SDLoc &DL = CLI.DL;
19145 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19146 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19147 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19148 SDValue Chain = CLI.Chain;
19149 SDValue Callee = CLI.Callee;
19150 bool &IsTailCall = CLI.IsTailCall;
19151 CallingConv::ID CallConv = CLI.CallConv;
19152 bool IsVarArg = CLI.IsVarArg;
19153 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
19154 MVT XLenVT = Subtarget.getXLenVT();
19155
19156 MachineFunction &MF = DAG.getMachineFunction();
19157
19158 // Analyze the operands of the call, assigning locations to each operand.
19159 SmallVector<CCValAssign, 16> ArgLocs;
19160 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19161
19162 if (CallConv == CallingConv::GHC) {
19163 if (Subtarget.hasStdExtE())
19164 report_fatal_error(reason: "GHC calling convention is not supported on RVE!");
19165 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: RISCV::CC_RISCV_GHC);
19166 } else
19167 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI,
19168 Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19169 : RISCV::CC_RISCV);
19170
19171 // Check if it's really possible to do a tail call.
19172 if (IsTailCall)
19173 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
19174
19175 if (IsTailCall)
19176 ++NumTailCalls;
19177 else if (CLI.CB && CLI.CB->isMustTailCall())
19178 report_fatal_error(reason: "failed to perform tail call elimination on a call "
19179 "site marked musttail");
19180
19181 // Get a count of how many bytes are to be pushed on the stack.
19182 unsigned NumBytes = ArgCCInfo.getStackSize();
19183
19184 // Create local copies for byval args
19185 SmallVector<SDValue, 8> ByValArgs;
19186 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19187 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19188 if (!Flags.isByVal())
19189 continue;
19190
19191 SDValue Arg = OutVals[i];
19192 unsigned Size = Flags.getByValSize();
19193 Align Alignment = Flags.getNonZeroByValAlign();
19194
19195 int FI =
19196 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
19197 SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
19198 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: XLenVT);
19199
19200 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment,
19201 /*IsVolatile=*/isVol: false,
19202 /*AlwaysInline=*/false, isTailCall: IsTailCall,
19203 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
19204 ByValArgs.push_back(Elt: FIPtr);
19205 }
19206
19207 if (!IsTailCall)
19208 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
19209
19210 // Copy argument values to their designated locations.
19211 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19212 SmallVector<SDValue, 8> MemOpChains;
19213 SDValue StackPtr;
19214 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19215 ++i, ++OutIdx) {
19216 CCValAssign &VA = ArgLocs[i];
19217 SDValue ArgValue = OutVals[OutIdx];
19218 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19219
19220 // Handle passing f64 on RV32D with a soft float ABI as a special case.
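    // SplitF64 yields the two i32 halves: the low half always goes in the GPR
    // assigned by VA, and the high half goes either in the next assigned GPR
    // or, if no register was available, onto the stack.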
19221 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19222 assert(VA.isRegLoc() && "Expected register VA assignment");
19223 assert(VA.needsCustom());
19224 SDValue SplitF64 = DAG.getNode(
19225 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19226 SDValue Lo = SplitF64.getValue(R: 0);
19227 SDValue Hi = SplitF64.getValue(R: 1);
19228
19229 Register RegLo = VA.getLocReg();
19230 RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));
19231
19232 // Get the CCValAssign for the Hi part.
19233 CCValAssign &HiVA = ArgLocs[++i];
19234
19235 if (HiVA.isMemLoc()) {
19236 // Second half of f64 is passed on the stack.
19237 if (!StackPtr.getNode())
19238 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19239 SDValue Address =
19240 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
19241 N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
19242 // Emit the store.
19243 MemOpChains.push_back(
19244 Elt: DAG.getStore(Chain, dl: DL, Val: Hi, Ptr: Address, PtrInfo: MachinePointerInfo()));
19245 } else {
19246 // Second half of f64 is passed in another GPR.
19247 Register RegHigh = HiVA.getLocReg();
19248 RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
19249 }
19250 continue;
19251 }
19252
19253 // Promote the value if needed.
19254 // For now, only handle fully promoted and indirect arguments.
19255 if (VA.getLocInfo() == CCValAssign::Indirect) {
19256 // Store the argument in a stack slot and pass its address.
19257 Align StackAlign =
19258 std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG),
19259 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
19260 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19261 // If the original argument was split (e.g. i128), we need
19262 // to store the required parts of it here (and pass just one address).
19263 // Vectors may be partly split to registers and partly to the stack, in
19264 // which case the base address is partly offset and subsequent stores are
19265 // relative to that.
19266 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19267 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19268 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19269      // Calculate the total size to store. The only way to know what will
19270      // actually be stored is to walk the remaining parts and collect the
19271      // info as we go.
19272 SmallVector<std::pair<SDValue, SDValue>> Parts;
19273 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19274 SDValue PartValue = OutVals[OutIdx + 1];
19275 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19276 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
19277 EVT PartVT = PartValue.getValueType();
19278 if (PartVT.isScalableVector())
19279 Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset);
19280 StoredSize += PartVT.getStoreSize();
19281 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
19282 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
19283 ++i;
19284 ++OutIdx;
19285 }
19286 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
19287 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
19288 MemOpChains.push_back(
19289 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
19290 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
19291 for (const auto &Part : Parts) {
19292 SDValue PartValue = Part.first;
19293 SDValue PartOffset = Part.second;
19294 SDValue Address =
19295 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
19296 MemOpChains.push_back(
19297 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
19298 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
19299 }
19300 ArgValue = SpillSlot;
19301 } else {
19302 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL, Subtarget);
19303 }
19304
19305 // Use local copy if it is a byval arg.
19306 if (Flags.isByVal())
19307 ArgValue = ByValArgs[j++];
19308
19309 if (VA.isRegLoc()) {
19310 // Queue up the argument copies and emit them at the end.
19311 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
19312 } else {
19313 assert(VA.isMemLoc() && "Argument not register or memory");
19314 assert(!IsTailCall && "Tail call not allowed if stack is used "
19315 "for passing parameters");
19316
19317 // Work out the address of the stack slot.
19318 if (!StackPtr.getNode())
19319 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19320 SDValue Address =
19321 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
19322 N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));
19323
19324 // Emit the store.
19325 MemOpChains.push_back(
19326 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
19327 }
19328 }
19329
19330 // Join the stores, which are independent of one another.
19331 if (!MemOpChains.empty())
19332 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19333
19334 SDValue Glue;
19335
19336 // Build a sequence of copy-to-reg nodes, chained and glued together.
19337 for (auto &Reg : RegsToPass) {
19338 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
19339 Glue = Chain.getValue(R: 1);
19340 }
19341
19342  // Validate that none of the argument registers have been marked as
19343  // reserved; if any have, report an error. Do the same for the return address
19344  // register if this is not a tail call.
19345 validateCCReservedRegs(Regs: RegsToPass, MF);
19346 if (!IsTailCall &&
19347 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
19348 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
19349 MF.getFunction(),
19350 "Return address register required, but has been reserved."});
19351
19352  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19353  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19354  // split it, and the direct call can then be matched by PseudoCALL.
19355 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
19356 const GlobalValue *GV = S->getGlobal();
19357 Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: RISCVII::MO_CALL);
19358 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
19359 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: RISCVII::MO_CALL);
19360 }
19361
19362 // The first call operand is the chain and the second is the target address.
19363 SmallVector<SDValue, 8> Ops;
19364 Ops.push_back(Elt: Chain);
19365 Ops.push_back(Elt: Callee);
19366
19367 // Add argument registers to the end of the list so that they are
19368 // known live into the call.
19369 for (auto &Reg : RegsToPass)
19370 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
19371
19372 if (!IsTailCall) {
19373 // Add a register mask operand representing the call-preserved registers.
19374 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19375 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19376 assert(Mask && "Missing call preserved mask for calling convention");
19377 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
19378 }
19379
19380 // Glue the call to the argument copies, if any.
19381 if (Glue.getNode())
19382 Ops.push_back(Elt: Glue);
19383
19384 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19385 "Unexpected CFI type for a direct call");
19386
19387 // Emit the call.
19388 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19389
19390 if (IsTailCall) {
19391 MF.getFrameInfo().setHasTailCall();
19392 SDValue Ret = DAG.getNode(Opcode: RISCVISD::TAIL, DL, VTList: NodeTys, Ops);
19393 if (CLI.CFIType)
19394 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19395 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
19396 return Ret;
19397 }
19398
19399 Chain = DAG.getNode(Opcode: RISCVISD::CALL, DL, VTList: NodeTys, Ops);
19400 if (CLI.CFIType)
19401 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19402 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
19403 Glue = Chain.getValue(R: 1);
19404
19405 // Mark the end of the call, which is glued to the call itself.
19406 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
19407 Glue = Chain.getValue(R: 1);
19408
19409 // Assign locations to each value returned by this call.
19410 SmallVector<CCValAssign, 16> RVLocs;
19411 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19412 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: RISCV::CC_RISCV);
19413
19414 // Copy all of the result registers out of their specified physreg.
19415 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19416 auto &VA = RVLocs[i];
19417 // Copy the value out
19418 SDValue RetValue =
19419 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
19420 // Glue the RetValue to the end of the call sequence
19421 Chain = RetValue.getValue(R: 1);
19422 Glue = RetValue.getValue(R: 2);
19423
19424 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19425 assert(VA.needsCustom());
19426 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19427 MVT::i32, Glue);
19428 Chain = RetValue2.getValue(R: 1);
19429 Glue = RetValue2.getValue(R: 2);
19430 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19431 RetValue2);
19432 }
19433
19434 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL, Subtarget);
19435
19436 InVals.push_back(Elt: RetValue);
19437 }
19438
19439 return Chain;
19440}
19441
19442bool RISCVTargetLowering::CanLowerReturn(
19443 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19444 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19445 SmallVector<CCValAssign, 16> RVLocs;
19446 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19447
19448 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19449
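  // CC_RISCV returns true when it fails to assign a location, so lowering the
  // return is possible only if every outgoing value is assigned (with
  // IsRet=true) without error.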
19450 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19451 MVT VT = Outs[i].VT;
19452 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19453 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19454 if (RISCV::CC_RISCV(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: VT, LocVT: VT, LocInfo: CCValAssign::Full,
19455 ArgFlags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19456 OrigTy: nullptr, TLI: *this, RVVDispatcher&: Dispatcher))
19457 return false;
19458 }
19459 return true;
19460}
19461
19462SDValue
19463RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19464 bool IsVarArg,
19465 const SmallVectorImpl<ISD::OutputArg> &Outs,
19466 const SmallVectorImpl<SDValue> &OutVals,
19467 const SDLoc &DL, SelectionDAG &DAG) const {
19468 MachineFunction &MF = DAG.getMachineFunction();
19469 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19470
19471 // Stores the assignment of the return value to a location.
19472 SmallVector<CCValAssign, 16> RVLocs;
19473
19474 // Info about the registers and stack slot.
19475 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19476 *DAG.getContext());
19477
19478 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19479 CLI: nullptr, Fn: RISCV::CC_RISCV);
19480
19481 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19482 report_fatal_error(reason: "GHC functions return void only");
19483
19484 SDValue Glue;
19485 SmallVector<SDValue, 4> RetOps(1, Chain);
19486
19487 // Copy the result values into the output registers.
19488 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19489 SDValue Val = OutVals[OutIdx];
19490 CCValAssign &VA = RVLocs[i];
19491 assert(VA.isRegLoc() && "Can only return in registers!");
19492
19493 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19494 // Handle returning f64 on RV32D with a soft float ABI.
19495 assert(VA.isRegLoc() && "Expected return via registers");
19496 assert(VA.needsCustom());
19497 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19498 DAG.getVTList(MVT::i32, MVT::i32), Val);
19499 SDValue Lo = SplitF64.getValue(R: 0);
19500 SDValue Hi = SplitF64.getValue(R: 1);
19501 Register RegLo = VA.getLocReg();
19502 Register RegHi = RVLocs[++i].getLocReg();
19503
19504 if (STI.isRegisterReservedByUser(i: RegLo) ||
19505 STI.isRegisterReservedByUser(i: RegHi))
19506 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
19507 MF.getFunction(),
19508 "Return value register required, but has been reserved."});
19509
19510 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
19511 Glue = Chain.getValue(R: 1);
19512 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19513 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
19514 Glue = Chain.getValue(R: 1);
19515 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19516 } else {
19517 // Handle a 'normal' return.
19518 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19519 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
19520
19521 if (STI.isRegisterReservedByUser(i: VA.getLocReg()))
19522 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
19523 MF.getFunction(),
19524 "Return value register required, but has been reserved."});
19525
19526 // Guarantee that all emitted copies are stuck together.
19527 Glue = Chain.getValue(R: 1);
19528 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
19529 }
19530 }
19531
19532 RetOps[0] = Chain; // Update chain.
19533
19534 // Add the glue node if we have it.
19535 if (Glue.getNode()) {
19536 RetOps.push_back(Elt: Glue);
19537 }
19538
19539 if (any_of(Range&: RVLocs,
19540 P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19541 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19542
19543 unsigned RetOpc = RISCVISD::RET_GLUE;
19544 // Interrupt service routines use different return instructions.
19545 const Function &Func = DAG.getMachineFunction().getFunction();
19546 if (Func.hasFnAttribute(Kind: "interrupt")) {
19547 if (!Func.getReturnType()->isVoidTy())
19548 report_fatal_error(
19549 reason: "Functions with the interrupt attribute must have void return type!");
19550
19551 MachineFunction &MF = DAG.getMachineFunction();
19552 StringRef Kind =
19553 MF.getFunction().getFnAttribute(Kind: "interrupt").getValueAsString();
19554
19555 if (Kind == "supervisor")
19556 RetOpc = RISCVISD::SRET_GLUE;
19557 else
19558 RetOpc = RISCVISD::MRET_GLUE;
19559 }
19560
19561 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19562}
19563
19564void RISCVTargetLowering::validateCCReservedRegs(
19565 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19566 MachineFunction &MF) const {
19567 const Function &F = MF.getFunction();
19568 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19569
19570 if (llvm::any_of(Range: Regs, P: [&STI](auto Reg) {
19571 return STI.isRegisterReservedByUser(i: Reg.first);
19572 }))
19573 F.getContext().diagnose(DI: DiagnosticInfoUnsupported{
19574 F, "Argument register required, but has been reserved."});
19575}
19576
19577// Check if the result of the node is only used as a return value, as
19578// otherwise we can't perform a tail-call.
19579bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19580 if (N->getNumValues() != 1)
19581 return false;
19582 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
19583 return false;
19584
19585 SDNode *Copy = *N->use_begin();
19586
19587 if (Copy->getOpcode() == ISD::BITCAST) {
19588 return isUsedByReturnOnly(N: Copy, Chain);
19589 }
19590
19591 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19592 // with soft float ABIs.
19593 if (Copy->getOpcode() != ISD::CopyToReg) {
19594 return false;
19595 }
19596
19597 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19598 // isn't safe to perform a tail call.
19599 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19600 return false;
19601
19602 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19603 bool HasRet = false;
19604 for (SDNode *Node : Copy->uses()) {
19605 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19606 return false;
19607 HasRet = true;
19608 }
19609 if (!HasRet)
19610 return false;
19611
19612 Chain = Copy->getOperand(Num: 0);
19613 return true;
19614}
19615
19616bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19617 return CI->isTailCall();
19618}
19619
19620const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19621#define NODE_NAME_CASE(NODE) \
19622 case RISCVISD::NODE: \
19623 return "RISCVISD::" #NODE;
19624 // clang-format off
19625 switch ((RISCVISD::NodeType)Opcode) {
19626 case RISCVISD::FIRST_NUMBER:
19627 break;
19628 NODE_NAME_CASE(RET_GLUE)
19629 NODE_NAME_CASE(SRET_GLUE)
19630 NODE_NAME_CASE(MRET_GLUE)
19631 NODE_NAME_CASE(CALL)
19632 NODE_NAME_CASE(SELECT_CC)
19633 NODE_NAME_CASE(BR_CC)
19634 NODE_NAME_CASE(BuildPairF64)
19635 NODE_NAME_CASE(SplitF64)
19636 NODE_NAME_CASE(TAIL)
19637 NODE_NAME_CASE(ADD_LO)
19638 NODE_NAME_CASE(HI)
19639 NODE_NAME_CASE(LLA)
19640 NODE_NAME_CASE(ADD_TPREL)
19641 NODE_NAME_CASE(MULHSU)
19642 NODE_NAME_CASE(SHL_ADD)
19643 NODE_NAME_CASE(SLLW)
19644 NODE_NAME_CASE(SRAW)
19645 NODE_NAME_CASE(SRLW)
19646 NODE_NAME_CASE(DIVW)
19647 NODE_NAME_CASE(DIVUW)
19648 NODE_NAME_CASE(REMUW)
19649 NODE_NAME_CASE(ROLW)
19650 NODE_NAME_CASE(RORW)
19651 NODE_NAME_CASE(CLZW)
19652 NODE_NAME_CASE(CTZW)
19653 NODE_NAME_CASE(ABSW)
19654 NODE_NAME_CASE(FMV_H_X)
19655 NODE_NAME_CASE(FMV_X_ANYEXTH)
19656 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19657 NODE_NAME_CASE(FMV_W_X_RV64)
19658 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19659 NODE_NAME_CASE(FCVT_X)
19660 NODE_NAME_CASE(FCVT_XU)
19661 NODE_NAME_CASE(FCVT_W_RV64)
19662 NODE_NAME_CASE(FCVT_WU_RV64)
19663 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19664 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19665 NODE_NAME_CASE(FP_ROUND_BF16)
19666 NODE_NAME_CASE(FP_EXTEND_BF16)
19667 NODE_NAME_CASE(FROUND)
19668 NODE_NAME_CASE(FCLASS)
19669 NODE_NAME_CASE(FMAX)
19670 NODE_NAME_CASE(FMIN)
19671 NODE_NAME_CASE(READ_COUNTER_WIDE)
19672 NODE_NAME_CASE(BREV8)
19673 NODE_NAME_CASE(ORC_B)
19674 NODE_NAME_CASE(ZIP)
19675 NODE_NAME_CASE(UNZIP)
19676 NODE_NAME_CASE(CLMUL)
19677 NODE_NAME_CASE(CLMULH)
19678 NODE_NAME_CASE(CLMULR)
19679 NODE_NAME_CASE(MOPR)
19680 NODE_NAME_CASE(MOPRR)
19681 NODE_NAME_CASE(SHA256SIG0)
19682 NODE_NAME_CASE(SHA256SIG1)
19683 NODE_NAME_CASE(SHA256SUM0)
19684 NODE_NAME_CASE(SHA256SUM1)
19685 NODE_NAME_CASE(SM4KS)
19686 NODE_NAME_CASE(SM4ED)
19687 NODE_NAME_CASE(SM3P0)
19688 NODE_NAME_CASE(SM3P1)
19689 NODE_NAME_CASE(TH_LWD)
19690 NODE_NAME_CASE(TH_LWUD)
19691 NODE_NAME_CASE(TH_LDD)
19692 NODE_NAME_CASE(TH_SWD)
19693 NODE_NAME_CASE(TH_SDD)
19694 NODE_NAME_CASE(VMV_V_V_VL)
19695 NODE_NAME_CASE(VMV_V_X_VL)
19696 NODE_NAME_CASE(VFMV_V_F_VL)
19697 NODE_NAME_CASE(VMV_X_S)
19698 NODE_NAME_CASE(VMV_S_X_VL)
19699 NODE_NAME_CASE(VFMV_S_F_VL)
19700 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19701 NODE_NAME_CASE(READ_VLENB)
19702 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19703 NODE_NAME_CASE(VSLIDEUP_VL)
19704 NODE_NAME_CASE(VSLIDE1UP_VL)
19705 NODE_NAME_CASE(VSLIDEDOWN_VL)
19706 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19707 NODE_NAME_CASE(VFSLIDE1UP_VL)
19708 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19709 NODE_NAME_CASE(VID_VL)
19710 NODE_NAME_CASE(VFNCVT_ROD_VL)
19711 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19712 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19713 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19714 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19715 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19716 NODE_NAME_CASE(VECREDUCE_AND_VL)
19717 NODE_NAME_CASE(VECREDUCE_OR_VL)
19718 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19719 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19720 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19721 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19722 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19723 NODE_NAME_CASE(ADD_VL)
19724 NODE_NAME_CASE(AND_VL)
19725 NODE_NAME_CASE(MUL_VL)
19726 NODE_NAME_CASE(OR_VL)
19727 NODE_NAME_CASE(SDIV_VL)
19728 NODE_NAME_CASE(SHL_VL)
19729 NODE_NAME_CASE(SREM_VL)
19730 NODE_NAME_CASE(SRA_VL)
19731 NODE_NAME_CASE(SRL_VL)
19732 NODE_NAME_CASE(ROTL_VL)
19733 NODE_NAME_CASE(ROTR_VL)
19734 NODE_NAME_CASE(SUB_VL)
19735 NODE_NAME_CASE(UDIV_VL)
19736 NODE_NAME_CASE(UREM_VL)
19737 NODE_NAME_CASE(XOR_VL)
19738 NODE_NAME_CASE(AVGFLOORU_VL)
19739 NODE_NAME_CASE(AVGCEILU_VL)
19740 NODE_NAME_CASE(SADDSAT_VL)
19741 NODE_NAME_CASE(UADDSAT_VL)
19742 NODE_NAME_CASE(SSUBSAT_VL)
19743 NODE_NAME_CASE(USUBSAT_VL)
19744 NODE_NAME_CASE(FADD_VL)
19745 NODE_NAME_CASE(FSUB_VL)
19746 NODE_NAME_CASE(FMUL_VL)
19747 NODE_NAME_CASE(FDIV_VL)
19748 NODE_NAME_CASE(FNEG_VL)
19749 NODE_NAME_CASE(FABS_VL)
19750 NODE_NAME_CASE(FSQRT_VL)
19751 NODE_NAME_CASE(FCLASS_VL)
19752 NODE_NAME_CASE(VFMADD_VL)
19753 NODE_NAME_CASE(VFNMADD_VL)
19754 NODE_NAME_CASE(VFMSUB_VL)
19755 NODE_NAME_CASE(VFNMSUB_VL)
19756 NODE_NAME_CASE(VFWMADD_VL)
19757 NODE_NAME_CASE(VFWNMADD_VL)
19758 NODE_NAME_CASE(VFWMSUB_VL)
19759 NODE_NAME_CASE(VFWNMSUB_VL)
19760 NODE_NAME_CASE(FCOPYSIGN_VL)
19761 NODE_NAME_CASE(SMIN_VL)
19762 NODE_NAME_CASE(SMAX_VL)
19763 NODE_NAME_CASE(UMIN_VL)
19764 NODE_NAME_CASE(UMAX_VL)
19765 NODE_NAME_CASE(BITREVERSE_VL)
19766 NODE_NAME_CASE(BSWAP_VL)
19767 NODE_NAME_CASE(CTLZ_VL)
19768 NODE_NAME_CASE(CTTZ_VL)
19769 NODE_NAME_CASE(CTPOP_VL)
19770 NODE_NAME_CASE(VFMIN_VL)
19771 NODE_NAME_CASE(VFMAX_VL)
19772 NODE_NAME_CASE(MULHS_VL)
19773 NODE_NAME_CASE(MULHU_VL)
19774 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19775 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19776 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19777 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19778 NODE_NAME_CASE(VFCVT_X_F_VL)
19779 NODE_NAME_CASE(VFCVT_XU_F_VL)
19780 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19781 NODE_NAME_CASE(SINT_TO_FP_VL)
19782 NODE_NAME_CASE(UINT_TO_FP_VL)
19783 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19784 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19785 NODE_NAME_CASE(FP_EXTEND_VL)
19786 NODE_NAME_CASE(FP_ROUND_VL)
19787 NODE_NAME_CASE(STRICT_FADD_VL)
19788 NODE_NAME_CASE(STRICT_FSUB_VL)
19789 NODE_NAME_CASE(STRICT_FMUL_VL)
19790 NODE_NAME_CASE(STRICT_FDIV_VL)
19791 NODE_NAME_CASE(STRICT_FSQRT_VL)
19792 NODE_NAME_CASE(STRICT_VFMADD_VL)
19793 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19794 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19795 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19796 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19797 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19798 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19799 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19800 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19801 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19802 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19803 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19804 NODE_NAME_CASE(STRICT_FSETCC_VL)
19805 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19806 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19807 NODE_NAME_CASE(VWMUL_VL)
19808 NODE_NAME_CASE(VWMULU_VL)
19809 NODE_NAME_CASE(VWMULSU_VL)
19810 NODE_NAME_CASE(VWADD_VL)
19811 NODE_NAME_CASE(VWADDU_VL)
19812 NODE_NAME_CASE(VWSUB_VL)
19813 NODE_NAME_CASE(VWSUBU_VL)
19814 NODE_NAME_CASE(VWADD_W_VL)
19815 NODE_NAME_CASE(VWADDU_W_VL)
19816 NODE_NAME_CASE(VWSUB_W_VL)
19817 NODE_NAME_CASE(VWSUBU_W_VL)
19818 NODE_NAME_CASE(VWSLL_VL)
19819 NODE_NAME_CASE(VFWMUL_VL)
19820 NODE_NAME_CASE(VFWADD_VL)
19821 NODE_NAME_CASE(VFWSUB_VL)
19822 NODE_NAME_CASE(VFWADD_W_VL)
19823 NODE_NAME_CASE(VFWSUB_W_VL)
19824 NODE_NAME_CASE(VWMACC_VL)
19825 NODE_NAME_CASE(VWMACCU_VL)
19826 NODE_NAME_CASE(VWMACCSU_VL)
19827 NODE_NAME_CASE(VNSRL_VL)
19828 NODE_NAME_CASE(SETCC_VL)
19829 NODE_NAME_CASE(VMERGE_VL)
19830 NODE_NAME_CASE(VMAND_VL)
19831 NODE_NAME_CASE(VMOR_VL)
19832 NODE_NAME_CASE(VMXOR_VL)
19833 NODE_NAME_CASE(VMCLR_VL)
19834 NODE_NAME_CASE(VMSET_VL)
19835 NODE_NAME_CASE(VRGATHER_VX_VL)
19836 NODE_NAME_CASE(VRGATHER_VV_VL)
19837 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19838 NODE_NAME_CASE(VSEXT_VL)
19839 NODE_NAME_CASE(VZEXT_VL)
19840 NODE_NAME_CASE(VCPOP_VL)
19841 NODE_NAME_CASE(VFIRST_VL)
19842 NODE_NAME_CASE(READ_CSR)
19843 NODE_NAME_CASE(WRITE_CSR)
19844 NODE_NAME_CASE(SWAP_CSR)
19845 NODE_NAME_CASE(CZERO_EQZ)
19846 NODE_NAME_CASE(CZERO_NEZ)
19847 NODE_NAME_CASE(SF_VC_XV_SE)
19848 NODE_NAME_CASE(SF_VC_IV_SE)
19849 NODE_NAME_CASE(SF_VC_VV_SE)
19850 NODE_NAME_CASE(SF_VC_FV_SE)
19851 NODE_NAME_CASE(SF_VC_XVV_SE)
19852 NODE_NAME_CASE(SF_VC_IVV_SE)
19853 NODE_NAME_CASE(SF_VC_VVV_SE)
19854 NODE_NAME_CASE(SF_VC_FVV_SE)
19855 NODE_NAME_CASE(SF_VC_XVW_SE)
19856 NODE_NAME_CASE(SF_VC_IVW_SE)
19857 NODE_NAME_CASE(SF_VC_VVW_SE)
19858 NODE_NAME_CASE(SF_VC_FVW_SE)
19859 NODE_NAME_CASE(SF_VC_V_X_SE)
19860 NODE_NAME_CASE(SF_VC_V_I_SE)
19861 NODE_NAME_CASE(SF_VC_V_XV_SE)
19862 NODE_NAME_CASE(SF_VC_V_IV_SE)
19863 NODE_NAME_CASE(SF_VC_V_VV_SE)
19864 NODE_NAME_CASE(SF_VC_V_FV_SE)
19865 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19866 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19867 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19868 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19869 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19870 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19871 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19872 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19873 }
19874 // clang-format on
19875 return nullptr;
19876#undef NODE_NAME_CASE
19877}
19878
19879/// getConstraintType - Given a constraint letter, return the type of
19880/// constraint it is for this target.
19881RISCVTargetLowering::ConstraintType
19882RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
19883 if (Constraint.size() == 1) {
19884 switch (Constraint[0]) {
19885 default:
19886 break;
19887 case 'f':
19888 return C_RegisterClass;
19889 case 'I':
19890 case 'J':
19891 case 'K':
19892 return C_Immediate;
19893 case 'A':
19894 return C_Memory;
19895 case 's':
19896 case 'S': // A symbolic address
19897 return C_Other;
19898 }
19899 } else {
19900 if (Constraint == "vr" || Constraint == "vm")
19901 return C_RegisterClass;
19902 }
19903 return TargetLowering::getConstraintType(Constraint);
19904}
19905
19906std::pair<unsigned, const TargetRegisterClass *>
19907RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
19908 StringRef Constraint,
19909 MVT VT) const {
19910 // First, see if this is a constraint that directly corresponds to a RISC-V
19911 // register class.
19912 if (Constraint.size() == 1) {
19913 switch (Constraint[0]) {
19914 case 'r':
19915 // TODO: Support fixed vectors up to XLen for P extension?
19916 if (VT.isVector())
19917 break;
19918 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19919 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19920 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19921 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19922 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19923 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19924 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19925 case 'f':
19926 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19927 return std::make_pair(0U, &RISCV::FPR16RegClass);
19928 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19929 return std::make_pair(0U, &RISCV::FPR32RegClass);
19930 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19931 return std::make_pair(0U, &RISCV::FPR64RegClass);
19932 break;
19933 default:
19934 break;
19935 }
19936 } else if (Constraint == "vr") {
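    // Return the smallest LMUL register class (VR, then VRM2/VRM4/VRM8) for
    // which the requested type is legal.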
19937 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19938 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19939 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19940 return std::make_pair(0U, RC);
19941 }
19942 } else if (Constraint == "vm") {
19943 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19944 return std::make_pair(0U, &RISCV::VMV0RegClass);
19945 }
19946
19947 // Clang will correctly decode the usage of register name aliases into their
19948 // official names. However, other frontends like `rustc` do not. This allows
19949 // users of these frontends to use the ABI names for registers in LLVM-style
19950 // register constraints.
19951 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19952 .Case("{zero}", RISCV::X0)
19953 .Case("{ra}", RISCV::X1)
19954 .Case("{sp}", RISCV::X2)
19955 .Case("{gp}", RISCV::X3)
19956 .Case("{tp}", RISCV::X4)
19957 .Case("{t0}", RISCV::X5)
19958 .Case("{t1}", RISCV::X6)
19959 .Case("{t2}", RISCV::X7)
19960 .Cases("{s0}", "{fp}", RISCV::X8)
19961 .Case("{s1}", RISCV::X9)
19962 .Case("{a0}", RISCV::X10)
19963 .Case("{a1}", RISCV::X11)
19964 .Case("{a2}", RISCV::X12)
19965 .Case("{a3}", RISCV::X13)
19966 .Case("{a4}", RISCV::X14)
19967 .Case("{a5}", RISCV::X15)
19968 .Case("{a6}", RISCV::X16)
19969 .Case("{a7}", RISCV::X17)
19970 .Case("{s2}", RISCV::X18)
19971 .Case("{s3}", RISCV::X19)
19972 .Case("{s4}", RISCV::X20)
19973 .Case("{s5}", RISCV::X21)
19974 .Case("{s6}", RISCV::X22)
19975 .Case("{s7}", RISCV::X23)
19976 .Case("{s8}", RISCV::X24)
19977 .Case("{s9}", RISCV::X25)
19978 .Case("{s10}", RISCV::X26)
19979 .Case("{s11}", RISCV::X27)
19980 .Case("{t3}", RISCV::X28)
19981 .Case("{t4}", RISCV::X29)
19982 .Case("{t5}", RISCV::X30)
19983 .Case("{t6}", RISCV::X31)
19984 .Default(RISCV::NoRegister);
19985 if (XRegFromAlias != RISCV::NoRegister)
19986 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
19987
19988  // TargetLowering::getRegForInlineAsmConstraint chooses registers for InlineAsm
19989  // constraints by the name of the TableGen record rather than the AsmName, and
19990  // we also want to match these names to the widest floating-point register type
19991  // available, so manually select floating-point registers here.
19992  //
19993  // The second name in each .Cases entry is the ABI name of the register, so
19994  // that frontends can also use the ABI names in register constraint lists.
19995 if (Subtarget.hasStdExtF()) {
19996 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
19997 .Cases("{f0}", "{ft0}", RISCV::F0_F)
19998 .Cases("{f1}", "{ft1}", RISCV::F1_F)
19999 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20000 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20001 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20002 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20003 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20004 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20005 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20006 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20007 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20008 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20009 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20010 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20011 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20012 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20013 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20014 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20015 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20016 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20017 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20018 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20019 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20020 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20021 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20022 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20023 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20024 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20025 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20026 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20027 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20028 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20029 .Default(RISCV::NoRegister);
20030 if (FReg != RISCV::NoRegister) {
20031 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
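      // The F/D/H views of an FPR share the same register number, so convert
      // the FPR32 name to the matching FPR64 or FPR16 name with simple index
      // arithmetic (e.g. {fa0} becomes F10_D when a 64-bit value is needed).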
20032 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20033 unsigned RegNo = FReg - RISCV::F0_F;
20034 unsigned DReg = RISCV::F0_D + RegNo;
20035 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20036 }
20037 if (VT == MVT::f32 || VT == MVT::Other)
20038 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20039 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20040 unsigned RegNo = FReg - RISCV::F0_F;
20041 unsigned HReg = RISCV::F0_H + RegNo;
20042 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20043 }
20044 }
20045 }
20046
20047 if (Subtarget.hasVInstructions()) {
20048 Register VReg = StringSwitch<Register>(Constraint.lower())
20049 .Case("{v0}", RISCV::V0)
20050 .Case("{v1}", RISCV::V1)
20051 .Case("{v2}", RISCV::V2)
20052 .Case("{v3}", RISCV::V3)
20053 .Case("{v4}", RISCV::V4)
20054 .Case("{v5}", RISCV::V5)
20055 .Case("{v6}", RISCV::V6)
20056 .Case("{v7}", RISCV::V7)
20057 .Case("{v8}", RISCV::V8)
20058 .Case("{v9}", RISCV::V9)
20059 .Case("{v10}", RISCV::V10)
20060 .Case("{v11}", RISCV::V11)
20061 .Case("{v12}", RISCV::V12)
20062 .Case("{v13}", RISCV::V13)
20063 .Case("{v14}", RISCV::V14)
20064 .Case("{v15}", RISCV::V15)
20065 .Case("{v16}", RISCV::V16)
20066 .Case("{v17}", RISCV::V17)
20067 .Case("{v18}", RISCV::V18)
20068 .Case("{v19}", RISCV::V19)
20069 .Case("{v20}", RISCV::V20)
20070 .Case("{v21}", RISCV::V21)
20071 .Case("{v22}", RISCV::V22)
20072 .Case("{v23}", RISCV::V23)
20073 .Case("{v24}", RISCV::V24)
20074 .Case("{v25}", RISCV::V25)
20075 .Case("{v26}", RISCV::V26)
20076 .Case("{v27}", RISCV::V27)
20077 .Case("{v28}", RISCV::V28)
20078 .Case("{v29}", RISCV::V29)
20079 .Case("{v30}", RISCV::V30)
20080 .Case("{v31}", RISCV::V31)
20081 .Default(RISCV::NoRegister);
20082 if (VReg != RISCV::NoRegister) {
20083 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20084 return std::make_pair(VReg, &RISCV::VMRegClass);
20085 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20086 return std::make_pair(VReg, &RISCV::VRRegClass);
20087 for (const auto *RC :
20088 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20089 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20090 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20091 return std::make_pair(VReg, RC);
20092 }
20093 }
20094 }
20095 }
20096
20097 std::pair<Register, const TargetRegisterClass *> Res =
20098 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20099
20100 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20101 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20102 // Subtarget into account.
20103 if (Res.second == &RISCV::GPRF16RegClass ||
20104 Res.second == &RISCV::GPRF32RegClass ||
20105 Res.second == &RISCV::GPRPairRegClass)
20106 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20107
20108 return Res;
20109}
20110
20111InlineAsm::ConstraintCode
20112RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20113 // Currently only support length 1 constraints.
20114 if (ConstraintCode.size() == 1) {
20115 switch (ConstraintCode[0]) {
20116 case 'A':
20117 return InlineAsm::ConstraintCode::A;
20118 default:
20119 break;
20120 }
20121 }
20122
20123 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20124}
20125
20126void RISCVTargetLowering::LowerAsmOperandForConstraint(
20127 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20128 SelectionDAG &DAG) const {
20129 // Currently only support length 1 constraints.
20130 if (Constraint.size() == 1) {
20131 switch (Constraint[0]) {
20132 case 'I':
20133 // Validate & create a 12-bit signed immediate operand.
20134 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
20135 uint64_t CVal = C->getSExtValue();
20136 if (isInt<12>(x: CVal))
20137 Ops.push_back(
20138 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
20139 }
20140 return;
20141 case 'J':
20142 // Validate & create an integer zero operand.
20143 if (isNullConstant(V: Op))
20144 Ops.push_back(
20145 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
20146 return;
20147 case 'K':
20148 // Validate & create a 5-bit unsigned immediate operand.
20149 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
20150 uint64_t CVal = C->getZExtValue();
20151 if (isUInt<5>(x: CVal))
20152 Ops.push_back(
20153 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
20154 }
20155 return;
20156 case 'S':
20157 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint: "s", Ops, DAG);
20158 return;
20159 default:
20160 break;
20161 }
20162 }
20163 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20164}
20165
20166Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20167 Instruction *Inst,
20168 AtomicOrdering Ord) const {
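  // Ztso already guarantees TSO ordering in hardware, so only a seq_cst load
  // still needs an explicit leading fence.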
20169 if (Subtarget.hasStdExtZtso()) {
20170 if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20171 return Builder.CreateFence(Ordering: Ord);
20172 return nullptr;
20173 }
20174
20175 if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20176 return Builder.CreateFence(Ordering: Ord);
20177 if (isa<StoreInst>(Val: Inst) && isReleaseOrStronger(AO: Ord))
20178 return Builder.CreateFence(Ordering: AtomicOrdering::Release);
20179 return nullptr;
20180}
20181
20182Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20183 Instruction *Inst,
20184 AtomicOrdering Ord) const {
20185 if (Subtarget.hasStdExtZtso()) {
20186 if (isa<StoreInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20187 return Builder.CreateFence(Ordering: Ord);
20188 return nullptr;
20189 }
20190
20191 if (isa<LoadInst>(Val: Inst) && isAcquireOrStronger(AO: Ord))
20192 return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
20193 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Val: Inst) &&
20194 Ord == AtomicOrdering::SequentiallyConsistent)
20195 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
20196 return nullptr;
20197}
20198
20199TargetLowering::AtomicExpansionKind
20200RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20201 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20202 // point operations can't be used in an lr/sc sequence without breaking the
20203 // forward-progress guarantee.
20204 if (AI->isFloatingPointOperation() ||
20205 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20206 AI->getOperation() == AtomicRMWInst::UDecWrap)
20207 return AtomicExpansionKind::CmpXChg;
20208
20209  // Don't expand forced atomics; we want to have __sync libcalls instead.
20210 if (Subtarget.hasForcedAtomics())
20211 return AtomicExpansionKind::None;
20212
20213 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
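  // There is no AMO instruction for Nand, so prefer a compare-exchange loop
  // when Zacas is available (Zabha additionally covers the 8/16-bit widths);
  // otherwise a narrow Nand still needs the masked LR/SC intrinsic.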
20214 if (AI->getOperation() == AtomicRMWInst::Nand) {
20215 if (Subtarget.hasStdExtZacas() &&
20216 (Size >= 32 || Subtarget.hasStdExtZabha()))
20217 return AtomicExpansionKind::CmpXChg;
20218 if (Size < 32)
20219 return AtomicExpansionKind::MaskedIntrinsic;
20220 }
20221
20222 if (Size < 32 && !Subtarget.hasStdExtZabha())
20223 return AtomicExpansionKind::MaskedIntrinsic;
20224
20225 return AtomicExpansionKind::None;
20226}
20227
20228static Intrinsic::ID
20229getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20230 if (XLen == 32) {
20231 switch (BinOp) {
20232 default:
20233 llvm_unreachable("Unexpected AtomicRMW BinOp");
20234 case AtomicRMWInst::Xchg:
20235 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20236 case AtomicRMWInst::Add:
20237 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20238 case AtomicRMWInst::Sub:
20239 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20240 case AtomicRMWInst::Nand:
20241 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20242 case AtomicRMWInst::Max:
20243 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20244 case AtomicRMWInst::Min:
20245 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20246 case AtomicRMWInst::UMax:
20247 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20248 case AtomicRMWInst::UMin:
20249 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20250 }
20251 }
20252
20253 if (XLen == 64) {
20254 switch (BinOp) {
20255 default:
20256 llvm_unreachable("Unexpected AtomicRMW BinOp");
20257 case AtomicRMWInst::Xchg:
20258 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20259 case AtomicRMWInst::Add:
20260 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20261 case AtomicRMWInst::Sub:
20262 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20263 case AtomicRMWInst::Nand:
20264 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20265 case AtomicRMWInst::Max:
20266 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20267 case AtomicRMWInst::Min:
20268 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20269 case AtomicRMWInst::UMax:
20270 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20271 case AtomicRMWInst::UMin:
20272 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20273 }
20274 }
20275
20276  llvm_unreachable("Unexpected XLen");
20277}
20278
20279Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20280 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20281 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20282 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20283 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20284 // mask, as this produces better code than the LR/SC loop emitted by
20285 // int_riscv_masked_atomicrmw_xchg.
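  // For example, an 8-bit "atomicrmw xchg ptr, 0" becomes an AtomicRMWInst::And
  // of the aligned word with ~Mask, clearing just the addressed byte, and an
  // exchange with -1 becomes an Or with Mask, setting it.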
20286 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20287 isa<ConstantInt>(Val: AI->getValOperand())) {
20288 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
20289 if (CVal->isZero())
20290 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
20291 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
20292 Align: AI->getAlign(), Ordering: Ord);
20293 if (CVal->isMinusOne())
20294 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
20295 Align: AI->getAlign(), Ordering: Ord);
20296 }
20297
20298 unsigned XLen = Subtarget.getXLen();
20299 Value *Ordering =
20300 Builder.getIntN(N: XLen, C: static_cast<uint64_t>(AI->getOrdering()));
20301 Type *Tys[] = {AlignedAddr->getType()};
20302 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20303 M: AI->getModule(),
20304 id: getIntrinsicForMaskedAtomicRMWBinOp(XLen, BinOp: AI->getOperation()), Tys);
20305
20306 if (XLen == 64) {
20307 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
20308 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
20309 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
20310 }
20311
20312 Value *Result;
20313
20314 // Must pass the shift amount needed to sign extend the loaded value prior
20315 // to performing a signed comparison for min/max. ShiftAmt is the number of
20316 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20317 // is the number of bits to left+right shift the value in order to
20318 // sign-extend.
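  // For example, on RV32 with an i8 field at bit offset 8 (ValWidth = 8,
  // ShiftAmt = 8), SextShamt is (32 - 8) - 8 = 16: shifting the loaded word
  // left by 16 moves the field's sign bit to bit 31, and an arithmetic shift
  // right by 16 then yields a sign-extended value for the comparison.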
20319 if (AI->getOperation() == AtomicRMWInst::Min ||
20320 AI->getOperation() == AtomicRMWInst::Max) {
20321 const DataLayout &DL = AI->getModule()->getDataLayout();
20322 unsigned ValWidth =
20323 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
20324 Value *SextShamt =
20325 Builder.CreateSub(LHS: Builder.getIntN(N: XLen, C: XLen - ValWidth), RHS: ShiftAmt);
20326 Result = Builder.CreateCall(Callee: LrwOpScwLoop,
20327 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20328 } else {
20329 Result =
20330 Builder.CreateCall(Callee: LrwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
20331 }
20332
20333 if (XLen == 64)
20334 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
20335 return Result;
20336}
20337
20338TargetLowering::AtomicExpansionKind
20339RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20340 AtomicCmpXchgInst *CI) const {
20341 // Don't expand forced atomics, we want to have __sync libcalls instead.
20342 if (Subtarget.hasForcedAtomics())
20343 return AtomicExpansionKind::None;
20344
20345 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20346 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20347 (Size == 8 || Size == 16))
20348 return AtomicExpansionKind::MaskedIntrinsic;
20349 return AtomicExpansionKind::None;
20350}
20351
20352Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20353 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20354 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20355 unsigned XLen = Subtarget.getXLen();
20356 Value *Ordering = Builder.getIntN(N: XLen, C: static_cast<uint64_t>(Ord));
20357 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20358 if (XLen == 64) {
20359 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
20360 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
20361 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
20362 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20363 }
20364 Type *Tys[] = {AlignedAddr->getType()};
20365 Function *MaskedCmpXchg =
20366 Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys);
20367 Value *Result = Builder.CreateCall(
20368 Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20369 if (XLen == 64)
20370 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
20371 return Result;
20372}
20373
20374bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20375 EVT DataVT) const {
20376 // We have indexed loads for all supported EEW types. Indices are always
20377 // zero extended.
20378 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20379 isTypeLegal(Extend.getValueType()) &&
20380 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20381 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20382}
20383
20384bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20385 EVT VT) const {
20386 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20387 return false;
20388
20389 switch (FPVT.getSimpleVT().SimpleTy) {
20390 case MVT::f16:
20391 return Subtarget.hasStdExtZfhmin();
20392 case MVT::f32:
20393 return Subtarget.hasStdExtF();
20394 case MVT::f64:
20395 return Subtarget.hasStdExtD();
20396 default:
20397 return false;
20398 }
20399}
20400
20401unsigned RISCVTargetLowering::getJumpTableEncoding() const {
  // If we are using the small code model, we can reduce the size of each jump
  // table entry to 4 bytes.
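  // On RV64 this halves each entry relative to the pointer-sized (8-byte)
  // entries used otherwise.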
20404 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20405 getTargetMachine().getCodeModel() == CodeModel::Small) {
20406 return MachineJumpTableInfo::EK_Custom32;
20407 }
20408 return TargetLowering::getJumpTableEncoding();
20409}
20410
20411const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20412 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20413 unsigned uid, MCContext &Ctx) const {
20414 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20415 getTargetMachine().getCodeModel() == CodeModel::Small);
20416 return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
20417}
20418
20419bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20420 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20421 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20422 // a power of two as well.
20423 // FIXME: This doesn't work for zve32, but that's already broken
20424 // elsewhere for the same reason.
20425 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20426 static_assert(RISCV::RVVBitsPerBlock == 64,
20427 "RVVBitsPerBlock changed, audit needed");
20428 return true;
20429}
20430
20431bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20432 SDValue &Offset,
20433 ISD::MemIndexedMode &AM,
20434 SelectionDAG &DAG) const {
20435 // Target does not support indexed loads.
20436 if (!Subtarget.hasVendorXTHeadMemIdx())
20437 return false;
20438
20439 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20440 return false;
20441
20442 Base = Op->getOperand(Num: 0);
20443 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: 1))) {
20444 int64_t RHSC = RHS->getSExtValue();
20445 if (Op->getOpcode() == ISD::SUB)
20446 RHSC = -(uint64_t)RHSC;
20447
20448 // The constants that can be encoded in the THeadMemIdx instructions
20449 // are of the form (sign_extend(imm5) << imm2).
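    // For example, 120 (= 15 << 3) and -128 (= -16 << 3) are encodable, while
    // 121 is not, since no shift of a signed 5-bit immediate produces it.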
20450 bool isLegalIndexedOffset = false;
20451 for (unsigned i = 0; i < 4; i++)
20452 if (isInt<5>(x: RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20453 isLegalIndexedOffset = true;
20454 break;
20455 }
20456
20457 if (!isLegalIndexedOffset)
20458 return false;
20459
20460 Offset = Op->getOperand(Num: 1);
20461 return true;
20462 }
20463
20464 return false;
20465}
20466
20467bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20468 SDValue &Offset,
20469 ISD::MemIndexedMode &AM,
20470 SelectionDAG &DAG) const {
20471 EVT VT;
20472 SDValue Ptr;
20473 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
20474 VT = LD->getMemoryVT();
20475 Ptr = LD->getBasePtr();
20476 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
20477 VT = ST->getMemoryVT();
20478 Ptr = ST->getBasePtr();
20479 } else
20480 return false;
20481
20482 if (!getIndexedAddressParts(Op: Ptr.getNode(), Base, Offset, AM, DAG))
20483 return false;
20484
20485 AM = ISD::PRE_INC;
20486 return true;
20487}
20488
20489bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
20490 SDValue &Base,
20491 SDValue &Offset,
20492 ISD::MemIndexedMode &AM,
20493 SelectionDAG &DAG) const {
20494 EVT VT;
20495 SDValue Ptr;
20496 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
20497 VT = LD->getMemoryVT();
20498 Ptr = LD->getBasePtr();
20499 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
20500 VT = ST->getMemoryVT();
20501 Ptr = ST->getBasePtr();
20502 } else
20503 return false;
20504
20505 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20506 return false;
  // Post-indexing updates the base, so it's not a valid transform
  // if the base is not the same as the load/store's pointer.
20509 if (Ptr != Base)
20510 return false;
20511
20512 AM = ISD::POST_INC;
20513 return true;
20514}
20515
20516bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20517 EVT VT) const {
20518 EVT SVT = VT.getScalarType();
20519
20520 if (!SVT.isSimple())
20521 return false;
20522
20523 switch (SVT.getSimpleVT().SimpleTy) {
20524 case MVT::f16:
20525 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20526 : Subtarget.hasStdExtZfhOrZhinx();
20527 case MVT::f32:
20528 return Subtarget.hasStdExtFOrZfinx();
20529 case MVT::f64:
20530 return Subtarget.hasStdExtDOrZdinx();
20531 default:
20532 break;
20533 }
20534
20535 return false;
20536}
20537
20538ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20539 // Zacas will use amocas.w which does not require extension.
20540 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20541}
20542
20543Register RISCVTargetLowering::getExceptionPointerRegister(
20544 const Constant *PersonalityFn) const {
20545 return RISCV::X10;
20546}
20547
20548Register RISCVTargetLowering::getExceptionSelectorRegister(
20549 const Constant *PersonalityFn) const {
20550 return RISCV::X11;
20551}
20552
20553bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if a LibCall argument
  // or return value is a float narrower than XLEN on a soft FP ABI.
20556 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20557 Type.getSizeInBits() < Subtarget.getXLen()))
20558 return false;
20559
20560 return true;
20561}
20562
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
20564 if (Subtarget.is64Bit() && Type == MVT::i32)
20565 return true;
20566
20567 return IsSigned;
20568}
20569
20570bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20571 SDValue C) const {
20572 // Check integral scalar types.
20573 const bool HasExtMOrZmmul =
20574 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20575 if (!VT.isScalarInteger())
20576 return false;
20577
  // Omit the optimization if the subtarget has the M extension (or Zmmul) and
  // the data size exceeds XLen.
20580 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20581 return false;
20582
20583 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
    // Break the MUL into a SLLI and an ADD/SUB.
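    // For example, x * 9 becomes (x << 3) + x and x * 7 becomes (x << 3) - x.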
20585 const APInt &Imm = ConstNode->getAPIntValue();
20586 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20587 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20588 return true;
20589
20590 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
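    // For example, x * 2050 (= 2 + 2048) can be selected as
    // (SH1ADD x, (SLLI x, 11)), avoiding materialization of 2050.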
20591 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(N: 12) &&
20592 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20593 (Imm - 8).isPowerOf2()))
20594 return true;
20595
    // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm would
    // otherwise need a LUI/ADDI pair to materialize.
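    // For example, x * 6144 (= 3 << 11) becomes ((x << 1) + x) << 11, which
    // avoids materializing 6144 with a LUI/ADDI pair.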
20598 if (!Imm.isSignedIntN(N: 12) && Imm.countr_zero() < 12 &&
20599 ConstNode->hasOneUse()) {
20600 APInt ImmS = Imm.ashr(ShiftAmt: Imm.countr_zero());
20601 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20602 (1 - ImmS).isPowerOf2())
20603 return true;
20604 }
20605 }
20606
20607 return false;
20608}
20609
20610bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20611 SDValue ConstNode) const {
20612 // Let the DAGCombiner decide for vectors.
20613 EVT VT = AddNode.getValueType();
20614 if (VT.isVector())
20615 return true;
20616
20617 // Let the DAGCombiner decide for larger types.
20618 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20619 return true;
20620
  // It is not profitable if c1 is simm12 while c1*c2 is not, since c1 can be
  // folded into an addi but c1*c2 would have to be materialized.
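  // For example, with c1 = 16 and c2 = 512, c1 fits in an addi immediate but
  // c1 * c2 = 8192 does not.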
20622 ConstantSDNode *C1Node = cast<ConstantSDNode>(Val: AddNode.getOperand(i: 1));
20623 ConstantSDNode *C2Node = cast<ConstantSDNode>(Val&: ConstNode);
20624 const APInt &C1 = C1Node->getAPIntValue();
20625 const APInt &C2 = C2Node->getAPIntValue();
20626 if (C1.isSignedIntN(N: 12) && !(C1 * C2).isSignedIntN(N: 12))
20627 return false;
20628
20629 // Default to true and let the DAGCombiner decide.
20630 return true;
20631}
20632
20633bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20634 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20635 unsigned *Fast) const {
20636 if (!VT.isVector()) {
20637 if (Fast)
20638 *Fast = Subtarget.enableUnalignedScalarMem();
20639 return Subtarget.enableUnalignedScalarMem();
20640 }
20641
  // All vector implementations must support element alignment.
20643 EVT ElemVT = VT.getVectorElementType();
20644 if (Alignment >= ElemVT.getStoreSize()) {
20645 if (Fast)
20646 *Fast = 1;
20647 return true;
20648 }
20649
  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed and of considering them fast.
20654 if (Fast)
20655 *Fast = Subtarget.enableUnalignedVectorMem();
20656 return Subtarget.enableUnalignedVectorMem();
20657}
20658
EVT RISCVTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
20662 if (!Subtarget.hasVInstructions())
20663 return MVT::Other;
20664
20665 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20666 return MVT::Other;
20667
20668 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20669 // has an expansion threshold, and we want the number of hardware memory
20670 // operations to correspond roughly to that threshold. LMUL>1 operations
20671 // are typically expanded linearly internally, and thus correspond to more
20672 // than one actual memory operation. Note that store merging and load
20673 // combining will typically form larger LMUL operations from the LMUL1
20674 // operations emitted here, and that's okay because combining isn't
20675 // introducing new memory operations; it's just merging existing ones.
20676 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20677 if (Op.size() < MinVLenInBytes)
20678 // TODO: Figure out short memops. For the moment, do the default thing
20679 // which ends up using scalar sequences.
20680 return MVT::Other;
20681
20682 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20683 // a large scalar constant and instead use vmv.v.x/i to do the
20684 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20685 // maximize the chance we can encode the size in the vsetvli.
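  // For example, a memset of the byte 0x2a with ELEN=64 would otherwise have
  // to materialize 0x2a2a2a2a2a2a2a2a in a scalar register before
  // broadcasting; with i8, a single vmv.v.x of 0x2a suffices.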
20686 MVT ELenVT = MVT::getIntegerVT(BitWidth: Subtarget.getELen());
20687 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20688
20689 // Do we have sufficient alignment for our preferred VT? If not, revert
20690 // to largest size allowed by our alignment criteria.
20691 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20692 Align RequiredAlign(PreferredVT.getStoreSize());
20693 if (Op.isFixedDstAlign())
20694 RequiredAlign = std::min(a: RequiredAlign, b: Op.getDstAlign());
20695 if (Op.isMemcpy())
20696 RequiredAlign = std::min(a: RequiredAlign, b: Op.getSrcAlign());
20697 PreferredVT = MVT::getIntegerVT(BitWidth: RequiredAlign.value() * 8);
20698 }
20699 return MVT::getVectorVT(VT: PreferredVT, NumElements: MinVLenInBytes/PreferredVT.getStoreSize());
20700}
20701
20702bool RISCVTargetLowering::splitValueIntoRegisterParts(
20703 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20704 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20705 bool IsABIRegCopy = CC.has_value();
20706 EVT ValueVT = Val.getValueType();
20707 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20708 PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // NaN, and cast to f32.
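    // For example, the f16 value 1.0 (0x3C00) is passed as the f32 bit
    // pattern 0xFFFF3C00, a NaN whose low half holds the original bits.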
20711 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20712 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20713 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20714 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20715 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20716 Parts[0] = Val;
20717 return true;
20718 }
20719
20720 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20721 LLVMContext &Context = *DAG.getContext();
20722 EVT ValueEltVT = ValueVT.getVectorElementType();
20723 EVT PartEltVT = PartVT.getVectorElementType();
20724 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20725 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20726 if (PartVTBitSize % ValueVTBitSize == 0) {
20727 assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, widen the value in its own
      // element type and then bitcast to PartVT.
      // For example, to copy <vscale x 1 x i8> into <vscale x 4 x i16>, we
      // widen it to <vscale x 8 x i8> with an INSERT_SUBVECTOR and then
      // bitcast the result to <vscale x 4 x i16>.
20734 if (ValueEltVT != PartEltVT) {
20735 if (PartVTBitSize > ValueVTBitSize) {
20736 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of elements should not be zero.");
20738 EVT SameEltTypeVT =
20739 EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true);
20740 Val = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SameEltTypeVT,
20741 N1: DAG.getUNDEF(VT: SameEltTypeVT), N2: Val,
20742 N3: DAG.getVectorIdxConstant(Val: 0, DL));
20743 }
20744 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
20745 } else {
20746 Val =
20747 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: PartVT, N1: DAG.getUNDEF(VT: PartVT),
20748 N2: Val, N3: DAG.getVectorIdxConstant(Val: 0, DL));
20749 }
20750 Parts[0] = Val;
20751 return true;
20752 }
20753 }
20754 return false;
20755}
20756
20757SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20758 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20759 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20760 bool IsABIRegCopy = CC.has_value();
20761 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20762 PartVT == MVT::f32) {
20763 SDValue Val = Parts[0];
20764
20765 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20766 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20767 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20768 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
20769 return Val;
20770 }
20771
20772 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20773 LLVMContext &Context = *DAG.getContext();
20774 SDValue Val = Parts[0];
20775 EVT ValueEltVT = ValueVT.getVectorElementType();
20776 EVT PartEltVT = PartVT.getVectorElementType();
20777 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20778 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20779 if (PartVTBitSize % ValueVTBitSize == 0) {
20780 assert(PartVTBitSize >= ValueVTBitSize);
20781 EVT SameEltTypeVT = ValueVT;
      // If the element types are different, first bitcast the part to a
      // vector with the same element type as ValueVT.
      // For example, to extract <vscale x 1 x i8> from a <vscale x 4 x i16>
      // part, we bitcast the part to <vscale x 8 x i8> first and then
      // extract the <vscale x 1 x i8> subvector.
20788 if (ValueEltVT != PartEltVT) {
20789 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
20791 SameEltTypeVT =
20792 EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true);
20793 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: SameEltTypeVT, Operand: Val);
20794 }
20795 Val = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ValueVT, N1: Val,
20796 N2: DAG.getVectorIdxConstant(Val: 0, DL));
20797 return Val;
20798 }
20799 }
20800 return SDValue();
20801}
20802
20803bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20804 // When aggressively optimizing for code size, we prefer to use a div
20805 // instruction, as it is usually smaller than the alternative sequence.
20806 // TODO: Add vector division?
20807 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20808 return OptSize && !VT.isVector();
20809}
20810
20811bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing a splat of zero_extend or sign_extend can prevent matching a
  // widening instruction in some cases.
20814 unsigned Opc = N->getOpcode();
20815 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20816 return false;
20817 return true;
20818}
20819
20820static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20821 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20822 Function *ThreadPointerFunc =
20823 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20824 return IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(),
20825 Ptr: IRB.CreateCall(Callee: ThreadPointerFunc), Idx0: Offset);
20826}
20827
20828Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20829 // Fuchsia provides a fixed TLS slot for the stack cookie.
20830 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20831 if (Subtarget.isTargetFuchsia())
20832 return useTpOffset(IRB, Offset: -0x10);
20833
20834 return TargetLowering::getIRStackGuard(IRB);
20835}
20836
20837bool RISCVTargetLowering::isLegalInterleavedAccessType(
20838 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20839 const DataLayout &DL) const {
20840 EVT VT = getValueType(DL, Ty: VTy);
20841 // Don't lower vlseg/vsseg for vector types that can't be split.
20842 if (!isTypeLegal(VT))
20843 return false;
20844
20845 if (!isLegalElementTypeForRVV(ScalarTy: VT.getScalarType()) ||
20846 !allowsMemoryAccessForAlignment(Context&: VTy->getContext(), DL, VT, AddrSpace,
20847 Alignment))
20848 return false;
20849
20850 MVT ContainerVT = VT.getSimpleVT();
20851
20852 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
20853 if (!Subtarget.useRVVForFixedLengthVectors())
20854 return false;
20855 // Sometimes the interleaved access pass picks up splats as interleaves of
20856 // one element. Don't lower these.
20857 if (FVTy->getNumElements() < 2)
20858 return false;
20859
20860 ContainerVT = getContainerForFixedLengthVector(VT: VT.getSimpleVT());
20861 }
20862
20863 // Need to make sure that EMUL * NFIELDS ≤ 8
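  // For example, a factor-4 segment access at LMUL=2 uses 8 vector registers
  // and is allowed, while factor 4 at LMUL=4 would need 16 and is rejected.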
20864 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMUL: getLMUL(VT: ContainerVT));
20865 if (Fractional)
20866 return true;
20867 return Factor * LMUL <= 8;
20868}
20869
20870bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20871 Align Alignment) const {
20872 if (!Subtarget.hasVInstructions())
20873 return false;
20874
20875 // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() &&
      !Subtarget.useRVVForFixedLengthVectors())
20877 return false;
20878
20879 EVT ScalarType = DataType.getScalarType();
20880 if (!isLegalElementTypeForRVV(ScalarTy: ScalarType))
20881 return false;
20882
20883 if (!Subtarget.enableUnalignedVectorMem() &&
20884 Alignment < ScalarType.getStoreSize())
20885 return false;
20886
20887 return true;
20888}
20889
20890static const Intrinsic::ID FixedVlsegIntrIds[] = {
20891 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20892 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20893 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20894 Intrinsic::riscv_seg8_load};
20895
20896/// Lower an interleaved load into a vlsegN intrinsic.
20897///
20898/// E.g. Lower an interleaved load (Factor = 2):
20899/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20900/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20901/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20902///
20903/// Into:
20904/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20905/// %ptr, i64 4)
20906/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
20907/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
20908bool RISCVTargetLowering::lowerInterleavedLoad(
20909 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20910 ArrayRef<unsigned> Indices, unsigned Factor) const {
20911 IRBuilder<> Builder(LI);
20912
20913 auto *VTy = cast<FixedVectorType>(Val: Shuffles[0]->getType());
20914 if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: LI->getAlign(),
20915 AddrSpace: LI->getPointerAddressSpace(),
20916 DL: LI->getModule()->getDataLayout()))
20917 return false;
20918
20919 auto *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen());
20920
20921 Function *VlsegNFunc =
20922 Intrinsic::getDeclaration(M: LI->getModule(), id: FixedVlsegIntrIds[Factor - 2],
20923 Tys: {VTy, LI->getPointerOperandType(), XLenTy});
20924
20925 Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements());
20926
20927 CallInst *VlsegN =
20928 Builder.CreateCall(Callee: VlsegNFunc, Args: {LI->getPointerOperand(), VL});
20929
20930 for (unsigned i = 0; i < Shuffles.size(); i++) {
20931 Value *SubVec = Builder.CreateExtractValue(Agg: VlsegN, Idxs: Indices[i]);
20932 Shuffles[i]->replaceAllUsesWith(V: SubVec);
20933 }
20934
20935 return true;
20936}
20937
20938static const Intrinsic::ID FixedVssegIntrIds[] = {
20939 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20940 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20941 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20942 Intrinsic::riscv_seg8_store};
20943
20944/// Lower an interleaved store into a vssegN intrinsic.
20945///
20946/// E.g. Lower an interleaved store (Factor = 3):
20947/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20948/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20949/// store <12 x i32> %i.vec, <12 x i32>* %ptr
20950///
20951/// Into:
20952/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20953/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20954/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
20955/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
20956/// %ptr, i32 4)
20957///
20958/// Note that the new shufflevectors will be removed and we'll only generate one
20959/// vsseg3 instruction in CodeGen.
20960bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
20961 ShuffleVectorInst *SVI,
20962 unsigned Factor) const {
20963 IRBuilder<> Builder(SI);
20964 auto *ShuffleVTy = cast<FixedVectorType>(Val: SVI->getType());
20965 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
20966 auto *VTy = FixedVectorType::get(ElementType: ShuffleVTy->getElementType(),
20967 NumElts: ShuffleVTy->getNumElements() / Factor);
20968 if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: SI->getAlign(),
20969 AddrSpace: SI->getPointerAddressSpace(),
20970 DL: SI->getModule()->getDataLayout()))
20971 return false;
20972
20973 auto *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen());
20974
20975 Function *VssegNFunc =
20976 Intrinsic::getDeclaration(M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2],
20977 Tys: {VTy, SI->getPointerOperandType(), XLenTy});
20978
20979 auto Mask = SVI->getShuffleMask();
20980 SmallVector<Value *, 10> Ops;
20981
20982 for (unsigned i = 0; i < Factor; i++) {
20983 Value *Shuffle = Builder.CreateShuffleVector(
20984 V1: SVI->getOperand(i_nocapture: 0), V2: SVI->getOperand(i_nocapture: 1),
20985 Mask: createSequentialMask(Start: Mask[i], NumInts: VTy->getNumElements(), NumUndefs: 0));
20986 Ops.push_back(Elt: Shuffle);
20987 }
  // This VL should be OK (it should be executable in one vsseg instruction,
  // potentially under a larger LMUL) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
20991 Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements());
20992 Ops.append(IL: {SI->getPointerOperand(), VL});
20993
20994 Builder.CreateCall(Callee: VssegNFunc, Args: Ops);
20995
20996 return true;
20997}
20998
20999bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21000 LoadInst *LI) const {
21001 assert(LI->isSimple());
21002 IRBuilder<> Builder(LI);
21003
21004 // Only deinterleave2 supported at present.
21005 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
21006 return false;
21007
21008 unsigned Factor = 2;
21009
21010 VectorType *VTy = cast<VectorType>(Val: DI->getOperand(i_nocapture: 0)->getType());
21011 VectorType *ResVTy = cast<VectorType>(Val: DI->getType()->getContainedType(i: 0));
21012
21013 if (!isLegalInterleavedAccessType(VTy: ResVTy, Factor, Alignment: LI->getAlign(),
21014 AddrSpace: LI->getPointerAddressSpace(),
21015 DL: LI->getModule()->getDataLayout()))
21016 return false;
21017
21018 Function *VlsegNFunc;
21019 Value *VL;
21020 Type *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen());
21021 SmallVector<Value *, 10> Ops;
21022
21023 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
21024 VlsegNFunc = Intrinsic::getDeclaration(
21025 M: LI->getModule(), id: FixedVlsegIntrIds[Factor - 2],
21026 Tys: {ResVTy, LI->getPointerOperandType(), XLenTy});
21027 VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements());
21028 } else {
21029 static const Intrinsic::ID IntrIds[] = {
21030 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21031 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21032 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21033 Intrinsic::riscv_vlseg8};
21034
21035 VlsegNFunc = Intrinsic::getDeclaration(M: LI->getModule(), id: IntrIds[Factor - 2],
21036 Tys: {ResVTy, XLenTy});
21037 VL = Constant::getAllOnesValue(Ty: XLenTy);
21038 Ops.append(NumInputs: Factor, Elt: PoisonValue::get(T: ResVTy));
21039 }
21040
21041 Ops.append(IL: {LI->getPointerOperand(), VL});
21042
21043 Value *Vlseg = Builder.CreateCall(Callee: VlsegNFunc, Args: Ops);
21044 DI->replaceAllUsesWith(V: Vlseg);
21045
21046 return true;
21047}
21048
21049bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21050 StoreInst *SI) const {
21051 assert(SI->isSimple());
21052 IRBuilder<> Builder(SI);
21053
21054 // Only interleave2 supported at present.
21055 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
21056 return false;
21057
21058 unsigned Factor = 2;
21059
21060 VectorType *VTy = cast<VectorType>(Val: II->getType());
21061 VectorType *InVTy = cast<VectorType>(Val: II->getOperand(i_nocapture: 0)->getType());
21062
21063 if (!isLegalInterleavedAccessType(VTy: InVTy, Factor, Alignment: SI->getAlign(),
21064 AddrSpace: SI->getPointerAddressSpace(),
21065 DL: SI->getModule()->getDataLayout()))
21066 return false;
21067
21068 Function *VssegNFunc;
21069 Value *VL;
21070 Type *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen());
21071
21072 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
21073 VssegNFunc = Intrinsic::getDeclaration(
21074 M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2],
21075 Tys: {InVTy, SI->getPointerOperandType(), XLenTy});
21076 VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements());
21077 } else {
21078 static const Intrinsic::ID IntrIds[] = {
21079 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21080 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21081 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21082 Intrinsic::riscv_vsseg8};
21083
21084 VssegNFunc = Intrinsic::getDeclaration(M: SI->getModule(), id: IntrIds[Factor - 2],
21085 Tys: {InVTy, XLenTy});
21086 VL = Constant::getAllOnesValue(Ty: XLenTy);
21087 }
21088
21089 Builder.CreateCall(Callee: VssegNFunc, Args: {II->getOperand(i_nocapture: 0), II->getOperand(i_nocapture: 1),
21090 SI->getPointerOperand(), VL});
21091
21092 return true;
21093}
21094
21095MachineInstr *
21096RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21097 MachineBasicBlock::instr_iterator &MBBI,
21098 const TargetInstrInfo *TII) const {
21099 assert(MBBI->isCall() && MBBI->getCFIType() &&
21100 "Invalid call instruction for a KCFI check");
21101 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21102 MBBI->getOpcode()));
21103
21104 MachineOperand &Target = MBBI->getOperand(i: 0);
21105 Target.setIsRenamable(false);
21106
21107 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21108 .addReg(Target.getReg())
21109 .addImm(MBBI->getCFIType())
21110 .getInstr();
21111}
21112
21113#define GET_REGISTER_MATCHER
21114#include "RISCVGenAsmMatcher.inc"
21115
21116Register
21117RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21118 const MachineFunction &MF) const {
21119 Register Reg = MatchRegisterAltName(RegName);
21120 if (Reg == RISCV::NoRegister)
21121 Reg = MatchRegisterName(RegName);
21122 if (Reg == RISCV::NoRegister)
21123 report_fatal_error(
21124 reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21125 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21126 if (!ReservedRegs.test(Idx: Reg) && !Subtarget.isRegisterReservedByUser(i: Reg))
21127 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
21128 StringRef(RegName) + "\"."));
21129 return Reg;
21130}
21131
21132MachineMemOperand::Flags
21133RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21134 const MDNode *NontemporalInfo = I.getMetadata(KindID: LLVMContext::MD_nontemporal);
21135
21136 if (NontemporalInfo == nullptr)
21137 return MachineMemOperand::MONone;
21138
  // 1 -> default value, works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
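  // For example, level 2 becomes 0 after the adjustment below, so neither
  // hint bit is set, while level 5 becomes 3 and sets both MONontemporalBit0
  // and MONontemporalBit1.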
21144 int NontemporalLevel = 5;
21145 const MDNode *RISCVNontemporalInfo =
21146 I.getMetadata(Kind: "riscv-nontemporal-domain");
21147 if (RISCVNontemporalInfo != nullptr)
21148 NontemporalLevel =
21149 cast<ConstantInt>(
21150 Val: cast<ConstantAsMetadata>(Val: RISCVNontemporalInfo->getOperand(I: 0))
21151 ->getValue())
21152 ->getZExtValue();
21153
21154 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21155 "RISC-V target doesn't support this non-temporal domain.");
21156
21157 NontemporalLevel -= 2;
21158 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21159 if (NontemporalLevel & 0b1)
21160 Flags |= MONontemporalBit0;
21161 if (NontemporalLevel & 0b10)
21162 Flags |= MONontemporalBit1;
21163
21164 return Flags;
21165}
21166
21167MachineMemOperand::Flags
21168RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21169
21170 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21171 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21172 TargetFlags |= (NodeFlags & MONontemporalBit0);
21173 TargetFlags |= (NodeFlags & MONontemporalBit1);
21174 return TargetFlags;
21175}
21176
21177bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21178 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21179 return getTargetMMOFlags(Node: NodeX) == getTargetMMOFlags(Node: NodeY);
21180}
21181
21182bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21183 if (VT.isScalableVector())
21184 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21185 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21186 return true;
21187 return Subtarget.hasStdExtZbb() &&
21188 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21189}
21190
21191unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21192 ISD::CondCode Cond) const {
21193 return isCtpopFast(VT) ? 0 : 1;
21194}
21195
21196bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21197
21198 // GISel support is in progress or complete for these opcodes.
21199 unsigned Op = Inst.getOpcode();
21200 if (Op == Instruction::Add || Op == Instruction::Sub ||
21201 Op == Instruction::And || Op == Instruction::Or ||
21202 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21203 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21204 return false;
21205
21206 if (Inst.getType()->isScalableTy())
21207 return true;
21208
21209 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21210 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21211 !isa<ReturnInst>(Val: &Inst))
21212 return true;
21213
21214 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: &Inst)) {
21215 if (AI->getAllocatedType()->isScalableTy())
21216 return true;
21217 }
21218
21219 return false;
21220}
21221
21222SDValue
21223RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21224 SelectionDAG &DAG,
21225 SmallVectorImpl<SDNode *> &Created) const {
21226 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21227 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
21228 return SDValue(N, 0); // Lower SDIV as SDIV
21229
21230 // Only perform this transform if short forward branch opt is supported.
21231 if (!Subtarget.hasShortForwardBranchOpt())
21232 return SDValue();
21233 EVT VT = N->getValueType(ResNo: 0);
21234 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21235 return SDValue();
21236
21237 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
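  // For example, a divisor of 2048 needs an addi of 2047, which fits in
  // simm12, while a divisor of 4096 would need 4095 and is rejected.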
21238 if (Divisor.sgt(RHS: 2048) || Divisor.slt(RHS: -2048))
21239 return SDValue();
21240 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21241}
21242
21243bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21244 EVT VT, const APInt &AndMask) const {
21245 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21246 return !Subtarget.hasStdExtZbs() && AndMask.ugt(RHS: 1024);
21247 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21248}
21249
21250unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21251 return Subtarget.getMinimumJumpTableEntries();
21252}
21253
// Handle a single argument, such as a return value.
21255template <typename Arg>
21256void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
  // This lambda determines whether the argument list consists of homogeneous
  // scalable vector types.
21259 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21260 // First, extract the first element in the argument type.
21261 auto It = ArgList.begin();
21262 MVT FirstArgRegType = It->VT;
21263
    // Return false if the list is empty or the first type needs to be split.
21265 if (It == ArgList.end() || It->Flags.isSplit())
21266 return false;
21267
21268 ++It;
21269
    // Return false if this argument contains only one element, or if the
    // first type is not a scalable vector.
21272 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21273 return false;
21274
21275 // Second, check if the following elements in this argument type are all the
21276 // same.
21277 for (; It != ArgList.end(); ++It)
21278 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21279 return false;
21280
21281 return true;
21282 };
21283
21284 if (isHomogeneousScalableVectorType(ArgList)) {
21285 // Handle as tuple type
21286 RVVArgInfos.push_back(Elt: {(unsigned)ArgList.size(), ArgList[0].VT, false});
21287 } else {
21288 // Handle as normal vector type
21289 bool FirstVMaskAssigned = false;
21290 for (const auto &OutArg : ArgList) {
21291 MVT RegisterVT = OutArg.VT;
21292
21293 // Skip non-RVV register type
21294 if (!RegisterVT.isVector())
21295 continue;
21296
21297 if (RegisterVT.isFixedLengthVector())
21298 RegisterVT = TLI->getContainerForFixedLengthVector(VT: RegisterVT);
21299
21300 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21301 RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: true});
21302 FirstVMaskAssigned = true;
21303 continue;
21304 }
21305
21306 RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: false});
21307 }
21308 }
21309}
21310
21311// Handle multiple args.
21312template <>
21313void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21314 const DataLayout &DL = MF->getDataLayout();
21315 const Function &F = MF->getFunction();
21316 LLVMContext &Context = F.getContext();
21317
21318 bool FirstVMaskAssigned = false;
21319 for (Type *Ty : TypeList) {
21320 StructType *STy = dyn_cast<StructType>(Val: Ty);
21321 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21322 Type *ElemTy = STy->getTypeAtIndex(N: 0U);
21323 EVT VT = TLI->getValueType(DL, Ty: ElemTy);
21324 MVT RegisterVT =
21325 TLI->getRegisterTypeForCallingConv(Context, CC: F.getCallingConv(), VT);
21326 unsigned NumRegs =
21327 TLI->getNumRegistersForCallingConv(Context, CC: F.getCallingConv(), VT);
21328
21329 RVVArgInfos.push_back(
21330 Elt: {.NF: NumRegs * STy->getNumElements(), .VT: RegisterVT, .FirstVMask: false});
21331 } else {
21332 SmallVector<EVT, 4> ValueVTs;
21333 ComputeValueVTs(TLI: *TLI, DL, Ty, ValueVTs);
21334
21335 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21336 ++Value) {
21337 EVT VT = ValueVTs[Value];
21338 MVT RegisterVT =
21339 TLI->getRegisterTypeForCallingConv(Context, CC: F.getCallingConv(), VT);
21340 unsigned NumRegs =
21341 TLI->getNumRegistersForCallingConv(Context, CC: F.getCallingConv(), VT);
21342
21343 // Skip non-RVV register type
21344 if (!RegisterVT.isVector())
21345 continue;
21346
21347 if (RegisterVT.isFixedLengthVector())
21348 RegisterVT = TLI->getContainerForFixedLengthVector(VT: RegisterVT);
21349
21350 if (!FirstVMaskAssigned &&
21351 RegisterVT.getVectorElementType() == MVT::i1) {
21352 RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: true});
21353 FirstVMaskAssigned = true;
21354 --NumRegs;
21355 }
21356
21357 RVVArgInfos.insert(I: RVVArgInfos.end(), NumToInsert: NumRegs, Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: false});
21358 }
21359 }
21360 }
21361}
21362
21363void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21364 unsigned StartReg) {
21365 assert((StartReg % LMul) == 0 &&
21366 "Start register number should be multiple of lmul");
21367 const MCPhysReg *VRArrays;
21368 switch (LMul) {
21369 default:
21370 report_fatal_error(reason: "Invalid lmul");
21371 case 1:
21372 VRArrays = ArgVRs;
21373 break;
21374 case 2:
21375 VRArrays = ArgVRM2s;
21376 break;
21377 case 4:
21378 VRArrays = ArgVRM4s;
21379 break;
21380 case 8:
21381 VRArrays = ArgVRM8s;
21382 break;
21383 }
21384
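  // Illustrative example (assuming ArgVRM2s lists v8m2, v10m2, ... in order):
  // with LMul = 2 and StartReg = 12, (StartReg - 8) / LMul = 2 selects v12m2
  // as the first register of the group.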
21385 for (unsigned i = 0; i < NF; ++i)
21386 if (StartReg)
21387 AllocatedPhysRegs.push_back(Elt: VRArrays[(StartReg - 8) / LMul + i]);
21388 else
21389 AllocatedPhysRegs.push_back(Elt: MCPhysReg());
21390}
21391
/// This function determines whether each RVV argument is passed by register.
/// If the argument can be assigned to a VR, give it a specific register;
/// otherwise, assign it 0, which is an invalid MCPhysReg.
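/// For example, an LMUL=2 argument occupies two consecutive vector registers
/// starting at an even-numbered register; if no suitably aligned group of
/// argument vector registers is free, it is assigned the invalid register 0.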
21395void RVVArgDispatcher::compute() {
21396 uint32_t AssignedMap = 0;
21397 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21398 // Allocate first vector mask argument to V0.
21399 if (ArgInfo.FirstVMask) {
21400 AllocatedPhysRegs.push_back(RISCV::V0);
21401 return;
21402 }
21403
21404 unsigned RegsNeeded = divideCeil(
21405 Numerator: ArgInfo.VT.getSizeInBits().getKnownMinValue(), Denominator: RISCV::RVVBitsPerBlock);
21406 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21407 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21408 StartReg += RegsNeeded) {
21409 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21410 if ((AssignedMap & Map) == 0) {
21411 allocatePhysReg(NF: ArgInfo.NF, LMul: RegsNeeded, StartReg: StartReg + 8);
21412 AssignedMap |= Map;
21413 return;
21414 }
21415 }
21416
21417 allocatePhysReg(NF: ArgInfo.NF, LMul: RegsNeeded, StartReg: 0);
21418 };
21419
21420 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21421 allocate(RVVArgInfos[i]);
21422}
21423
21424MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21425 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21426 return AllocatedPhysRegs[CurIdx++];
21427}
21428
21429namespace llvm::RISCVVIntrinsicsTable {
21430
21431#define GET_RISCVVIntrinsicsTable_IMPL
21432#include "RISCVGenSearchableTables.inc"
21433
21434} // namespace llvm::RISCVVIntrinsicsTable
21435
