1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
15#include "MCTargetDesc/RISCVMatInt.h"
16#include "RISCV.h"
17#include "RISCVMachineFunctionInfo.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/MemoryLocation.h"
24#include "llvm/Analysis/VectorUtils.h"
25#include "llvm/CodeGen/Analysis.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineJumpTableInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
32#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/IR/DiagnosticInfo.h"
35#include "llvm/IR/DiagnosticPrinter.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/PatternMatch.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/InstructionCost.h"
44#include "llvm/Support/KnownBits.h"
45#include "llvm/Support/MathExtras.h"
46#include "llvm/Support/raw_ostream.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
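// Hidden command-line options that tune individual lowering heuristics.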
static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));
60
static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));
66
static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));
72
static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
83RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
95 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
    report_fatal_error("Don't know how to lower this ABI");
107 case RISCVABI::ABI_ILP32:
108 case RISCVABI::ABI_ILP32E:
109 case RISCVABI::ABI_LP64E:
110 case RISCVABI::ABI_ILP32F:
111 case RISCVABI::ABI_ILP32D:
112 case RISCVABI::ABI_LP64:
113 case RISCVABI::ABI_LP64F:
114 case RISCVABI::ABI_LP64D:
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }
143
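  // Scalable vector value types, grouped by element type. These tables drive
  // RVV register class selection and the per-type legalization setup below.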
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
231 computeRegisterProperties(STI.getRegisterInfo());
232
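  // X2 is the RISC-V stack pointer register (sp).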
233 setStackPointerRegisterToSaveRestore(RISCV::X2);
234
235 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
238 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
252
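  // Integer compares: SETLT/SETULT map directly onto slt/sltu, SETGT/SETUGT
  // are custom-lowered, and the remaining orderings are expanded.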
  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
261 setOperationAction(ISD::SETCC, MVT::i32, Promote);
262
263 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
264
265 setOperationAction(ISD::VASTART, MVT::Other, Custom);
266 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
267 if (RV64LegalI32 && Subtarget.is64Bit())
268 setOperationAction(ISD::VAARG, MVT::i32, Promote);
269
270 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
271
272 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
278 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
279
280 if (!RV64LegalI32) {
281 setOperationAction(ISD::LOAD, MVT::i32, Custom);
282 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
283 MVT::i32, Custom);
284 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
287 setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
288 } else {
289 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
290 if (Subtarget.hasStdExtZbb()) {
291 setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
292 setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Custom);
293 }
294 }
295 setOperationAction(ISD::SADDO, MVT::i32, Custom);
296 } else {
    setLibcallName(
        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
        nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
305 if (RV64LegalI32 && Subtarget.is64Bit())
306 setOperationAction(ISD::MUL, MVT::i32, Promote);
307 } else if (Subtarget.is64Bit()) {
308 setOperationAction(ISD::MUL, MVT::i128, Custom);
309 if (!RV64LegalI32)
310 setOperationAction(ISD::MUL, MVT::i32, Custom);
311 else
312 setOperationAction(ISD::SMULO, MVT::i32, Custom);
313 } else {
314 setOperationAction(ISD::MUL, MVT::i64, Custom);
315 }
316
317 if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
321 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
325 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
330 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
331 setOperationAction(
332 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
333 Expand);
334 }
335
  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);
339
  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
345 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
348 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
352 } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
354 if (RV64LegalI32 && Subtarget.is64Bit())
355 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
360 setOperationAction(ISD::BSWAP, XLenVT,
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
366 setOperationAction(ISD::BSWAP, MVT::i32,
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
377 setOperationAction(ISD::BITREVERSE, XLenVT,
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
385 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
390 setOperationAction(ISD::CTTZ, MVT::i32, Legal);
391 else
392 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
396 if (RV64LegalI32 && Subtarget.is64Bit())
397 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32bit case is efficient on 64bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
406 setOperationAction(ISD::CTLZ, MVT::i32,
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
409 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
410 } else
411 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
412 }
413 } else {
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
415 if (RV64LegalI32 && Subtarget.is64Bit())
416 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
421 setOperationAction(ISD::ABS, MVT::i32, Custom);
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, XLenVT, Legal);
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
429 if (RV64LegalI32 && Subtarget.is64Bit())
430 setOperationAction(ISD::SELECT, MVT::i32, Promote);
431 }
432
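  // Shared tables of FP node kinds and condition codes used to configure the
  // scalar floating-point types (f16/bf16/f32/f64) below.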
433 static const unsigned FPLegalNodeTypes[] = {
434 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
435 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
436 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
437 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
438 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
439 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
440
441 static const ISD::CondCode FPCCToExpand[] = {
442 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
443 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
444 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
445
446 static const unsigned FPOpToExpand[] = {
447 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
451 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
452 ISD::FROUNDEVEN};
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
455 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
458 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
459 ISD::FSUB, ISD::FMUL, ISD::FMA,
460 ISD::FDIV, ISD::FSQRT, ISD::FABS,
461 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
462 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
463 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
464 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
465 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
466 ISD::FROUNDEVEN, ISD::SELECT};
467
468 if (Subtarget.hasStdExtZfbfmin()) {
469 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
470 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
471 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
472 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
473 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
474 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
475 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
476 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
478 setOperationAction(ISD::FREM, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
481 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
489 setOperationAction(ISD::SELECT, MVT::f16, Custom);
490 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
493 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
494 ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
498 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
499 }
500
501 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
502 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
504 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
505 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
506
507 setOperationAction(ISD::FNEARBYINT, MVT::f16,
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
509 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
510 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
511 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
512 ISD::FLOG10},
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
517 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
518 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
519 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
520 ISD::STRICT_FTRUNC},
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
525 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
526
527 if (!Subtarget.hasStdExtZfa())
528 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
536 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
537 setOperationAction(ISD::SELECT, MVT::f32, Custom);
538 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
544 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
545 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
546 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
548 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
549 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
550
551 if (Subtarget.hasStdExtZfa())
552 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
553 else
554 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
555 }
556
557 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
558 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
559
560 if (Subtarget.hasStdExtDOrZdinx()) {
561 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
562
563 if (!Subtarget.is64Bit())
564 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
565
566 if (Subtarget.hasStdExtZfa()) {
567 setOperationAction(FPRndMode, MVT::f64, Legal);
568 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
569 } else {
570 if (Subtarget.is64Bit())
571 setOperationAction(FPRndMode, MVT::f64, Custom);
572
573 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
574 }
575
576 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
577 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
578 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
579 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
580 setOperationAction(ISD::SELECT, MVT::f64, Custom);
581 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
582 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
583 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
584 setOperationAction(FPOpToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
587 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
588 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
589 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
590 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
591 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
592 Subtarget.isSoftFPABI() ? LibCall : Custom);
593 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
594 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
595 }
596
597 if (Subtarget.is64Bit()) {
598 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
599 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
600 MVT::i32, Custom);
601 setOperationAction(ISD::LROUND, MVT::i32, Custom);
602 }
603
604 if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);
611
612 if (RV64LegalI32 && Subtarget.is64Bit())
613 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
614 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
615 MVT::i32, Legal);
616
    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
618 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
619 }
620
  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
626
627 if (Subtarget.is64Bit())
628 setOperationAction(ISD::Constant, MVT::i64, Custom);
629
630 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
631 // Unfortunately this can't be determined just from the ISA naming string.
632 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
633 Subtarget.is64Bit() ? Legal : Custom);
634 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
635 Subtarget.is64Bit() ? Legal : Custom);
636
637 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
638 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
639 if (Subtarget.is64Bit())
640 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
641
642 if (Subtarget.hasStdExtZicbop()) {
643 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
644 }
645
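  // With the A extension, native atomics are supported up to XLEN bits;
  // Zabha together with Zacas additionally enables sub-word cmpxchg.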
646 if (Subtarget.hasStdExtA()) {
647 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
648 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
649 setMinCmpXchgSizeInBits(8);
650 else
651 setMinCmpXchgSizeInBits(32);
652 } else if (Subtarget.hasForcedAtomics()) {
653 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
654 } else {
655 setMaxAtomicSizeInBitsSupported(0);
656 }
657
658 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
659
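  // Boolean (setcc) results are represented as 0 or 1.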
660 setBooleanContents(ZeroOrOneBooleanContent);
661
662 if (Subtarget.hasVInstructions()) {
663 setBooleanVectorContents(ZeroOrOneBooleanContent);
664
    setOperationAction(ISD::VSCALE, XLenVT, Custom);
666 if (RV64LegalI32 && Subtarget.is64Bit())
667 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
668
669 // RVV intrinsics may have illegal operands.
670 // We also need to custom legalize vmv.x.s.
671 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
672 ISD::INTRINSIC_VOID},
673 {MVT::i8, MVT::i16}, Custom);
674 if (Subtarget.is64Bit())
675 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
676 MVT::i32, Custom);
677 else
678 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
679 MVT::i64, Custom);
680
681 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
682 MVT::Other, Custom);
683
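    // Opcode tables for VP (vector-predicated) operations and vector
    // reductions, shared by the per-type RVV configuration loops below.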
684 static const unsigned IntegerVPOps[] = {
685 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
686 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
687 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
688 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
689 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
690 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
691 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
692 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
693 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
694 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
695 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
696 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
697 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
698 ISD::VP_USUBSAT};
699
700 static const unsigned FloatingPointVPOps[] = {
701 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
702 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
703 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
704 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
705 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
706 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
707 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
708 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
709 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
710 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
711 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
712 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
713 ISD::EXPERIMENTAL_VP_SPLICE};
714
715 static const unsigned IntegerVecReduceOps[] = {
716 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
717 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
718 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
719
720 static const unsigned FloatingPointVecReduceOps[] = {
721 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
722 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
723
724 if (!Subtarget.is64Bit()) {
725 // We must custom-lower certain vXi64 operations on RV32 due to the vector
726 // element type being illegal.
727 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
728 MVT::i64, Custom);
729
730 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
731
732 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
733 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
734 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
735 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
736 MVT::i64, Custom);
737 }
738
739 for (MVT VT : BoolVecVTs) {
740 if (!isTypeLegal(VT))
741 continue;
742
743 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
744
745 // Mask VTs are custom-expanded into a series of standard nodes
746 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
747 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
748 ISD::SCALAR_TO_VECTOR},
749 VT, Custom);
750
751 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
752 Custom);
753
754 setOperationAction(ISD::SELECT, VT, Custom);
755 setOperationAction(
756 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
757 Expand);
758
759 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
760
761 setOperationAction(
762 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
763 Custom);
764
765 setOperationAction(
766 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
767 Custom);
768
769 // RVV has native int->float & float->int conversions where the
770 // element type sizes are within one power-of-two of each other. Any
771 // wider distances between type sizes have to be lowered as sequences
772 // which progressively narrow the gap in stages.
773 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
774 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
775 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
776 ISD::STRICT_FP_TO_UINT},
777 VT, Custom);
778 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
779 Custom);
780
781 // Expand all extending loads to types larger than this, and truncating
782 // stores from types larger than this.
783 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
784 setTruncStoreAction(VT, OtherVT, Expand);
785 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
786 OtherVT, Expand);
787 }
788
789 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
790 ISD::VP_TRUNCATE, ISD::VP_SETCC},
791 VT, Custom);
792
793 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
794 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
795
796 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
797
798 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
799 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
800
801 setOperationPromotedToType(
802 ISD::VECTOR_SPLICE, VT,
803 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
804 }
805
806 for (MVT VT : IntVecVTs) {
807 if (!isTypeLegal(VT))
808 continue;
809
810 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
811 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
812
813 // Vectors implement MULHS/MULHU.
814 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
815
816 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
817 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
818 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
819
820 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
821 Legal);
822
823 setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
824
825 // Custom-lower extensions and truncations from/to mask types.
826 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
827 VT, Custom);
828
829 // RVV has native int->float & float->int conversions where the
830 // element type sizes are within one power-of-two of each other. Any
831 // wider distances between type sizes have to be lowered as sequences
832 // which progressively narrow the gap in stages.
833 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
834 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
835 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
836 ISD::STRICT_FP_TO_UINT},
837 VT, Custom);
838 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
839 Custom);
840 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
841 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
842 VT, Legal);
843
844 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
845 // nodes which truncate by one power of two at a time.
846 setOperationAction(ISD::TRUNCATE, VT, Custom);
847
848 // Custom-lower insert/extract operations to simplify patterns.
849 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
850 Custom);
851
852 // Custom-lower reduction operations to set up the corresponding custom
853 // nodes' operands.
854 setOperationAction(IntegerVecReduceOps, VT, Custom);
855
856 setOperationAction(IntegerVPOps, VT, Custom);
857
858 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
859
860 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
861 VT, Custom);
862
863 setOperationAction(
864 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
865 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
866 VT, Custom);
867
868 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
869 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
870 VT, Custom);
871
872 setOperationAction(ISD::SELECT, VT, Custom);
873 setOperationAction(ISD::SELECT_CC, VT, Expand);
874
875 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
876
877 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
878 setTruncStoreAction(VT, OtherVT, Expand);
879 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
880 OtherVT, Expand);
881 }
882
883 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
884 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
885
886 // Splice
887 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
888
889 if (Subtarget.hasStdExtZvkb()) {
890 setOperationAction(ISD::BSWAP, VT, Legal);
891 setOperationAction(ISD::VP_BSWAP, VT, Custom);
892 } else {
893 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
894 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
895 }
896
897 if (Subtarget.hasStdExtZvbb()) {
898 setOperationAction(ISD::BITREVERSE, VT, Legal);
899 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
900 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
901 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
902 VT, Custom);
903 } else {
904 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
905 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
906 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
907 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
908 VT, Expand);
909
        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
        // is in the range of f32.
912 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
913 if (isTypeLegal(FloatVT)) {
914 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
915 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
916 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
917 VT, Custom);
918 }
919 }
920 }
921
922 // Expand various CCs to best match the RVV ISA, which natively supports UNE
923 // but no other unordered comparisons, and supports all ordered comparisons
924 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
925 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
926 // and we pattern-match those back to the "original", swapping operands once
927 // more. This way we catch both operations and both "vf" and "fv" forms with
928 // fewer patterns.
929 static const ISD::CondCode VFPCCToExpand[] = {
930 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
931 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
932 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
933 };
934
935 // TODO: support more ops.
936 static const unsigned ZvfhminPromoteOps[] = {
937 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
938 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
939 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
940 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
941 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
942 ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
943 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
944
945 // TODO: support more vp ops.
946 static const unsigned ZvfhminPromoteVPOps[] = {
947 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
948 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
949 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
950 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
951 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
952 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
953 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
954 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
955 ISD::VP_FMAXIMUM};
956
957 // Sets common operation actions on RVV floating-point vector types.
958 const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
1031 };
1032
1033 // Sets common extload/truncstore actions on RVV floating-point vector
1034 // types.
1035 const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
1041 };
1042
1043 if (Subtarget.hasVInstructionsF16()) {
1044 for (MVT VT : F16VecVTs) {
1045 if (!isTypeLegal(VT))
1046 continue;
1047 SetCommonVFPActions(VT);
1048 }
1049 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1050 for (MVT VT : F16VecVTs) {
1051 if (!isTypeLegal(VT))
1052 continue;
1053 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1054 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1055 Custom);
1056 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1057 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1058 Custom);
1059 setOperationAction(ISD::SELECT_CC, VT, Expand);
1060 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1061 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1062 VT, Custom);
1063 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1064 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1065 VT, Custom);
1066 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1067 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1068 // load/store
1069 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1070
        // Custom split nxv32f16 since nxv32f32 is not legal.
1072 if (VT == MVT::nxv32f16) {
1073 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1074 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1075 continue;
1076 }
1077 // Add more promote ops.
1078 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1079 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1080 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1081 }
1082 }
1083
1084 if (Subtarget.hasVInstructionsF32()) {
1085 for (MVT VT : F32VecVTs) {
1086 if (!isTypeLegal(VT))
1087 continue;
1088 SetCommonVFPActions(VT);
1089 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1090 }
1091 }
1092
1093 if (Subtarget.hasVInstructionsF64()) {
1094 for (MVT VT : F64VecVTs) {
1095 if (!isTypeLegal(VT))
1096 continue;
1097 SetCommonVFPActions(VT);
1098 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1099 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1100 }
1101 }
1102
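    // Fixed-length vectors are lowered on top of RVV scalable containers:
    // start with every operation Expanded, then re-enable what can be lowered.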
1103 if (Subtarget.useRVVForFixedLengthVectors()) {
1104 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1105 if (!useRVVForFixedLengthVectorVT(VT))
1106 continue;
1107
1108 // By default everything must be expanded.
1109 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1110 setOperationAction(Op, VT, Expand);
1111 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1112 setTruncStoreAction(VT, OtherVT, Expand);
1113 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
1114 OtherVT, Expand);
1115 }
1116
1117 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1118 // expansion to a build_vector of 0s.
1119 setOperationAction(ISD::UNDEF, VT, Custom);
1120
1121 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1122 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1123 Custom);
1124
1125 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
1126 Custom);
1127
1128 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1129 VT, Custom);
1130
1131 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1132
1133 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1134
1135 setOperationAction(ISD::SETCC, VT, Custom);
1136
1137 setOperationAction(ISD::SELECT, VT, Custom);
1138
1139 setOperationAction(ISD::TRUNCATE, VT, Custom);
1140
1141 setOperationAction(ISD::BITCAST, VT, Custom);
1142
1143 setOperationAction(
1144 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1145 Custom);
1146
1147 setOperationAction(
1148 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1149 Custom);
1150
1151 setOperationAction(
1152 {
1153 ISD::SINT_TO_FP,
1154 ISD::UINT_TO_FP,
1155 ISD::FP_TO_SINT,
1156 ISD::FP_TO_UINT,
1157 ISD::STRICT_SINT_TO_FP,
1158 ISD::STRICT_UINT_TO_FP,
1159 ISD::STRICT_FP_TO_SINT,
1160 ISD::STRICT_FP_TO_UINT,
1161 },
1162 VT, Custom);
1163 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
1164 Custom);
1165
1166 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1167
        // Operations below differ between mask vectors and other vectors.
1169 if (VT.getVectorElementType() == MVT::i1) {
1170 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1171 ISD::OR, ISD::XOR},
1172 VT, Custom);
1173
1174 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1175 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1176 VT, Custom);
1177
1178 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1179 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1180 continue;
1181 }
1182
1183 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1184 // it before type legalization for i64 vectors on RV32. It will then be
1185 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1186 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1187 // improvements first.
1188 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1189 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1190 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1191 }
1192
1193 setOperationAction(
1194 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1195
1196 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1197 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1198 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1199 ISD::VP_SCATTER},
1200 VT, Custom);
1201
1202 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1203 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1204 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1205 VT, Custom);
1206
1207 setOperationAction(
1208 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1209
1210 setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
1211
1212 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1213 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1214 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1215
1216 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
1217 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
1218 VT, Custom);
1219
1220 setOperationAction(ISD::VSELECT, VT, Custom);
1221 setOperationAction(ISD::SELECT_CC, VT, Expand);
1222
1223 setOperationAction(
1224 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1225
1226 // Custom-lower reduction operations to set up the corresponding custom
1227 // nodes' operands.
1228 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1229 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1230 ISD::VECREDUCE_UMIN},
1231 VT, Custom);
1232
1233 setOperationAction(IntegerVPOps, VT, Custom);
1234
1235 if (Subtarget.hasStdExtZvkb())
1236 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
1237
1238 if (Subtarget.hasStdExtZvbb()) {
1239 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1240 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1241 VT, Custom);
1242 } else {
          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of
          // VT is in the range of f32.
1245 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1246 if (isTypeLegal(FloatVT))
1247 setOperationAction(
1248 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1249 Custom);
1250 }
1251 }
1252
1253 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1254 // There are no extending loads or truncating stores.
1255 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1256 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1257 setTruncStoreAction(VT, InnerVT, Expand);
1258 }
1259
1260 if (!useRVVForFixedLengthVectorVT(VT))
1261 continue;
1262
1263 // By default everything must be expanded.
1264 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1265 setOperationAction(Op, VT, Expand);
1266
1267 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1268 // expansion to a build_vector of 0s.
1269 setOperationAction(ISD::UNDEF, VT, Custom);
1270
1271 if (VT.getVectorElementType() == MVT::f16 &&
1272 !Subtarget.hasVInstructionsF16()) {
1273 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1274 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1275 Custom);
1276 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1277 setOperationAction(
1278 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1279 Custom);
1280 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
1281 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1282 VT, Custom);
1283 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1284 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1285 VT, Custom);
1286 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1287 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1288 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1289 // Don't promote f16 vector operations to f32 if f32 vector type is
1290 // not legal.
1291 // TODO: could split the f16 vector into two vectors and do promotion.
1292 if (!isTypeLegal(F32VecVT))
1293 continue;
1294 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1295 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1296 continue;
1297 }
1298
1299 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1300 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1301 Custom);
1302
1303 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1304 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
1305 ISD::EXTRACT_VECTOR_ELT},
1306 VT, Custom);
1307
1308 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1309 ISD::MGATHER, ISD::MSCATTER},
1310 VT, Custom);
1311
1312 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1313 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1314 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1315 ISD::VP_SCATTER},
1316 VT, Custom);
1317
1318 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1319 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1320 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1321 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1322 VT, Custom);
1323
1324 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1325
1326 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1327 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1328 VT, Custom);
1329
1330 setCondCodeAction(VFPCCToExpand, VT, Expand);
1331
1332 setOperationAction(ISD::SETCC, VT, Custom);
1333 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1334 setOperationAction(ISD::SELECT_CC, VT, Expand);
1335
1336 setOperationAction(ISD::BITCAST, VT, Custom);
1337
1338 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1339
1340 setOperationAction(FloatingPointVPOps, VT, Custom);
1341
1342 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1343 Custom);
1344 setOperationAction(
1345 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1346 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1347 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1348 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1349 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1350 VT, Custom);
1351 }
1352
1353 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1354 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1355 Custom);
1356 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1357 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1358 if (Subtarget.hasStdExtFOrZfinx())
1359 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1360 if (Subtarget.hasStdExtDOrZdinx())
1361 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1362 }
1363 }
1364
1365 if (Subtarget.hasStdExtA()) {
    setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1367 if (RV64LegalI32 && Subtarget.is64Bit())
1368 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1369 }
1370
1371 if (Subtarget.hasForcedAtomics()) {
1372 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
    setOperationAction(
        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
        XLenVT, LibCall);
1379 }
1380
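  // XTHeadMemIdx provides pre/post-increment addressing for scalar accesses.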
1381 if (Subtarget.hasVendorXTHeadMemIdx()) {
1382 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1383 setIndexedLoadAction(im, MVT::i8, Legal);
1384 setIndexedStoreAction(im, MVT::i8, Legal);
1385 setIndexedLoadAction(im, MVT::i16, Legal);
1386 setIndexedStoreAction(im, MVT::i16, Legal);
1387 setIndexedLoadAction(im, MVT::i32, Legal);
1388 setIndexedStoreAction(im, MVT::i32, Legal);
1389
1390 if (Subtarget.is64Bit()) {
1391 setIndexedLoadAction(im, MVT::i64, Legal);
1392 setIndexedStoreAction(im, MVT::i64, Legal);
1393 }
1394 }
1395 }
1396
1397 // Function alignments.
1398 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1399 setMinFunctionAlignment(FunctionAlignment);
1400 // Set preferred alignments.
1401 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1402 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1403
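  // Register the node kinds we want to see in PerformDAGCombine.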
1404 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1405 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
1406 ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1407 if (Subtarget.is64Bit())
1408 setTargetDAGCombine(ISD::SRA);
1409
1410 if (Subtarget.hasStdExtFOrZfinx())
1411 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1412
1413 if (Subtarget.hasStdExtZbb())
1414 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1415
1416 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1417 setTargetDAGCombine(ISD::TRUNCATE);
1418
1419 if (Subtarget.hasStdExtZbkb())
1420 setTargetDAGCombine(ISD::BITREVERSE);
1421 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1422 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1423 if (Subtarget.hasStdExtFOrZfinx())
1424 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1425 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1426 if (Subtarget.hasVInstructions())
1427 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1428 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1429 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1430 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1431 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1432 ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
1433 ISD::INSERT_VECTOR_ELT, ISD::ABS});
1434 if (Subtarget.hasVendorXTHeadMemPair())
1435 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1436 if (Subtarget.useRVVForFixedLengthVectors())
1437 setTargetDAGCombine(ISD::BITCAST);
1438
1439 setLibcallName(Call: RTLIB::FPEXT_F16_F32, Name: "__extendhfsf2");
1440 setLibcallName(Call: RTLIB::FPROUND_F32_F16, Name: "__truncsfhf2");
1441
1442 // Disable strict node mutation.
1443 IsStrictFPEnabled = true;
1444}
1445
1446EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1447 LLVMContext &Context,
1448 EVT VT) const {
1449 if (!VT.isVector())
1450 return getPointerTy(DL);
1451 if (Subtarget.hasVInstructions() &&
1452 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1453 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1454 return VT.changeVectorElementTypeToInteger();
1455}
1456
1457MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1458 return Subtarget.getXLenVT();
1459}
1460
1461// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1462bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1463 unsigned VF,
1464 bool IsScalable) const {
1465 if (!Subtarget.hasVInstructions())
1466 return true;
1467
1468 if (!IsScalable)
1469 return true;
1470
1471 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1472 return true;
1473
1474 // Don't allow VF=1 if those types aren't legal.
1475 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1476 return true;
1477
1478 // VLEN=32 support is incomplete.
1479 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1480 return true;
1481
1482 // The maximum VF is for the smallest element width with LMUL=8.
1483 // VF must be a power of 2.
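// That is RVVBitsPerBlock / 8 elements guaranteed per register at SEW=8,
// multiplied by the 8 registers of an LMUL=8 group.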
1484 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1485 return VF > MaxVF || !isPowerOf2_32(Value: VF);
1486}
1487
1488bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1489 return !Subtarget.hasVInstructions() ||
1490 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1491}
1492
1493bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1494 const CallInst &I,
1495 MachineFunction &MF,
1496 unsigned Intrinsic) const {
1497 auto &DL = I.getModule()->getDataLayout();
1498
1499 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1500 bool IsUnitStrided, bool UsePtrVal = false) {
1501 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1502 // We can't use ptrVal if the intrinsic can access memory before the
1503 // pointer. This means we can't use it for strided or indexed intrinsics.
1504 if (UsePtrVal)
1505 Info.ptrVal = I.getArgOperand(i: PtrOp);
1506 else
1507 Info.fallbackAddressSpace =
1508 I.getArgOperand(i: PtrOp)->getType()->getPointerAddressSpace();
1509 Type *MemTy;
1510 if (IsStore) {
1511 // Store value is the first operand.
1512 MemTy = I.getArgOperand(i: 0)->getType();
1513 } else {
1514 // Use the return type. If it's a segment load, the return type is a struct.
1515 MemTy = I.getType();
1516 if (MemTy->isStructTy())
1517 MemTy = MemTy->getStructElementType(N: 0);
1518 }
1519 if (!IsUnitStrided)
1520 MemTy = MemTy->getScalarType();
1521
1522 Info.memVT = getValueType(DL, Ty: MemTy);
1523 Info.align = Align(DL.getTypeSizeInBits(Ty: MemTy->getScalarType()) / 8);
1524 Info.size = MemoryLocation::UnknownSize;
1525 Info.flags |=
1526 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1527 return true;
1528 };
1529
1530 if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal))
1531 Info.flags |= MachineMemOperand::MONonTemporal;
1532
1533 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1534 switch (Intrinsic) {
1535 default:
1536 return false;
1537 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1538 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1539 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1540 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1545 case Intrinsic::riscv_masked_cmpxchg_i32:
1546 Info.opc = ISD::INTRINSIC_W_CHAIN;
1547 Info.memVT = MVT::i32;
1548 Info.ptrVal = I.getArgOperand(i: 0);
1549 Info.offset = 0;
1550 Info.align = Align(4);
1551 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1552 MachineMemOperand::MOVolatile;
1553 return true;
1554 case Intrinsic::riscv_masked_strided_load:
1555 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1556 /*IsUnitStrided*/ false);
1557 case Intrinsic::riscv_masked_strided_store:
1558 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1559 /*IsUnitStrided*/ false);
1560 case Intrinsic::riscv_seg2_load:
1561 case Intrinsic::riscv_seg3_load:
1562 case Intrinsic::riscv_seg4_load:
1563 case Intrinsic::riscv_seg5_load:
1564 case Intrinsic::riscv_seg6_load:
1565 case Intrinsic::riscv_seg7_load:
1566 case Intrinsic::riscv_seg8_load:
1567 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1568 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1569 case Intrinsic::riscv_seg2_store:
1570 case Intrinsic::riscv_seg3_store:
1571 case Intrinsic::riscv_seg4_store:
1572 case Intrinsic::riscv_seg5_store:
1573 case Intrinsic::riscv_seg6_store:
1574 case Intrinsic::riscv_seg7_store:
1575 case Intrinsic::riscv_seg8_store:
1576 // Operands are (vec, ..., vec, ptr, vl)
1577 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1578 /*IsStore*/ true,
1579 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1580 case Intrinsic::riscv_vle:
1581 case Intrinsic::riscv_vle_mask:
1582 case Intrinsic::riscv_vleff:
1583 case Intrinsic::riscv_vleff_mask:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1585 /*IsStore*/ false,
1586 /*IsUnitStrided*/ true,
1587 /*UsePtrVal*/ true);
1588 case Intrinsic::riscv_vse:
1589 case Intrinsic::riscv_vse_mask:
1590 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1591 /*IsStore*/ true,
1592 /*IsUnitStrided*/ true,
1593 /*UsePtrVal*/ true);
1594 case Intrinsic::riscv_vlse:
1595 case Intrinsic::riscv_vlse_mask:
1596 case Intrinsic::riscv_vloxei:
1597 case Intrinsic::riscv_vloxei_mask:
1598 case Intrinsic::riscv_vluxei:
1599 case Intrinsic::riscv_vluxei_mask:
1600 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1601 /*IsStore*/ false,
1602 /*IsUnitStrided*/ false);
1603 case Intrinsic::riscv_vsse:
1604 case Intrinsic::riscv_vsse_mask:
1605 case Intrinsic::riscv_vsoxei:
1606 case Intrinsic::riscv_vsoxei_mask:
1607 case Intrinsic::riscv_vsuxei:
1608 case Intrinsic::riscv_vsuxei_mask:
1609 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1610 /*IsStore*/ true,
1611 /*IsUnitStrided*/ false);
1612 case Intrinsic::riscv_vlseg2:
1613 case Intrinsic::riscv_vlseg3:
1614 case Intrinsic::riscv_vlseg4:
1615 case Intrinsic::riscv_vlseg5:
1616 case Intrinsic::riscv_vlseg6:
1617 case Intrinsic::riscv_vlseg7:
1618 case Intrinsic::riscv_vlseg8:
1619 case Intrinsic::riscv_vlseg2ff:
1620 case Intrinsic::riscv_vlseg3ff:
1621 case Intrinsic::riscv_vlseg4ff:
1622 case Intrinsic::riscv_vlseg5ff:
1623 case Intrinsic::riscv_vlseg6ff:
1624 case Intrinsic::riscv_vlseg7ff:
1625 case Intrinsic::riscv_vlseg8ff:
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1627 /*IsStore*/ false,
1628 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1629 case Intrinsic::riscv_vlseg2_mask:
1630 case Intrinsic::riscv_vlseg3_mask:
1631 case Intrinsic::riscv_vlseg4_mask:
1632 case Intrinsic::riscv_vlseg5_mask:
1633 case Intrinsic::riscv_vlseg6_mask:
1634 case Intrinsic::riscv_vlseg7_mask:
1635 case Intrinsic::riscv_vlseg8_mask:
1636 case Intrinsic::riscv_vlseg2ff_mask:
1637 case Intrinsic::riscv_vlseg3ff_mask:
1638 case Intrinsic::riscv_vlseg4ff_mask:
1639 case Intrinsic::riscv_vlseg5ff_mask:
1640 case Intrinsic::riscv_vlseg6ff_mask:
1641 case Intrinsic::riscv_vlseg7ff_mask:
1642 case Intrinsic::riscv_vlseg8ff_mask:
1643 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1644 /*IsStore*/ false,
1645 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1646 case Intrinsic::riscv_vlsseg2:
1647 case Intrinsic::riscv_vlsseg3:
1648 case Intrinsic::riscv_vlsseg4:
1649 case Intrinsic::riscv_vlsseg5:
1650 case Intrinsic::riscv_vlsseg6:
1651 case Intrinsic::riscv_vlsseg7:
1652 case Intrinsic::riscv_vlsseg8:
1653 case Intrinsic::riscv_vloxseg2:
1654 case Intrinsic::riscv_vloxseg3:
1655 case Intrinsic::riscv_vloxseg4:
1656 case Intrinsic::riscv_vloxseg5:
1657 case Intrinsic::riscv_vloxseg6:
1658 case Intrinsic::riscv_vloxseg7:
1659 case Intrinsic::riscv_vloxseg8:
1660 case Intrinsic::riscv_vluxseg2:
1661 case Intrinsic::riscv_vluxseg3:
1662 case Intrinsic::riscv_vluxseg4:
1663 case Intrinsic::riscv_vluxseg5:
1664 case Intrinsic::riscv_vluxseg6:
1665 case Intrinsic::riscv_vluxseg7:
1666 case Intrinsic::riscv_vluxseg8:
1667 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1668 /*IsStore*/ false,
1669 /*IsUnitStrided*/ false);
1670 case Intrinsic::riscv_vlsseg2_mask:
1671 case Intrinsic::riscv_vlsseg3_mask:
1672 case Intrinsic::riscv_vlsseg4_mask:
1673 case Intrinsic::riscv_vlsseg5_mask:
1674 case Intrinsic::riscv_vlsseg6_mask:
1675 case Intrinsic::riscv_vlsseg7_mask:
1676 case Intrinsic::riscv_vlsseg8_mask:
1677 case Intrinsic::riscv_vloxseg2_mask:
1678 case Intrinsic::riscv_vloxseg3_mask:
1679 case Intrinsic::riscv_vloxseg4_mask:
1680 case Intrinsic::riscv_vloxseg5_mask:
1681 case Intrinsic::riscv_vloxseg6_mask:
1682 case Intrinsic::riscv_vloxseg7_mask:
1683 case Intrinsic::riscv_vloxseg8_mask:
1684 case Intrinsic::riscv_vluxseg2_mask:
1685 case Intrinsic::riscv_vluxseg3_mask:
1686 case Intrinsic::riscv_vluxseg4_mask:
1687 case Intrinsic::riscv_vluxseg5_mask:
1688 case Intrinsic::riscv_vluxseg6_mask:
1689 case Intrinsic::riscv_vluxseg7_mask:
1690 case Intrinsic::riscv_vluxseg8_mask:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1692 /*IsStore*/ false,
1693 /*IsUnitStrided*/ false);
1694 case Intrinsic::riscv_vsseg2:
1695 case Intrinsic::riscv_vsseg3:
1696 case Intrinsic::riscv_vsseg4:
1697 case Intrinsic::riscv_vsseg5:
1698 case Intrinsic::riscv_vsseg6:
1699 case Intrinsic::riscv_vsseg7:
1700 case Intrinsic::riscv_vsseg8:
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false);
1704 case Intrinsic::riscv_vsseg2_mask:
1705 case Intrinsic::riscv_vsseg3_mask:
1706 case Intrinsic::riscv_vsseg4_mask:
1707 case Intrinsic::riscv_vsseg5_mask:
1708 case Intrinsic::riscv_vsseg6_mask:
1709 case Intrinsic::riscv_vsseg7_mask:
1710 case Intrinsic::riscv_vsseg8_mask:
1711 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1712 /*IsStore*/ true,
1713 /*IsUnitStrided*/ false);
1714 case Intrinsic::riscv_vssseg2:
1715 case Intrinsic::riscv_vssseg3:
1716 case Intrinsic::riscv_vssseg4:
1717 case Intrinsic::riscv_vssseg5:
1718 case Intrinsic::riscv_vssseg6:
1719 case Intrinsic::riscv_vssseg7:
1720 case Intrinsic::riscv_vssseg8:
1721 case Intrinsic::riscv_vsoxseg2:
1722 case Intrinsic::riscv_vsoxseg3:
1723 case Intrinsic::riscv_vsoxseg4:
1724 case Intrinsic::riscv_vsoxseg5:
1725 case Intrinsic::riscv_vsoxseg6:
1726 case Intrinsic::riscv_vsoxseg7:
1727 case Intrinsic::riscv_vsoxseg8:
1728 case Intrinsic::riscv_vsuxseg2:
1729 case Intrinsic::riscv_vsuxseg3:
1730 case Intrinsic::riscv_vsuxseg4:
1731 case Intrinsic::riscv_vsuxseg5:
1732 case Intrinsic::riscv_vsuxseg6:
1733 case Intrinsic::riscv_vsuxseg7:
1734 case Intrinsic::riscv_vsuxseg8:
1735 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1736 /*IsStore*/ true,
1737 /*IsUnitStrided*/ false);
1738 case Intrinsic::riscv_vssseg2_mask:
1739 case Intrinsic::riscv_vssseg3_mask:
1740 case Intrinsic::riscv_vssseg4_mask:
1741 case Intrinsic::riscv_vssseg5_mask:
1742 case Intrinsic::riscv_vssseg6_mask:
1743 case Intrinsic::riscv_vssseg7_mask:
1744 case Intrinsic::riscv_vssseg8_mask:
1745 case Intrinsic::riscv_vsoxseg2_mask:
1746 case Intrinsic::riscv_vsoxseg3_mask:
1747 case Intrinsic::riscv_vsoxseg4_mask:
1748 case Intrinsic::riscv_vsoxseg5_mask:
1749 case Intrinsic::riscv_vsoxseg6_mask:
1750 case Intrinsic::riscv_vsoxseg7_mask:
1751 case Intrinsic::riscv_vsoxseg8_mask:
1752 case Intrinsic::riscv_vsuxseg2_mask:
1753 case Intrinsic::riscv_vsuxseg3_mask:
1754 case Intrinsic::riscv_vsuxseg4_mask:
1755 case Intrinsic::riscv_vsuxseg5_mask:
1756 case Intrinsic::riscv_vsuxseg6_mask:
1757 case Intrinsic::riscv_vsuxseg7_mask:
1758 case Intrinsic::riscv_vsuxseg8_mask:
1759 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1760 /*IsStore*/ true,
1761 /*IsUnitStrided*/ false);
1762 }
1763}
1764
1765bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1766 const AddrMode &AM, Type *Ty,
1767 unsigned AS,
1768 Instruction *I) const {
1769 // No global is ever allowed as a base.
1770 if (AM.BaseGV)
1771 return false;
1772
1773 // RVV instructions only support register addressing.
1774 if (Subtarget.hasVInstructions() && isa<VectorType>(Val: Ty))
1775 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1776
1777 // Require a 12-bit signed offset.
1778 if (!isInt<12>(x: AM.BaseOffs))
1779 return false;
1780
1781 switch (AM.Scale) {
1782 case 0: // "r+i" or just "i", depending on HasBaseReg.
1783 break;
1784 case 1:
1785 if (!AM.HasBaseReg) // allow "r+i".
1786 break;
1787 return false; // disallow "r+r" or "r+r+i".
1788 default:
1789 return false;
1790 }
1791
1792 return true;
1793}
1794
1795bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1796 return isInt<12>(x: Imm);
1797}
1798
1799bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1800 return isInt<12>(x: Imm);
1801}
1802
1803// On RV32, 64-bit integers are split into their high and low parts and held
1804// in two different registers, so the trunc is free since the low register can
1805// just be used.
1806// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1807// isTruncateFree?
1808bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1809 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1810 return false;
1811 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1812 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1813 return (SrcBits == 64 && DestBits == 32);
1814}
1815
1816bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1817 // We consider i64->i32 free on RV64 since we have good selection of W
1818 // instructions that make promoting operations back to i64 free in many cases.
1819 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1820 !DstVT.isInteger())
1821 return false;
1822 unsigned SrcBits = SrcVT.getSizeInBits();
1823 unsigned DestBits = DstVT.getSizeInBits();
1824 return (SrcBits == 64 && DestBits == 32);
1825}
1826
1827bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1828 // Zexts are free if they can be combined with a load.
1829 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1830 // poorly with type legalization of compares, which prefers sext.
1831 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1832 EVT MemVT = LD->getMemoryVT();
1833 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1834 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1835 LD->getExtensionType() == ISD::ZEXTLOAD))
1836 return true;
1837 }
1838
1839 return TargetLowering::isZExtFree(Val, VT2);
1840}
1841
1842bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1843 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1844}
1845
1846bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1847 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32);
1848}
1849
1850bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1851 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1852}
1853
1854bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1855 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1856 Subtarget.hasVendorXCVbitmanip();
1857}
1858
1859bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1860 const Instruction &AndI) const {
1861 // We expect to be able to match a bit extraction instruction if the Zbs
1862 // extension is supported and the mask is a power of two. However, we
1863 // conservatively return false if the mask would fit in an ANDI instruction,
1864 // on the basis that it's possible the sinking+duplication of the AND in
1865 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1866 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1867 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1868 return false;
1869 ConstantInt *Mask = dyn_cast<ConstantInt>(Val: AndI.getOperand(i: 1));
1870 if (!Mask)
1871 return false;
1872 return !Mask->getValue().isSignedIntN(N: 12) && Mask->getValue().isPowerOf2();
1873}
1874
1875bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1876 EVT VT = Y.getValueType();
1877
1878 // FIXME: Support vectors once we have tests.
1879 if (VT.isVector())
1880 return false;
1881
1882 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1883 !isa<ConstantSDNode>(Val: Y);
1884}
1885
1886bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1887 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1888 if (Subtarget.hasStdExtZbs())
1889 return X.getValueType().isScalarInteger();
1890 auto *C = dyn_cast<ConstantSDNode>(Val&: Y);
1891 // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1892 if (Subtarget.hasVendorXTHeadBs())
1893 return C != nullptr;
1894 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1895 return C && C->getAPIntValue().ule(RHS: 10);
1896}
1897
1898bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1899 EVT VT) const {
1900 // Only enable for rvv.
1901 if (!VT.isVector() || !Subtarget.hasVInstructions())
1902 return false;
1903
1904 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1905 return false;
1906
1907 return true;
1908}
1909
1910bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1911 Type *Ty) const {
1912 assert(Ty->isIntegerTy());
1913
1914 unsigned BitSize = Ty->getIntegerBitWidth();
1915 if (BitSize > Subtarget.getXLen())
1916 return false;
1917
1918 // Fast path, assume 32-bit immediates are cheap.
1919 int64_t Val = Imm.getSExtValue();
1920 if (isInt<32>(x: Val))
1921 return true;
1922
1923 // A constant pool entry may be more aligned than the load we're trying to
1924 // replace. If we don't support unaligned scalar mem, prefer the constant
1925 // pool.
1926 // TODO: Can the caller pass down the alignment?
1927 if (!Subtarget.enableUnalignedScalarMem())
1928 return true;
1929
1930 // Prefer to keep the load if it would require many instructions.
1931 // This uses the same threshold we use for constant pools but doesn't
1932 // check useConstantPoolForLargeInts.
1933 // TODO: Should we keep the load only when we're definitely going to emit a
1934 // constant pool?
1935
1936 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
1937 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1938}
1939
1940bool RISCVTargetLowering::
1941 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1942 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1943 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1944 SelectionDAG &DAG) const {
1945 // One interesting pattern that we'd want to form is 'bit extract':
1946 // ((1 >> Y) & 1) ==/!= 0
1947 // But we also need to be careful not to try to reverse that fold.
1948
1949 // Is this '((1 >> Y) & 1)'?
1950 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1951 return false; // Keep the 'bit extract' pattern.
1952
1953 // Will this be '((1 >> Y) & 1)' after the transform?
1954 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1955 return true; // Do form the 'bit extract' pattern.
1956
1957 // If 'X' is a constant, and we transform, then we will immediately
1958 // try to undo the fold, thus causing endless combine loop.
1959 // So only do the transform if X is not a constant. This matches the default
1960 // implementation of this function.
1961 return !XC;
1962}
1963
1964bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1965 switch (Opcode) {
1966 case Instruction::Add:
1967 case Instruction::Sub:
1968 case Instruction::Mul:
1969 case Instruction::And:
1970 case Instruction::Or:
1971 case Instruction::Xor:
1972 case Instruction::FAdd:
1973 case Instruction::FSub:
1974 case Instruction::FMul:
1975 case Instruction::FDiv:
1976 case Instruction::ICmp:
1977 case Instruction::FCmp:
1978 return true;
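// For shifts, divides and remainders only the second operand has a scalar
// form (e.g. vsll.vx, vdiv.vx), so only that operand can be splatted.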
1979 case Instruction::Shl:
1980 case Instruction::LShr:
1981 case Instruction::AShr:
1982 case Instruction::UDiv:
1983 case Instruction::SDiv:
1984 case Instruction::URem:
1985 case Instruction::SRem:
1986 return Operand == 1;
1987 default:
1988 return false;
1989 }
1990}
1991
1992
1993bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1994 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1995 return false;
1996
1997 if (canSplatOperand(Opcode: I->getOpcode(), Operand))
1998 return true;
1999
2000 auto *II = dyn_cast<IntrinsicInst>(Val: I);
2001 if (!II)
2002 return false;
2003
2004 switch (II->getIntrinsicID()) {
2005 case Intrinsic::fma:
2006 case Intrinsic::vp_fma:
2007 return Operand == 0 || Operand == 1;
2008 case Intrinsic::vp_shl:
2009 case Intrinsic::vp_lshr:
2010 case Intrinsic::vp_ashr:
2011 case Intrinsic::vp_udiv:
2012 case Intrinsic::vp_sdiv:
2013 case Intrinsic::vp_urem:
2014 case Intrinsic::vp_srem:
2015 case Intrinsic::ssub_sat:
2016 case Intrinsic::vp_ssub_sat:
2017 case Intrinsic::usub_sat:
2018 case Intrinsic::vp_usub_sat:
2019 return Operand == 1;
2020 // These intrinsics are commutative.
2021 case Intrinsic::vp_add:
2022 case Intrinsic::vp_mul:
2023 case Intrinsic::vp_and:
2024 case Intrinsic::vp_or:
2025 case Intrinsic::vp_xor:
2026 case Intrinsic::vp_fadd:
2027 case Intrinsic::vp_fmul:
2028 case Intrinsic::vp_icmp:
2029 case Intrinsic::vp_fcmp:
2030 case Intrinsic::smin:
2031 case Intrinsic::vp_smin:
2032 case Intrinsic::umin:
2033 case Intrinsic::vp_umin:
2034 case Intrinsic::smax:
2035 case Intrinsic::vp_smax:
2036 case Intrinsic::umax:
2037 case Intrinsic::vp_umax:
2038 case Intrinsic::sadd_sat:
2039 case Intrinsic::vp_sadd_sat:
2040 case Intrinsic::uadd_sat:
2041 case Intrinsic::vp_uadd_sat:
2042 // These intrinsics have 'vr' versions.
2043 case Intrinsic::vp_sub:
2044 case Intrinsic::vp_fsub:
2045 case Intrinsic::vp_fdiv:
2046 return Operand == 0 || Operand == 1;
2047 default:
2048 return false;
2049 }
2050}
2051
2052/// Check if sinking \p I's operands to I's basic block is profitable, because
2053/// the operands can be folded into a target instruction, e.g.
2054/// splats of scalars can fold into vector instructions.
2055bool RISCVTargetLowering::shouldSinkOperands(
2056 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2057 using namespace llvm::PatternMatch;
2058
2059 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2060 return false;
2061
2062 // Don't sink splat operands if the target would rather not have them sunk.
2063 // Some targets require S2V transfer buffers and we can run out of them
2064 // copying the same value repeatedly.
2065 // FIXME: It could still be worth doing if it would improve vector register
2066 // pressure and prevent a vector spill.
2067 if (!Subtarget.sinkSplatOperands())
2068 return false;
2069
2070 for (auto OpIdx : enumerate(First: I->operands())) {
2071 if (!canSplatOperand(I, Operand: OpIdx.index()))
2072 continue;
2073
2074 Instruction *Op = dyn_cast<Instruction>(Val: OpIdx.value().get());
2075 // Make sure we are not already sinking this operand
2076 if (!Op || any_of(Range&: Ops, P: [&](Use *U) { return U->get() == Op; }))
2077 continue;
2078
2079 // We are looking for a splat that can be sunk.
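// That is, a shufflevector of an insertelement into lane 0 of undef, using an
// all-zeros mask (the canonical IR splat idiom).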
2080 if (!match(V: Op, P: m_Shuffle(v1: m_InsertElt(Val: m_Undef(), Elt: m_Value(), Idx: m_ZeroInt()),
2081 v2: m_Undef(), mask: m_ZeroMask())))
2082 continue;
2083
2084 // Don't sink i1 splats.
2085 if (cast<VectorType>(Val: Op->getType())->getElementType()->isIntegerTy(Bitwidth: 1))
2086 continue;
2087
2088 // All uses of the shuffle should be sunk to avoid duplicating it across GPRs
2089 // and vector registers.
2090 for (Use &U : Op->uses()) {
2091 Instruction *Insn = cast<Instruction>(Val: U.getUser());
2092 if (!canSplatOperand(I: Insn, Operand: U.getOperandNo()))
2093 return false;
2094 }
2095
2096 Ops.push_back(Elt: &Op->getOperandUse(i: 0));
2097 Ops.push_back(Elt: &OpIdx.value());
2098 }
2099 return true;
2100}
2101
2102bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2103 unsigned Opc = VecOp.getOpcode();
2104
2105 // Assume target opcodes can't be scalarized.
2106 // TODO - do we have any exceptions?
2107 if (Opc >= ISD::BUILTIN_OP_END)
2108 return false;
2109
2110 // If the vector op is not supported, try to convert to scalar.
2111 EVT VecVT = VecOp.getValueType();
2112 if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
2113 return true;
2114
2115 // If the vector op is supported, but the scalar op is not, the transform may
2116 // not be worthwhile.
2117 // However, permit converting the vector binary operation to a scalar binary
2118 // operation that is custom lowered with an illegal type.
2119 EVT ScalarVT = VecVT.getScalarType();
2120 return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT) ||
2121 isOperationCustom(Op: Opc, VT: ScalarVT);
2122}
2123
2124bool RISCVTargetLowering::isOffsetFoldingLegal(
2125 const GlobalAddressSDNode *GA) const {
2126 // In order to maximise the opportunity for common subexpression elimination,
2127 // keep a separate ADD node for the global address offset instead of folding
2128 // it in the global address node. Later peephole optimisations may choose to
2129 // fold it back in when profitable.
2130 return false;
2131}
2132
2133 // Return one of the following:
2134// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2135// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2136// positive counterpart, which will be materialized from the first returned
2137 // element. The second returned element indicates that an FNEG should be
2138 // applied afterwards.
2139// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2140std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2141 EVT VT) const {
2142 if (!Subtarget.hasStdExtZfa())
2143 return std::make_pair(x: -1, y: false);
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return std::make_pair(x: -1, y: false);
2157
2158 int Index = RISCVLoadFPImm::getLoadFPImm(FPImm: Imm);
2159 if (Index < 0 && Imm.isNegative())
2160 // Try the combination of its positive counterpart + FNEG.
2161 return std::make_pair(x: RISCVLoadFPImm::getLoadFPImm(FPImm: -Imm), y: true);
2162 else
2163 return std::make_pair(x&: Index, y: false);
2164}
2165
2166bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2167 bool ForCodeSize) const {
2168 bool IsLegalVT = false;
2169 if (VT == MVT::f16)
2170 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2171 else if (VT == MVT::f32)
2172 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2173 else if (VT == MVT::f64)
2174 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2175 else if (VT == MVT::bf16)
2176 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2177
2178 if (!IsLegalVT)
2179 return false;
2180
2181 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2182 return true;
2183
2184 // Cannot create a 64 bit floating-point immediate value for rv32.
2185 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2186 // td can handle +0.0 or -0.0 already.
2187 // -0.0 can be created by fmv + fneg.
2188 return Imm.isZero();
2189 }
2190
2191 // Special case: fmv + fneg
2192 if (Imm.isNegZero())
2193 return true;
2194
2195 // Building an integer and then converting requires a fmv at the end of
2196 // the integer sequence.
2197 const int Cost =
2198 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2199 Subtarget);
2200 return Cost <= FPImmCost;
2201}
2202
2203// TODO: This is very conservative.
2204bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2205 unsigned Index) const {
2206 if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT))
2207 return false;
2208
2209 // Only support extracting a fixed from a fixed vector for now.
2210 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2211 return false;
2212
2213 EVT EltVT = ResVT.getVectorElementType();
2214 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2215
2216 // The smallest type we can slide is i8.
2217 // TODO: We can extract index 0 from a mask vector without a slide.
2218 if (EltVT == MVT::i1)
2219 return false;
2220
2221 unsigned ResElts = ResVT.getVectorNumElements();
2222 unsigned SrcElts = SrcVT.getVectorNumElements();
2223
2224 unsigned MinVLen = Subtarget.getRealMinVLen();
2225 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2226
2227 // If we're extracting only data from the first VLEN bits of the source
2228 // then we can always do this with an m1 vslidedown.vx. Restricting the
2229 // Index ensures we can use a vslidedown.vi.
2230 // TODO: We can generalize this when the exact VLEN is known.
2231 if (Index + ResElts <= MinVLMAX && Index < 31)
2232 return true;
2233
2234 // Conservatively only handle extracting half of a vector.
2235 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2236 // a cheap extract. However, this case is important in practice for
2237 // shuffled extracts of longer vectors. How should we resolve this?
2238 if ((ResElts * 2) != SrcElts)
2239 return false;
2240
2241 // Slides can support an arbitrary index, but we only treat vslidedown.vi as
2242 // cheap.
2243 if (Index >= 32)
2244 return false;
2245
2246 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2247 // the upper half of a vector until we have more test coverage.
2248 return Index == 0 || Index == ResElts;
2249}
2250
2251MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2252 CallingConv::ID CC,
2253 EVT VT) const {
2254 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2255 // We might still end up using a GPR but that will be decided based on ABI.
2256 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2257 !Subtarget.hasStdExtZfhminOrZhinxmin())
2258 return MVT::f32;
2259
2260 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2261
2262 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2263 return MVT::i64;
2264
2265 return PartVT;
2266}
2267
2268unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2269 CallingConv::ID CC,
2270 EVT VT) const {
2271 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2272 // We might still end up using a GPR but that will be decided based on ABI.
2273 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2274 !Subtarget.hasStdExtZfhminOrZhinxmin())
2275 return 1;
2276
2277 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2278}
2279
2280unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2281 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2282 unsigned &NumIntermediates, MVT &RegisterVT) const {
2283 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2284 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2285
2286 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2287 IntermediateVT = MVT::i64;
2288
2289 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2290 RegisterVT = MVT::i64;
2291
2292 return NumRegs;
2293}
2294
2295// Changes the condition code and swaps operands if necessary, so the SetCC
2296// operation matches one of the comparisons supported directly by branches
2297// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2298// with 1/-1.
2299static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2300 ISD::CondCode &CC, SelectionDAG &DAG) {
2301 // If this is a single bit test that can't be handled by ANDI, shift the
2302 // bit to be tested to the MSB and perform a signed compare with 0.
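// For example, on RV32 (X & 0x8000) == 0 becomes (X << 16) >= 0 (signed).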
2303 if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) &&
2304 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2305 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
2306 uint64_t Mask = LHS.getConstantOperandVal(i: 1);
2307 if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) {
2308 unsigned ShAmt = 0;
2309 if (isPowerOf2_64(Value: Mask)) {
2310 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2311 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask);
2312 } else {
2313 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask);
2314 }
2315
2316 LHS = LHS.getOperand(i: 0);
2317 if (ShAmt != 0)
2318 LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS,
2319 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
2320 return;
2321 }
2322 }
2323
2324 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
2325 int64_t C = RHSC->getSExtValue();
2326 switch (CC) {
2327 default: break;
2328 case ISD::SETGT:
2329 // Convert X > -1 to X >= 0.
2330 if (C == -1) {
2331 RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
2332 CC = ISD::SETGE;
2333 return;
2334 }
2335 break;
2336 case ISD::SETLT:
2337 // Convert X < 1 to 0 >= X.
2338 if (C == 1) {
2339 RHS = LHS;
2340 LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
2341 CC = ISD::SETGE;
2342 return;
2343 }
2344 break;
2345 }
2346 }
2347
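// Branches natively support only EQ/NE, LT/GE and LTU/GEU; handle the
// remaining predicates by swapping the operands.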
2348 switch (CC) {
2349 default:
2350 break;
2351 case ISD::SETGT:
2352 case ISD::SETLE:
2353 case ISD::SETUGT:
2354 case ISD::SETULE:
2355 CC = ISD::getSetCCSwappedOperands(Operation: CC);
2356 std::swap(a&: LHS, b&: RHS);
2357 break;
2358 }
2359}
2360
2361RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2362 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2363 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
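// Treat i1 vectors as if each element occupied a byte (the SEW=8 equivalent)
// so the size-based mapping below applies.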
2364 if (VT.getVectorElementType() == MVT::i1)
2365 KnownSize *= 8;
2366
2367 switch (KnownSize) {
2368 default:
2369 llvm_unreachable("Invalid LMUL.");
2370 case 8:
2371 return RISCVII::VLMUL::LMUL_F8;
2372 case 16:
2373 return RISCVII::VLMUL::LMUL_F4;
2374 case 32:
2375 return RISCVII::VLMUL::LMUL_F2;
2376 case 64:
2377 return RISCVII::VLMUL::LMUL_1;
2378 case 128:
2379 return RISCVII::VLMUL::LMUL_2;
2380 case 256:
2381 return RISCVII::VLMUL::LMUL_4;
2382 case 512:
2383 return RISCVII::VLMUL::LMUL_8;
2384 }
2385}
2386
2387unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2388 switch (LMul) {
2389 default:
2390 llvm_unreachable("Invalid LMUL.");
2391 case RISCVII::VLMUL::LMUL_F8:
2392 case RISCVII::VLMUL::LMUL_F4:
2393 case RISCVII::VLMUL::LMUL_F2:
2394 case RISCVII::VLMUL::LMUL_1:
2395 return RISCV::VRRegClassID;
2396 case RISCVII::VLMUL::LMUL_2:
2397 return RISCV::VRM2RegClassID;
2398 case RISCVII::VLMUL::LMUL_4:
2399 return RISCV::VRM4RegClassID;
2400 case RISCVII::VLMUL::LMUL_8:
2401 return RISCV::VRM8RegClassID;
2402 }
2403}
2404
2405unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2406 RISCVII::VLMUL LMUL = getLMUL(VT);
2407 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2408 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2409 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2410 LMUL == RISCVII::VLMUL::LMUL_1) {
2411 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2412 "Unexpected subreg numbering");
2413 return RISCV::sub_vrm1_0 + Index;
2414 }
2415 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2416 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2417 "Unexpected subreg numbering");
2418 return RISCV::sub_vrm2_0 + Index;
2419 }
2420 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2421 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2422 "Unexpected subreg numbering");
2423 return RISCV::sub_vrm4_0 + Index;
2424 }
2425 llvm_unreachable("Invalid vector type.");
2426}
2427
2428unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2429 if (VT.getVectorElementType() == MVT::i1)
2430 return RISCV::VRRegClassID;
2431 return getRegClassIDForLMUL(LMul: getLMUL(VT));
2432}
2433
2434// Attempt to decompose a subvector insert/extract between VecVT and
2435// SubVecVT via subregister indices. Returns the subregister index that
2436// can perform the subvector insert/extract with the given element index, as
2437// well as the index corresponding to any leftover subvectors that must be
2438// further inserted/extracted within the register class for SubVecVT.
2439std::pair<unsigned, unsigned>
2440RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2441 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2442 const RISCVRegisterInfo *TRI) {
2443 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2444 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2445 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2446 "Register classes not ordered");
2447 unsigned VecRegClassID = getRegClassIDForVecVT(VT: VecVT);
2448 unsigned SubRegClassID = getRegClassIDForVecVT(VT: SubVecVT);
2449 // Try to compose a subregister index that takes us from the incoming
2450 // LMUL>1 register class down to the outgoing one. At each step we half
2451 // the LMUL:
2452 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2453 // Note that this is not guaranteed to find a subregister index, such as
2454 // when we are extracting from one VR type to another.
2455 unsigned SubRegIdx = RISCV::NoSubRegister;
2456 for (const unsigned RCID :
2457 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2458 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2459 VecVT = VecVT.getHalfNumVectorElementsVT();
2460 bool IsHi =
2461 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2462 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2463 getSubregIndexByMVT(VecVT, IsHi));
2464 if (IsHi)
2465 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2466 }
2467 return {SubRegIdx, InsertExtractIdx};
2468}
2469
2470// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2471// stores for those types.
2472bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2473 return !Subtarget.useRVVForFixedLengthVectors() ||
2474 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2475}
2476
2477bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2478 if (!ScalarTy.isSimple())
2479 return false;
2480 switch (ScalarTy.getSimpleVT().SimpleTy) {
2481 case MVT::iPTR:
2482 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2483 case MVT::i8:
2484 case MVT::i16:
2485 case MVT::i32:
2486 return true;
2487 case MVT::i64:
2488 return Subtarget.hasVInstructionsI64();
2489 case MVT::f16:
2490 return Subtarget.hasVInstructionsF16();
2491 case MVT::f32:
2492 return Subtarget.hasVInstructionsF32();
2493 case MVT::f64:
2494 return Subtarget.hasVInstructionsF64();
2495 default:
2496 return false;
2497 }
2498}
2499
2500
2501unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2502 return NumRepeatedDivisors;
2503}
2504
2505static SDValue getVLOperand(SDValue Op) {
2506 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2507 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2508 "Unexpected opcode");
2509 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2510 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
2511 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2512 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2513 if (!II)
2514 return SDValue();
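// VLOperand indexes the intrinsic's own argument list, so skip the intrinsic
// ID operand (and the chain, when present).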
2515 return Op.getOperand(i: II->VLOperand + 1 + HasChain);
2516}
2517
2518static bool useRVVForFixedLengthVectorVT(MVT VT,
2519 const RISCVSubtarget &Subtarget) {
2520 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2521 if (!Subtarget.useRVVForFixedLengthVectors())
2522 return false;
2523
2524 // We only support a set of vector types with a consistent maximum fixed size
2525 // across all supported vector element types to avoid legalization issues.
2526 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2527 // fixed-length vector type we support is 1024 bytes.
2528 if (VT.getFixedSizeInBits() > 1024 * 8)
2529 return false;
2530
2531 unsigned MinVLen = Subtarget.getRealMinVLen();
2532
2533 MVT EltVT = VT.getVectorElementType();
2534
2535 // Don't use RVV for vectors we cannot scalarize if required.
2536 switch (EltVT.SimpleTy) {
2537 // i1 is supported but has different rules.
2538 default:
2539 return false;
2540 case MVT::i1:
2541 // Masks can only use a single register.
2542 if (VT.getVectorNumElements() > MinVLen)
2543 return false;
2544 MinVLen /= 8;
2545 break;
2546 case MVT::i8:
2547 case MVT::i16:
2548 case MVT::i32:
2549 break;
2550 case MVT::i64:
2551 if (!Subtarget.hasVInstructionsI64())
2552 return false;
2553 break;
2554 case MVT::f16:
2555 if (!Subtarget.hasVInstructionsF16Minimal())
2556 return false;
2557 break;
2558 case MVT::f32:
2559 if (!Subtarget.hasVInstructionsF32())
2560 return false;
2561 break;
2562 case MVT::f64:
2563 if (!Subtarget.hasVInstructionsF64())
2564 return false;
2565 break;
2566 }
2567
2568 // Reject elements larger than ELEN.
2569 if (EltVT.getSizeInBits() > Subtarget.getELen())
2570 return false;
2571
2572 unsigned LMul = divideCeil(Numerator: VT.getSizeInBits(), Denominator: MinVLen);
2573 // Don't use RVV for types that don't fit.
2574 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2575 return false;
2576
2577 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2578 // the base fixed length RVV support in place.
2579 if (!VT.isPow2VectorType())
2580 return false;
2581
2582 return true;
2583}
2584
2585bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2586 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2587}
2588
2589 // Return the smallest RVV container (scalable vector) type that is guaranteed to hold all of VT's elements.
2590static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2591 const RISCVSubtarget &Subtarget) {
2592 // This may be called before legal types are set up.
2593 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2594 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2595 "Expected legal fixed length vector!");
2596
2597 unsigned MinVLen = Subtarget.getRealMinVLen();
2598 unsigned MaxELen = Subtarget.getELen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601 switch (EltVT.SimpleTy) {
2602 default:
2603 llvm_unreachable("unexpected element type for RVV container");
2604 case MVT::i1:
2605 case MVT::i8:
2606 case MVT::i16:
2607 case MVT::i32:
2608 case MVT::i64:
2609 case MVT::f16:
2610 case MVT::f32:
2611 case MVT::f64: {
2612 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2613 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2614 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
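// For example, with a minimum VLEN of 128 and ELEN of 64, v4i32 maps to
// nxv2i32, i.e. a single LMUL=1 register.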
2615 unsigned NumElts =
2616 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2617 NumElts = std::max(a: NumElts, b: RISCV::RVVBitsPerBlock / MaxELen);
2618 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2619 return MVT::getScalableVectorVT(VT: EltVT, NumElements: NumElts);
2620 }
2621 }
2622}
2623
2624static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2625 const RISCVSubtarget &Subtarget) {
2626 return getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT,
2627 Subtarget);
2628}
2629
2630MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2631 return ::getContainerForFixedLengthVector(TLI: *this, VT, Subtarget: getSubtarget());
2632}
2633
2634// Grow V to consume an entire RVV register.
2635static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2636 const RISCVSubtarget &Subtarget) {
2637 assert(VT.isScalableVector() &&
2638 "Expected to convert into a scalable vector!");
2639 assert(V.getValueType().isFixedLengthVector() &&
2640 "Expected a fixed length vector operand!");
2641 SDLoc DL(V);
2642 SDValue Zero = DAG.getVectorIdxConstant(Val: 0, DL);
2643 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: V, N3: Zero);
2644}
2645
2646// Shrink V so it's just big enough to maintain a VT's worth of data.
2647static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2648 const RISCVSubtarget &Subtarget) {
2649 assert(VT.isFixedLengthVector() &&
2650 "Expected to convert into a fixed length vector!");
2651 assert(V.getValueType().isScalableVector() &&
2652 "Expected a scalable vector operand!");
2653 SDLoc DL(V);
2654 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT());
2655 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: V, N2: Zero);
2656}
2657
2658 /// Return the mask type suitable for masking the provided vector type. This
2659 /// is simply a vector of i1 elements with the same (possibly scalable)
2660 /// length.
2661static MVT getMaskTypeFor(MVT VecVT) {
2662 assert(VecVT.isVector());
2663 ElementCount EC = VecVT.getVectorElementCount();
2664 return MVT::getVectorVT(MVT::i1, EC);
2665}
2666
2667 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2668 /// vector length VL.
2669static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2670 SelectionDAG &DAG) {
2671 MVT MaskVT = getMaskTypeFor(VecVT);
2672 return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: MaskVT, Operand: VL);
2673}
2674
2675static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2676 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2677 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2678 // canonicalize the representation. InsertVSETVLI will pick the immediate
2679 // encoding later if profitable.
2680 const auto [MinVLMAX, MaxVLMAX] =
2681 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2682 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
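// An X0 VL operand is interpreted as VLMAX.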
2683 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2684
2685 return DAG.getConstant(Val: NumElts, DL, VT: Subtarget.getXLenVT());
2686}
2687
2688static std::pair<SDValue, SDValue>
2689getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2690 const RISCVSubtarget &Subtarget) {
2691 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2692 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2693 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2694 return {Mask, VL};
2695}
2696
2697static std::pair<SDValue, SDValue>
2698getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2699 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2700 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2701 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2702 SDValue Mask = getAllOnesMask(VecVT: ContainerVT, VL, DL, DAG);
2703 return {Mask, VL};
2704}
2705
2706// Gets the two common "VL" operands: an all-ones mask and the vector length.
2707// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2708// the vector type that the fixed-length vector is contained in. Otherwise if
2709// VecVT is scalable, then ContainerVT should be the same as VecVT.
2710static std::pair<SDValue, SDValue>
2711getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2712 const RISCVSubtarget &Subtarget) {
2713 if (VecVT.isFixedLengthVector())
2714 return getDefaultVLOps(NumElts: VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2715 Subtarget);
2716 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2717 return getDefaultScalableVLOps(VecVT: ContainerVT, DL, DAG, Subtarget);
2718}
2719
2720SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2721 SelectionDAG &DAG) const {
2722 assert(VecVT.isScalableVector() && "Expected scalable vector");
2723 return DAG.getElementCount(DL, VT: Subtarget.getXLenVT(),
2724 EC: VecVT.getVectorElementCount());
2725}
2726
2727std::pair<unsigned, unsigned>
2728RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2729 const RISCVSubtarget &Subtarget) {
2730 assert(VecVT.isScalableVector() && "Expected scalable vector");
2731
2732 unsigned EltSize = VecVT.getScalarSizeInBits();
2733 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2734
2735 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2736 unsigned MaxVLMAX =
2737 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
2738
2739 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2740 unsigned MinVLMAX =
2741 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMin, EltSize, MinSize);
2742
2743 return std::make_pair(x&: MinVLMAX, y&: MaxVLMAX);
2744}
2745
2746// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2747// of either is (currently) supported. This can get us into an infinite loop
2748// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2749// as a ..., etc.
2750// Until either (or both) of these can reliably lower any node, reporting that
2751// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2752// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2753// which is not desirable.
2754bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2755 EVT VT, unsigned DefinedValues) const {
2756 return false;
2757}
2758
2759InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2760 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2761 // really implementation-defined.
2762 if (!VT.isVector())
2763 return InstructionCost::getInvalid();
2764 unsigned DLenFactor = Subtarget.getDLenFactor();
2765 unsigned Cost;
2766 if (VT.isScalableVector()) {
2767 unsigned LMul;
2768 bool Fractional;
2769 std::tie(args&: LMul, args&: Fractional) =
2770 RISCVVType::decodeVLMUL(VLMUL: RISCVTargetLowering::getLMUL(VT));
2771 if (Fractional)
2772 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2773 else
2774 Cost = (LMul * DLenFactor);
2775 } else {
2776 Cost = divideCeil(Numerator: VT.getSizeInBits(), Denominator: Subtarget.getRealMinVLen() / DLenFactor);
2777 }
2778 return Cost;
2779}
2780
2781
2782/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2783 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2784 /// the operands (index and possibly mask) are handled separately.
2785InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2786 return getLMULCost(VT) * getLMULCost(VT);
2787}
2788
2789/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2790/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2791/// or may track the vrgather.vv cost. It is implementation-dependent.
2792InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2793 return getLMULCost(VT);
2794}
2795
2796/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2797/// for the type VT. (This does not cover the vslide1up or vslide1down
2798/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2799/// or may track the vrgather.vv cost. It is implementation-dependent.
2800InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2801 return getLMULCost(VT);
2802}
2803
2804/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2805/// for the type VT. (This does not cover the vslide1up or vslide1down
2806/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2807/// or may track the vrgather.vv cost. It is implementation-dependent.
2808InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2809 return getLMULCost(VT);
2810}
2811
2812static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2813 const RISCVSubtarget &Subtarget) {
2814 // RISC-V FP-to-int conversions saturate to the destination register size, but
2815 // don't produce 0 for nan. We can use a conversion instruction and fix the
2816 // nan case with a compare and a select.
2817 SDValue Src = Op.getOperand(i: 0);
2818
2819 MVT DstVT = Op.getSimpleValueType();
2820 EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2821
2822 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2823
2824 if (!DstVT.isVector()) {
2825 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2826 // the result.
2827 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2828 Src.getValueType() == MVT::bf16) {
2829 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2830 }
2831
2832 unsigned Opc;
2833 if (SatVT == DstVT)
2834 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2835 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2836 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2837 else
2838 return SDValue();
2839 // FIXME: Support other SatVTs by clamping before or after the conversion.
2840
2841 SDLoc DL(Op);
2842 SDValue FpToInt = DAG.getNode(
2843 Opcode: Opc, DL, VT: DstVT, N1: Src,
2844 N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT()));
2845
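// On RV64, fcvt.wu.* sign-extends its 32-bit result, so explicitly clear the
// upper 32 bits here.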
2846 if (Opc == RISCVISD::FCVT_WU_RV64)
2847 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2848
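// Src != Src (an unordered compare) detects NaN; select 0 in that case and
// the converted value otherwise.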
2849 SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT);
2850 return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt,
2851 Cond: ISD::CondCode::SETUO);
2852 }
2853
2854 // Vectors.
2855
2856 MVT DstEltVT = DstVT.getVectorElementType();
2857 MVT SrcVT = Src.getSimpleValueType();
2858 MVT SrcEltVT = SrcVT.getVectorElementType();
2859 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2860 unsigned DstEltSize = DstEltVT.getSizeInBits();
2861
2862 // Only handle saturating to the destination type.
2863 if (SatVT != DstEltVT)
2864 return SDValue();
2865
2866 // FIXME: Don't support narrowing by more than 1 step for now.
2867 if (SrcEltSize > (2 * DstEltSize))
2868 return SDValue();
2869
2870 MVT DstContainerVT = DstVT;
2871 MVT SrcContainerVT = SrcVT;
2872 if (DstVT.isFixedLengthVector()) {
2873 DstContainerVT = getContainerForFixedLengthVector(DAG, VT: DstVT, Subtarget);
2874 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
2875 assert(DstContainerVT.getVectorElementCount() ==
2876 SrcContainerVT.getVectorElementCount() &&
2877 "Expected same element count");
2878 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
2879 }
2880
2881 SDLoc DL(Op);
2882
2883 auto [Mask, VL] = getDefaultVLOps(VecVT: DstVT, ContainerVT: DstContainerVT, DL, DAG, Subtarget);
2884
2885 SDValue IsNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
2886 Ops: {Src, Src, DAG.getCondCode(Cond: ISD::SETNE),
2887 DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL});
2888
2889 // If we need to widen by more than 1 step, promote the FP type, then do a
2890 // widening convert.
2891 if (DstEltSize > (2 * SrcEltSize)) {
2892 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2893 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2894 Src = DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL);
2895 }
2896
2897 unsigned RVVOpc =
2898 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2899 SDValue Res = DAG.getNode(Opcode: RVVOpc, DL, VT: DstContainerVT, N1: Src, N2: Mask, N3: VL);
2900
2901 SDValue SplatZero = DAG.getNode(
2902 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: DstContainerVT, N1: DAG.getUNDEF(VT: DstContainerVT),
2903 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL);
2904 Res = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: DstContainerVT, N1: IsNan, N2: SplatZero,
2905 N3: Res, N4: DAG.getUNDEF(VT: DstContainerVT), N5: VL);
2906
2907 if (DstVT.isFixedLengthVector())
2908 Res = convertFromScalableVector(VT: DstVT, V: Res, DAG, Subtarget);
2909
2910 return Res;
2911}
2912
2913static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2914 switch (Opc) {
2915 case ISD::FROUNDEVEN:
2916 case ISD::STRICT_FROUNDEVEN:
2917 case ISD::VP_FROUNDEVEN:
2918 return RISCVFPRndMode::RNE;
2919 case ISD::FTRUNC:
2920 case ISD::STRICT_FTRUNC:
2921 case ISD::VP_FROUNDTOZERO:
2922 return RISCVFPRndMode::RTZ;
2923 case ISD::FFLOOR:
2924 case ISD::STRICT_FFLOOR:
2925 case ISD::VP_FFLOOR:
2926 return RISCVFPRndMode::RDN;
2927 case ISD::FCEIL:
2928 case ISD::STRICT_FCEIL:
2929 case ISD::VP_FCEIL:
2930 return RISCVFPRndMode::RUP;
2931 case ISD::FROUND:
2932 case ISD::STRICT_FROUND:
2933 case ISD::VP_FROUND:
2934 return RISCVFPRndMode::RMM;
2935 case ISD::FRINT:
2936 return RISCVFPRndMode::DYN;
2937 }
2938
2939 return RISCVFPRndMode::Invalid;
2940}
2941
2942// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
2943// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2944// the integer domain and back, taking care to avoid converting values that
2945// are nan or already correct.
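// As a rough, purely illustrative sketch (VL/policy operands elided), an FCEIL
// on a vector expands to something like:
//   %keep = SETCC_VL (FABS_VL %src), splat(2^(precision-1)), setolt
//   %int  = VFCVT_RM_X_F_VL %src, %keep, frm=RUP
//   %fp   = SINT_TO_FP_VL %int, %keep
//   %res  = FCOPYSIGN_VL %fp, %src, merge=%src, %keep
// Lanes that are nan or have no fractional bits fall outside %keep and retain
// their original value via the merge operand.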
2946static SDValue
2947lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2948 const RISCVSubtarget &Subtarget) {
2949 MVT VT = Op.getSimpleValueType();
2950 assert(VT.isVector() && "Unexpected type");
2951
2952 SDLoc DL(Op);
2953
2954 SDValue Src = Op.getOperand(i: 0);
2955
2956 MVT ContainerVT = VT;
2957 if (VT.isFixedLengthVector()) {
2958 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2959 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
2960 }
2961
2962 SDValue Mask, VL;
2963 if (Op->isVPOpcode()) {
2964 Mask = Op.getOperand(i: 1);
2965 if (VT.isFixedLengthVector())
2966 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
2967 Subtarget);
2968 VL = Op.getOperand(i: 2);
2969 } else {
2970 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
2971 }
2972
2973 // Freeze the source since we are increasing the number of uses.
2974 Src = DAG.getFreeze(V: Src);
2975
2976 // We do the conversion on the absolute value and fix the sign at the end.
2977 SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
2978
2979 // Determine the largest integer that can be represented exactly. This and
2980 // values larger than it don't have any fractional bits so don't need to
2981 // be converted.
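  // For example, for f32 (precision 24) this is 2^23 = 8388608.0, and for f64
  // (precision 53) it is 2^52.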
2982 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT);
2983 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2984 APFloat MaxVal = APFloat(FltSem);
2985 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
2986 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
2987 SDValue MaxValNode =
2988 DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType());
2989 SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT,
2990 N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL);
2991
2992 // If abs(Src) was larger than MaxVal or nan, keep it.
2993 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2994 Mask =
2995 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: SetccVT,
2996 Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT),
2997 Mask, Mask, VL});
2998
2999 // Truncate to integer and convert back to FP.
3000 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3001 MVT XLenVT = Subtarget.getXLenVT();
3002 SDValue Truncated;
3003
3004 switch (Op.getOpcode()) {
3005 default:
3006 llvm_unreachable("Unexpected opcode");
3007 case ISD::FCEIL:
3008 case ISD::VP_FCEIL:
3009 case ISD::FFLOOR:
3010 case ISD::VP_FFLOOR:
3011 case ISD::FROUND:
3012 case ISD::FROUNDEVEN:
3013 case ISD::VP_FROUND:
3014 case ISD::VP_FROUNDEVEN:
3015 case ISD::VP_FROUNDTOZERO: {
3016 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3017 assert(FRM != RISCVFPRndMode::Invalid);
3018 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask,
3019 N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL);
3020 break;
3021 }
3022 case ISD::FTRUNC:
3023 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RTZ_X_F_VL, DL, VT: IntVT, N1: Src,
3024 N2: Mask, N3: VL);
3025 break;
3026 case ISD::FRINT:
3027 case ISD::VP_FRINT:
3028 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL);
3029 break;
3030 case ISD::FNEARBYINT:
3031 case ISD::VP_FNEARBYINT:
3032 Truncated = DAG.getNode(Opcode: RISCVISD::VFROUND_NOEXCEPT_VL, DL, VT: ContainerVT, N1: Src,
3033 N2: Mask, N3: VL);
3034 break;
3035 }
3036
3037 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3038 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3039 Truncated = DAG.getNode(Opcode: RISCVISD::SINT_TO_FP_VL, DL, VT: ContainerVT, N1: Truncated,
3040 N2: Mask, N3: VL);
3041
3042 // Restore the original sign so that -0.0 is preserved.
3043 Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated,
3044 N2: Src, N3: Src, N4: Mask, N5: VL);
3045
3046 if (!VT.isFixedLengthVector())
3047 return Truncated;
3048
3049 return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3050}
3051
3052// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3053// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs of the source
3054// to qNaNs and converting the new source to integer and back to FP.
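// As a rough, purely illustrative sketch (chains, VLs and policies elided):
//   %unord = STRICT_FSETCC_VL %src, %src, setune     ; lanes that are nan
//   %src'  = STRICT_FADD_VL %src, %src, mask=%unord  ; quiets any sNaN
// followed by the same abs/compare/convert/copysign sequence as the
// non-strict lowering above, using the STRICT_* conversion nodes.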
3055static SDValue
3056lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3057 const RISCVSubtarget &Subtarget) {
3058 SDLoc DL(Op);
3059 MVT VT = Op.getSimpleValueType();
3060 SDValue Chain = Op.getOperand(i: 0);
3061 SDValue Src = Op.getOperand(i: 1);
3062
3063 MVT ContainerVT = VT;
3064 if (VT.isFixedLengthVector()) {
3065 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3066 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
3067 }
3068
3069 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3070
3071 // Freeze the source since we are increasing the number of uses.
3072 Src = DAG.getFreeze(V: Src);
3073
3074 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3075 MVT MaskVT = Mask.getSimpleValueType();
3076 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3077 DAG.getVTList(MaskVT, MVT::Other),
3078 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3079 DAG.getUNDEF(MaskVT), Mask, VL});
3080 Chain = Unorder.getValue(R: 1);
3081 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3082 DAG.getVTList(ContainerVT, MVT::Other),
3083 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3084 Chain = Src.getValue(R: 1);
3085
3086 // We do the conversion on the absolute value and fix the sign at the end.
3087 SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
3088
3089 // Determine the largest integer that can be represented exactly. This and
3090 // values larger than it don't have any fractional bits so don't need to
3091 // be converted.
3092 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT);
3093 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3094 APFloat MaxVal = APFloat(FltSem);
3095 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
3096 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
3097 SDValue MaxValNode =
3098 DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType());
3099 SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT,
3100 N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL);
3101
3102 // If abs(Src) was larger than MaxVal or nan, keep it.
3103 Mask = DAG.getNode(
3104 Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
3105 Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT), Mask, Mask, VL});
3106
3107 // Truncate to integer and convert back to FP.
3108 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3109 MVT XLenVT = Subtarget.getXLenVT();
3110 SDValue Truncated;
3111
3112 switch (Op.getOpcode()) {
3113 default:
3114 llvm_unreachable("Unexpected opcode");
3115 case ISD::STRICT_FCEIL:
3116 case ISD::STRICT_FFLOOR:
3117 case ISD::STRICT_FROUND:
3118 case ISD::STRICT_FROUNDEVEN: {
3119 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3120 assert(FRM != RISCVFPRndMode::Invalid);
3121 Truncated = DAG.getNode(
3122 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3123 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3124 break;
3125 }
3126 case ISD::STRICT_FTRUNC:
3127 Truncated =
3128 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3129 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3130 break;
3131 case ISD::STRICT_FNEARBYINT:
3132 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3133 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3134 Mask, VL);
3135 break;
3136 }
3137 Chain = Truncated.getValue(R: 1);
3138
3139 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3140 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3141 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3142 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3143 Truncated, Mask, VL);
3144 Chain = Truncated.getValue(R: 1);
3145 }
3146
3147 // Restore the original sign so that -0.0 is preserved.
3148 Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated,
3149 N2: Src, N3: Src, N4: Mask, N5: VL);
3150
3151 if (VT.isFixedLengthVector())
3152 Truncated = convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3153 return DAG.getMergeValues(Ops: {Truncated, Chain}, dl: DL);
3154}
3155
3156static SDValue
3157lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3158 const RISCVSubtarget &Subtarget) {
3159 MVT VT = Op.getSimpleValueType();
3160 if (VT.isVector())
3161 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3162
3163 if (DAG.shouldOptForSize())
3164 return SDValue();
3165
3166 SDLoc DL(Op);
3167 SDValue Src = Op.getOperand(i: 0);
3168
3169 // Create an integer the size of the mantissa with the MSB set. This and all
3170 // values larger than it don't have any fractional bits so don't need to be
3171 // converted.
3172 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3173 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3174 APFloat MaxVal = APFloat(FltSem);
3175 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
3176 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
3177 SDValue MaxValNode = DAG.getConstantFP(Val: MaxVal, DL, VT);
3178
3179 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3180 return DAG.getNode(Opcode: RISCVISD::FROUND, DL, VT, N1: Src, N2: MaxValNode,
3181 N3: DAG.getTargetConstant(Val: FRM, DL, VT: Subtarget.getXLenVT()));
3182}
3183
3184// Expand vector LRINT and LLRINT by converting to the integer domain.
3185static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3186 const RISCVSubtarget &Subtarget) {
3187 MVT VT = Op.getSimpleValueType();
3188 assert(VT.isVector() && "Unexpected type");
3189
3190 SDLoc DL(Op);
3191 SDValue Src = Op.getOperand(i: 0);
3192 MVT ContainerVT = VT;
3193
3194 if (VT.isFixedLengthVector()) {
3195 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
3197 }
3198
3199 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3200 SDValue Truncated =
3201 DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
3202
3203 if (!VT.isFixedLengthVector())
3204 return Truncated;
3205
3206 return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3207}
3208
3209static SDValue
3210getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3211 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3212 SDValue Offset, SDValue Mask, SDValue VL,
3213 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3214 if (Merge.isUndef())
3215 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3216 SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT());
3217 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3218 return DAG.getNode(Opcode: RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3219}
3220
3221static SDValue
3222getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3223 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3224 SDValue VL,
3225 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3226 if (Merge.isUndef())
3227 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3228 SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT());
3229 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3230 return DAG.getNode(Opcode: RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3231}
3232
3233static MVT getLMUL1VT(MVT VT) {
3234 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3235 "Unexpected vector MVT");
3236 return MVT::getScalableVectorVT(
3237 VT: VT.getVectorElementType(),
3238 NumElements: RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3239}
3240
3241struct VIDSequence {
3242 int64_t StepNumerator;
3243 unsigned StepDenominator;
3244 int64_t Addend;
3245};
3246
3247static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3248 uint32_t BitWidth) {
3249 // We will use a SINT_TO_FP to materialize this constant so we should use a
3250 // signed APSInt here.
3251 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3252 // We use an arbitrary rounding mode here. If a floating-point is an exact
3253 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3254 // the rounding mode changes the output value, then it is not an exact
3255 // integer.
3256 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3257 bool IsExact;
3258 // If it is out of signed integer range, it will return an invalid operation.
3259 // If it is not an exact integer, IsExact is false.
3260 if ((APF.convertToInteger(Result&: ValInt, RM: ArbitraryRM, IsExact: &IsExact) ==
3261 APFloatBase::opInvalidOp) ||
3262 !IsExact)
3263 return std::nullopt;
3264 return ValInt.extractBitsAsZExtValue(numBits: BitWidth, bitPosition: 0);
3265}
3266
3267// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3268// to the (non-zero) step S and start value X. This can then be lowered as the
3269// RVV sequence (VID * S) + X, for example.
3270// The step S is represented as an integer numerator divided by a positive
3271// denominator. Note that the implementation currently only identifies
3272// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3273// cannot detect 2/3, for example.
3274// Note that this method will also match potentially unappealing index
3275// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3276// determine whether this is worth generating code for.
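// For example (illustrative):
//   <0, 2, 4, 6>  ->  {StepNumerator: 2, StepDenominator: 1, Addend: 0}
//   <1, 1, 2, 2>  ->  {StepNumerator: 1, StepDenominator: 2, Addend: 1}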
3277static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3278 unsigned EltSizeInBits) {
3279 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3280 if (!cast<BuildVectorSDNode>(Val&: Op)->isConstant())
3281 return std::nullopt;
3282 bool IsInteger = Op.getValueType().isInteger();
3283
3284 std::optional<unsigned> SeqStepDenom;
3285 std::optional<int64_t> SeqStepNum, SeqAddend;
3286 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3287 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3288
3289 // First extract the ops into a list of constant integer values. This may not
3290 // be possible for floats if they're not all representable as integers.
3291 SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands());
3292 const unsigned OpSize = Op.getScalarValueSizeInBits();
3293 for (auto [Idx, Elt] : enumerate(First: Op->op_values())) {
3294 if (Elt.isUndef()) {
3295 Elts[Idx] = std::nullopt;
3296 continue;
3297 }
3298 if (IsInteger) {
3299 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(N: OpSize);
3300 } else {
3301 auto ExactInteger =
3302 getExactInteger(APF: cast<ConstantFPSDNode>(Val: Elt)->getValueAPF(), BitWidth: OpSize);
3303 if (!ExactInteger)
3304 return std::nullopt;
3305 Elts[Idx] = *ExactInteger;
3306 }
3307 }
3308
3309 for (auto [Idx, Elt] : enumerate(First&: Elts)) {
3310 // Assume undef elements match the sequence; we just have to be careful
3311 // when interpolating across them.
3312 if (!Elt)
3313 continue;
3314
3315 if (PrevElt) {
3316 // Calculate the step since the last non-undef element, and ensure
3317 // it's consistent across the entire sequence.
3318 unsigned IdxDiff = Idx - PrevElt->second;
3319 int64_t ValDiff = SignExtend64(X: *Elt - PrevElt->first, B: EltSizeInBits);
3320
3321 // A zero value difference means that we're somewhere in the middle
3322 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3323 // step change before evaluating the sequence.
3324 if (ValDiff == 0)
3325 continue;
3326
3327 int64_t Remainder = ValDiff % IdxDiff;
3328 // Normalize the step if it's greater than 1.
3329 if (Remainder != ValDiff) {
3330 // The difference must cleanly divide the element span.
3331 if (Remainder != 0)
3332 return std::nullopt;
3333 ValDiff /= IdxDiff;
3334 IdxDiff = 1;
3335 }
3336
3337 if (!SeqStepNum)
3338 SeqStepNum = ValDiff;
3339 else if (ValDiff != SeqStepNum)
3340 return std::nullopt;
3341
3342 if (!SeqStepDenom)
3343 SeqStepDenom = IdxDiff;
3344 else if (IdxDiff != *SeqStepDenom)
3345 return std::nullopt;
3346 }
3347
3348 // Record this non-undef element for later.
3349 if (!PrevElt || PrevElt->first != *Elt)
3350 PrevElt = std::make_pair(x&: *Elt, y&: Idx);
3351 }
3352
3353 // We need to have logged a step for this to count as a legal index sequence.
3354 if (!SeqStepNum || !SeqStepDenom)
3355 return std::nullopt;
3356
3357 // Loop back through the sequence and validate elements we might have skipped
3358 // while waiting for a valid step. While doing this, log any sequence addend.
3359 for (auto [Idx, Elt] : enumerate(First&: Elts)) {
3360 if (!Elt)
3361 continue;
3362 uint64_t ExpectedVal =
3363 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3364 int64_t Addend = SignExtend64(X: *Elt - ExpectedVal, B: EltSizeInBits);
3365 if (!SeqAddend)
3366 SeqAddend = Addend;
3367 else if (Addend != SeqAddend)
3368 return std::nullopt;
3369 }
3370
3371 assert(SeqAddend && "Must have an addend if we have a step");
3372
3373 return VIDSequence{.StepNumerator: *SeqStepNum, .StepDenominator: *SeqStepDenom, .Addend: *SeqAddend};
3374}
3375
3376// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3377// and lower it as a VRGATHER_VX_VL from the source vector.
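// For example (illustrative), a splat of (extract_vector_elt %vec, %idx) where
// %vec has the same type as the result becomes (VRGATHER_VX_VL %vec, %idx)
// instead of a scalar extract followed by a scalar splat.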
3378static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3379 SelectionDAG &DAG,
3380 const RISCVSubtarget &Subtarget) {
3381 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3382 return SDValue();
3383 SDValue Vec = SplatVal.getOperand(i: 0);
3384 // Only perform this optimization on vectors of the same size for simplicity.
3385 // Don't perform this optimization for i1 vectors.
3386 // FIXME: Support i1 vectors, maybe by promoting to i8?
3387 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3388 return SDValue();
3389 SDValue Idx = SplatVal.getOperand(i: 1);
3390 // The index must be a legal type.
3391 if (Idx.getValueType() != Subtarget.getXLenVT())
3392 return SDValue();
3393
3394 MVT ContainerVT = VT;
3395 if (VT.isFixedLengthVector()) {
3396 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3397 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
3398 }
3399
3400 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3401
3402 SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT, N1: Vec,
3403 N2: Idx, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
3404
3405 if (!VT.isFixedLengthVector())
3406 return Gather;
3407
3408 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
3409}
3410
3411
3412/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3413/// which constitute a large proportion of the elements. In such cases we can
3414/// splat a vector with the dominant element and make up the shortfall with
3415/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3416/// Note that this includes vectors of 2 elements by association. The
3417/// upper-most element is the "dominant" one, allowing us to use a splat to
3418/// "insert" the upper element, and an insert of the lower element at position
3419/// 0, which improves codegen.
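/// For example (illustrative), <2, 2, 2, 2, 3, 2, 2, 7> may be lowered as a
/// splat of 2 with the 3 and the trailing 7 inserted afterwards.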
3420static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3421 const RISCVSubtarget &Subtarget) {
3422 MVT VT = Op.getSimpleValueType();
3423 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3424
3425 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3426
3427 SDLoc DL(Op);
3428 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3429
3430 MVT XLenVT = Subtarget.getXLenVT();
3431 unsigned NumElts = Op.getNumOperands();
3432
3433 SDValue DominantValue;
3434 unsigned MostCommonCount = 0;
3435 DenseMap<SDValue, unsigned> ValueCounts;
3436 unsigned NumUndefElts =
3437 count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); });
3438
3439 // Track the number of scalar loads we know we'd be inserting, estimated as
3440 // any non-zero floating-point constant. Other kinds of element are either
3441 // already in registers or are materialized on demand. The threshold at which
3442 // a vector load is more desirable than several scalar materialization and
3443 // vector-insertion instructions is not known.
3444 unsigned NumScalarLoads = 0;
3445
3446 for (SDValue V : Op->op_values()) {
3447 if (V.isUndef())
3448 continue;
3449
3450 ValueCounts.insert(KV: std::make_pair(x&: V, y: 0));
3451 unsigned &Count = ValueCounts[V];
3452 if (0 == Count)
3453 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val&: V))
3454 NumScalarLoads += !CFP->isExactlyValue(V: +0.0);
3455
3456 // Is this value dominant? In case of a tie, prefer the highest element as
3457 // it's cheaper to insert near the beginning of a vector than it is at the
3458 // end.
3459 if (++Count >= MostCommonCount) {
3460 DominantValue = V;
3461 MostCommonCount = Count;
3462 }
3463 }
3464
3465 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3466 unsigned NumDefElts = NumElts - NumUndefElts;
3467 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3468
3469 // Don't perform this optimization when optimizing for size, since
3470 // materializing elements and inserting them tends to cause code bloat.
3471 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3472 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) &&
3473 ((MostCommonCount > DominantValueCountThreshold) ||
3474 (ValueCounts.size() <= Log2_32(Value: NumDefElts)))) {
3475 // Start by splatting the most common element.
3476 SDValue Vec = DAG.getSplatBuildVector(VT, DL, Op: DominantValue);
3477
3478 DenseSet<SDValue> Processed{DominantValue};
3479
3480 // We can handle an insert into the last element (of a splat) via
3481 // v(f)slide1down. This is slightly better than the vslideup insert
3482 // lowering as it avoids the need for a vector group temporary. It
3483 // is also better than using vmerge.vx as it avoids the need to
3484 // materialize the mask in a vector register.
3485 if (SDValue LastOp = Op->getOperand(Num: Op->getNumOperands() - 1);
3486 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3487 LastOp != DominantValue) {
3488 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
3489 auto OpCode =
3490 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3491 if (!VT.isFloatingPoint())
3492 LastOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: LastOp);
3493 Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec,
3494 N3: LastOp, N4: Mask, N5: VL);
3495 Vec = convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
3496 Processed.insert(V: LastOp);
3497 }
3498
3499 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3500 for (const auto &OpIdx : enumerate(First: Op->ops())) {
3501 const SDValue &V = OpIdx.value();
3502 if (V.isUndef() || !Processed.insert(V).second)
3503 continue;
3504 if (ValueCounts[V] == 1) {
3505 Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: Vec, N2: V,
3506 N3: DAG.getVectorIdxConstant(Val: OpIdx.index(), DL));
3507 } else {
3508 // Blend in all instances of this value using a VSELECT, using a
3509 // mask where each bit signals whether that element is the one
3510 // we're after.
3511 SmallVector<SDValue> Ops;
3512 transform(Range: Op->op_values(), d_first: std::back_inserter(x&: Ops), F: [&](SDValue V1) {
3513 return DAG.getConstant(Val: V == V1, DL, VT: XLenVT);
3514 });
3515 Vec = DAG.getNode(Opcode: ISD::VSELECT, DL, VT,
3516 N1: DAG.getBuildVector(VT: SelMaskTy, DL, Ops),
3517 N2: DAG.getSplatBuildVector(VT, DL, Op: V), N3: Vec);
3518 }
3519 }
3520
3521 return Vec;
3522 }
3523
3524 return SDValue();
3525}
3526
3527static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3528 const RISCVSubtarget &Subtarget) {
3529 MVT VT = Op.getSimpleValueType();
3530 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3531
3532 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3533
3534 SDLoc DL(Op);
3535 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3536
3537 MVT XLenVT = Subtarget.getXLenVT();
3538 unsigned NumElts = Op.getNumOperands();
3539
3540 if (VT.getVectorElementType() == MVT::i1) {
3541 if (ISD::isBuildVectorAllZeros(N: Op.getNode())) {
3542 SDValue VMClr = DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT: ContainerVT, Operand: VL);
3543 return convertFromScalableVector(VT, V: VMClr, DAG, Subtarget);
3544 }
3545
3546 if (ISD::isBuildVectorAllOnes(N: Op.getNode())) {
3547 SDValue VMSet = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
3548 return convertFromScalableVector(VT, V: VMSet, DAG, Subtarget);
3549 }
3550
3551 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3552 // scalar integer chunks whose bit-width depends on the number of mask
3553 // bits and XLEN.
3554 // First, determine the most appropriate scalar integer type to use. This
3555 // is at most XLenVT, but may be shrunk to a smaller vector element type
3556 // according to the size of the final vector - use i8 chunks rather than
3557 // XLenVT if we're producing a v8i1. This results in more consistent
3558 // codegen across RV32 and RV64.
3559 unsigned NumViaIntegerBits = std::clamp(val: NumElts, lo: 8u, hi: Subtarget.getXLen());
3560 NumViaIntegerBits = std::min(a: NumViaIntegerBits, b: Subtarget.getELen());
3561 // If we have to use more than one INSERT_VECTOR_ELT then this
3562 // optimization is likely to increase code size; avoid performing it in
3563 // such a case. We can use a load from a constant pool in this case.
3564 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3565 return SDValue();
3566 // Now we can create our integer vector type. Note that it may be larger
3567 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3568 unsigned IntegerViaVecElts = divideCeil(Numerator: NumElts, Denominator: NumViaIntegerBits);
3569 MVT IntegerViaVecVT =
3570 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NumViaIntegerBits),
3571 NumElements: IntegerViaVecElts);
3572
3573 uint64_t Bits = 0;
3574 unsigned BitPos = 0, IntegerEltIdx = 0;
3575 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3576
3577 for (unsigned I = 0; I < NumElts;) {
3578 SDValue V = Op.getOperand(i: I);
3579 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3580 Bits |= ((uint64_t)BitValue << BitPos);
3581 ++BitPos;
3582 ++I;
3583
3584 // Once we accumulate enough bits to fill our scalar type or process the
3585 // last element, insert into our vector and clear our accumulated data.
3586 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3587 if (NumViaIntegerBits <= 32)
3588 Bits = SignExtend64<32>(x: Bits);
3589 SDValue Elt = DAG.getConstant(Val: Bits, DL, VT: XLenVT);
3590 Elts[IntegerEltIdx] = Elt;
3591 Bits = 0;
3592 BitPos = 0;
3593 IntegerEltIdx++;
3594 }
3595 }
3596
3597 SDValue Vec = DAG.getBuildVector(VT: IntegerViaVecVT, DL, Ops: Elts);
3598
3599 if (NumElts < NumViaIntegerBits) {
3600 // If we're producing a smaller vector than our minimum legal integer
3601 // type, bitcast to the equivalent (known-legal) mask type, and extract
3602 // our final mask.
3603 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3604 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3605 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Vec,
3606 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
3607 } else {
3608 // Else we must have produced an integer type with the same size as the
3609 // mask type; bitcast for the final result.
3610 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3611 Vec = DAG.getBitcast(VT, V: Vec);
3612 }
3613
3614 return Vec;
3615 }
3616
3617 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
3618 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3619 : RISCVISD::VMV_V_X_VL;
3620 if (!VT.isFloatingPoint())
3621 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat);
3622 Splat =
3623 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL);
3624 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
3625 }
3626
3627 // Try and match index sequences, which we can lower to the vid instruction
3628 // with optional modifications. An all-undef vector is matched by
3629 // getSplatValue, above.
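  // For example (illustrative): <0, 1, 2, 3> lowers to a plain vid.v,
  // <3, 5, 7, 9> to (vid.v << 1) + 3, and <0, 0, 1, 1> to vid.v >> 1.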
3630 if (auto SimpleVID = isSimpleVIDSequence(Op, EltSizeInBits: Op.getScalarValueSizeInBits())) {
3631 int64_t StepNumerator = SimpleVID->StepNumerator;
3632 unsigned StepDenominator = SimpleVID->StepDenominator;
3633 int64_t Addend = SimpleVID->Addend;
3634
3635 assert(StepNumerator != 0 && "Invalid step");
3636 bool Negate = false;
3637 int64_t SplatStepVal = StepNumerator;
3638 unsigned StepOpcode = ISD::MUL;
3639 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3640 // anyway as the shift of 63 won't fit in uimm5.
3641 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3642 isPowerOf2_64(Value: std::abs(i: StepNumerator))) {
3643 Negate = StepNumerator < 0;
3644 StepOpcode = ISD::SHL;
3645 SplatStepVal = Log2_64(Value: std::abs(i: StepNumerator));
3646 }
3647
3648 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3649 // threshold since it's the immediate value many RVV instructions accept.
3650 // There is no vmul.vi instruction, so ensure the multiply constant can
3651 // fit in a single addi instruction.
3652 if (((StepOpcode == ISD::MUL && isInt<12>(x: SplatStepVal)) ||
3653 (StepOpcode == ISD::SHL && isUInt<5>(x: SplatStepVal))) &&
3654 isPowerOf2_32(Value: StepDenominator) &&
3655 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(x: Addend)) {
3656 MVT VIDVT =
3657 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3658 MVT VIDContainerVT =
3659 getContainerForFixedLengthVector(DAG, VT: VIDVT, Subtarget);
3660 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: VIDContainerVT, N1: Mask, N2: VL);
3661 // Convert right out of the scalable type so we can use standard ISD
3662 // nodes for the rest of the computation. If we used scalable types with
3663 // these, we'd lose the fixed-length vector info and generate worse
3664 // vsetvli code.
3665 VID = convertFromScalableVector(VT: VIDVT, V: VID, DAG, Subtarget);
3666 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3667 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3668 SDValue SplatStep = DAG.getConstant(Val: SplatStepVal, DL, VT: VIDVT);
3669 VID = DAG.getNode(Opcode: StepOpcode, DL, VT: VIDVT, N1: VID, N2: SplatStep);
3670 }
3671 if (StepDenominator != 1) {
3672 SDValue SplatStep =
3673 DAG.getConstant(Val: Log2_64(Value: StepDenominator), DL, VT: VIDVT);
3674 VID = DAG.getNode(Opcode: ISD::SRL, DL, VT: VIDVT, N1: VID, N2: SplatStep);
3675 }
3676 if (Addend != 0 || Negate) {
3677 SDValue SplatAddend = DAG.getConstant(Val: Addend, DL, VT: VIDVT);
3678 VID = DAG.getNode(Opcode: Negate ? ISD::SUB : ISD::ADD, DL, VT: VIDVT, N1: SplatAddend,
3679 N2: VID);
3680 }
3681 if (VT.isFloatingPoint()) {
3682 // TODO: Use vfwcvt to reduce register pressure.
3683 VID = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: VID);
3684 }
3685 return VID;
3686 }
3687 }
3688
3689 // For very small build_vectors, use a single scalar insert of a constant.
3690 // TODO: Base this on constant rematerialization cost, not size.
3691 const unsigned EltBitSize = VT.getScalarSizeInBits();
3692 if (VT.getSizeInBits() <= 32 &&
3693 ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) {
3694 MVT ViaIntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
3695 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3696 "Unexpected sequence type");
3697 // If we can use the original VL with the modified element type, this
3698 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3699 // be moved into InsertVSETVLI?
3700 unsigned ViaVecLen =
3701 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3702 MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen);
3703
3704 uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize);
3705 uint64_t SplatValue = 0;
3706 // Construct the amalgamated value at this larger vector type.
3707 for (const auto &OpIdx : enumerate(First: Op->op_values())) {
3708 const auto &SeqV = OpIdx.value();
3709 if (!SeqV.isUndef())
3710 SplatValue |=
3711 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3712 }
3713
3714 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3715 // achieve better constant materialization.
3716 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3717 SplatValue = SignExtend64<32>(x: SplatValue);
3718
3719 SDValue Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ViaVecVT,
3720 N1: DAG.getUNDEF(VT: ViaVecVT),
3721 N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT),
3722 N3: DAG.getVectorIdxConstant(Val: 0, DL));
3723 if (ViaVecLen != 1)
3724 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL,
3725 VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: 1), N1: Vec,
3726 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
3727 return DAG.getBitcast(VT, V: Vec);
3728 }
3729
3730
3731 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3732 // when re-interpreted as a vector with a larger element type. For example,
3733 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3734 // could be instead splat as
3735 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3736 // TODO: This optimization could also work on non-constant splats, but it
3737 // would require bit-manipulation instructions to construct the splat value.
3738 SmallVector<SDValue> Sequence;
3739 const auto *BV = cast<BuildVectorSDNode>(Val&: Op);
3740 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3741 ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) &&
3742 BV->getRepeatedSequence(Sequence) &&
3743 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3744 unsigned SeqLen = Sequence.size();
3745 MVT ViaIntVT = MVT::getIntegerVT(BitWidth: EltBitSize * SeqLen);
3746 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3747 ViaIntVT == MVT::i64) &&
3748 "Unexpected sequence type");
3749
3750 // If we can use the original VL with the modified element type, this
3751 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3752 // be moved into InsertVSETVLI?
3753 const unsigned RequiredVL = NumElts / SeqLen;
3754 const unsigned ViaVecLen =
3755 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3756 NumElts : RequiredVL;
3757 MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen);
3758
3759 unsigned EltIdx = 0;
3760 uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize);
3761 uint64_t SplatValue = 0;
3762 // Construct the amalgamated value which can be splatted as this larger
3763 // vector type.
3764 for (const auto &SeqV : Sequence) {
3765 if (!SeqV.isUndef())
3766 SplatValue |=
3767 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3768 EltIdx++;
3769 }
3770
3771 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3772 // achieve better constant materialization.
3773 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3774 SplatValue = SignExtend64<32>(x: SplatValue);
3775
3776 // Since we can't introduce illegal i64 types at this stage, we can only
3777 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3778 // way we can use RVV instructions to splat.
3779 assert((ViaIntVT.bitsLE(XLenVT) ||
3780 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3781 "Unexpected bitcast sequence");
3782 if (ViaIntVT.bitsLE(VT: XLenVT) || isInt<32>(x: SplatValue)) {
3783 SDValue ViaVL =
3784 DAG.getConstant(Val: ViaVecVT.getVectorNumElements(), DL, VT: XLenVT);
3785 MVT ViaContainerVT =
3786 getContainerForFixedLengthVector(DAG, VT: ViaVecVT, Subtarget);
3787 SDValue Splat =
3788 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ViaContainerVT,
3789 N1: DAG.getUNDEF(VT: ViaContainerVT),
3790 N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT), N3: ViaVL);
3791 Splat = convertFromScalableVector(VT: ViaVecVT, V: Splat, DAG, Subtarget);
3792 if (ViaVecLen != RequiredVL)
3793 Splat = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL,
3794 VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: RequiredVL), N1: Splat,
3795 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
3796 return DAG.getBitcast(VT, V: Splat);
3797 }
3798 }
3799
3800 // If the number of signbits allows, see if we can lower as a <N x i8>.
3801 // Our main goal here is to reduce LMUL (and thus work) required to
3802 // build the constant, but we will also narrow if the resulting
3803 // narrow vector is known to materialize cheaply.
3804 // TODO: We really should be costing the smaller vector. There are
3805 // profitable cases this misses.
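  // For example (illustrative), a v4i32 constant such as <0, 1, -1, 2>, where
  // every element is the sign extension of an i8, can be built as a v4i8 and
  // widened back with RISCVISD::VSEXT_VL.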
3806 if (EltBitSize > 8 && VT.isInteger() &&
3807 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3808 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3809 if (EltBitSize - SignBits < 8) {
3810 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3811 DL, Op->ops());
3812 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3813 Source, DAG, Subtarget);
3814 SDValue Res = DAG.getNode(Opcode: RISCVISD::VSEXT_VL, DL, VT: ContainerVT, N1: Source, N2: Mask, N3: VL);
3815 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
3816 }
3817 }
3818
3819 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3820 return Res;
3821
3822 // For constant vectors, use generic constant pool lowering. Otherwise,
3823 // we'd have to materialize constants in GPRs just to move them into the
3824 // vector.
3825 return SDValue();
3826}
3827
3828static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3829 const RISCVSubtarget &Subtarget) {
3830 MVT VT = Op.getSimpleValueType();
3831 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3832
3833 if (ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) ||
3834 ISD::isBuildVectorOfConstantFPSDNodes(N: Op.getNode()))
3835 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3836
3837 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3838
3839 SDLoc DL(Op);
3840 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3841
3842 MVT XLenVT = Subtarget.getXLenVT();
3843
3844 if (VT.getVectorElementType() == MVT::i1) {
3845 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3846 // vector type, we have a legal equivalently-sized i8 type, so we can use
3847 // that.
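    // For example (illustrative), a v4i1 build_vector is lowered roughly as
    //   setcc (and (v4i8 build_vector ...), splat(1)), splat(0), setne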
3848 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3849 SDValue VecZero = DAG.getConstant(Val: 0, DL, VT: WideVecVT);
3850
3851 SDValue WideVec;
3852 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
3853 // For a splat, perform a scalar truncate before creating the wider
3854 // vector.
3855 Splat = DAG.getNode(Opcode: ISD::AND, DL, VT: Splat.getValueType(), N1: Splat,
3856 N2: DAG.getConstant(Val: 1, DL, VT: Splat.getValueType()));
3857 WideVec = DAG.getSplatBuildVector(VT: WideVecVT, DL, Op: Splat);
3858 } else {
3859 SmallVector<SDValue, 8> Ops(Op->op_values());
3860 WideVec = DAG.getBuildVector(VT: WideVecVT, DL, Ops);
3861 SDValue VecOne = DAG.getConstant(Val: 1, DL, VT: WideVecVT);
3862 WideVec = DAG.getNode(Opcode: ISD::AND, DL, VT: WideVecVT, N1: WideVec, N2: VecOne);
3863 }
3864
3865 return DAG.getSetCC(DL, VT, LHS: WideVec, RHS: VecZero, Cond: ISD::SETNE);
3866 }
3867
3868 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
3869 if (auto Gather = matchSplatAsGather(SplatVal: Splat, VT, DL, DAG, Subtarget))
3870 return Gather;
3871 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3872 : RISCVISD::VMV_V_X_VL;
3873 if (!VT.isFloatingPoint())
3874 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat);
3875 Splat =
3876 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL);
3877 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
3878 }
3879
3880 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3881 return Res;
3882
3883 // If we're compiling for an exact VLEN value, we can split our work per
3884 // register in the register group.
3885 if (const auto VLen = Subtarget.getRealVLen();
3886 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3887 MVT ElemVT = VT.getVectorElementType();
3888 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3889 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3890 MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg);
3891 MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget);
3892 assert(M1VT == getLMUL1VT(M1VT));
3893
3894 // The following semantically builds up a fixed length concat_vector
3895 // of the component build_vectors. We eagerly lower to scalable and
3896 // insert_subvector here to avoid DAG combining it back to a large
3897 // build_vector.
3898 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3899 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3900 SDValue Vec = DAG.getUNDEF(VT: ContainerVT);
3901 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3902 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(N: i, M: ElemsPerVReg);
3903 SDValue SubBV =
3904 DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL, VT: OneRegVT, Ops: OneVRegOfOps);
3905 SubBV = convertToScalableVector(VT: M1VT, V: SubBV, DAG, Subtarget);
3906 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3907 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubBV,
3908 N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL));
3909 }
3910 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
3911 }
3912
3913 // For m1 vectors, if we have non-undef values in both halves of our vector,
3914 // split the vector into low and high halves, build them separately, then
3915 // use a vselect to combine them. For long vectors, this cuts the critical
3916 // path of the vslide1down sequence in half, and gives us an opportunity
3917 // to special case each half independently. Note that we don't change the
3918 // length of the sub-vectors here, so if both fall back to the generic
3919 // vslide1down path, we should be able to fold the vselect into the final
3920 // vslidedown (for the undef tail) for the first half w/ masking.
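  // For example (illustrative, assuming the container fits in one register):
  // an <8 x i16> build_vector <a,b,c,d,e,f,g,h> is rebuilt as
  // <a,b,c,d,u,u,u,u> and <u,u,u,u,e,f,g,h> and recombined with a vselect on
  // the mask <1,1,1,1,0,0,0,0>.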
3921 unsigned NumElts = VT.getVectorNumElements();
3922 unsigned NumUndefElts =
3923 count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); });
3924 unsigned NumDefElts = NumElts - NumUndefElts;
3925 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3926 ContainerVT.bitsLE(VT: getLMUL1VT(VT: ContainerVT))) {
3927 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3928 SmallVector<SDValue> MaskVals;
3929 SDValue UndefElem = DAG.getUNDEF(VT: Op->getOperand(Num: 0)->getValueType(ResNo: 0));
3930 SubVecAOps.reserve(N: NumElts);
3931 SubVecBOps.reserve(N: NumElts);
3932 for (unsigned i = 0; i < NumElts; i++) {
3933 SDValue Elem = Op->getOperand(Num: i);
3934 if (i < NumElts / 2) {
3935 SubVecAOps.push_back(Elt: Elem);
3936 SubVecBOps.push_back(Elt: UndefElem);
3937 } else {
3938 SubVecAOps.push_back(Elt: UndefElem);
3939 SubVecBOps.push_back(Elt: Elem);
3940 }
3941 bool SelectMaskVal = (i < NumElts / 2);
3942 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
3943 }
3944 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3945 MaskVals.size() == NumElts);
3946
3947 SDValue SubVecA = DAG.getBuildVector(VT, DL, Ops: SubVecAOps);
3948 SDValue SubVecB = DAG.getBuildVector(VT, DL, Ops: SubVecBOps);
3949 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3950 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
3951 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: SubVecA, N3: SubVecB);
3952 }
3953
3954 // Cap the cost at a value linear to the number of elements in the vector.
3955 // The default lowering is to use the stack. The vector store + scalar loads
3956 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
3957 // being (at least) linear in LMUL. As a result, using the vslidedown
3958 // lowering for every element ends up being VL*LMUL.
3959 // TODO: Should we be directly costing the stack alternative? Doing so might
3960 // give us a more accurate upper bound.
3961 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3962
3963 // TODO: unify with TTI getSlideCost.
3964 InstructionCost PerSlideCost = 1;
3965 switch (RISCVTargetLowering::getLMUL(VT: ContainerVT)) {
3966 default: break;
3967 case RISCVII::VLMUL::LMUL_2:
3968 PerSlideCost = 2;
3969 break;
3970 case RISCVII::VLMUL::LMUL_4:
3971 PerSlideCost = 4;
3972 break;
3973 case RISCVII::VLMUL::LMUL_8:
3974 PerSlideCost = 8;
3975 break;
3976 }
3977
3978 // TODO: Should we be using the build instseq then cost + evaluate scheme
3979 // we use for integer constants here?
3980 unsigned UndefCount = 0;
3981 for (const SDValue &V : Op->ops()) {
3982 if (V.isUndef()) {
3983 UndefCount++;
3984 continue;
3985 }
3986 if (UndefCount) {
3987 LinearBudget -= PerSlideCost;
3988 UndefCount = 0;
3989 }
3990 LinearBudget -= PerSlideCost;
3991 }
3992 if (UndefCount) {
3993 LinearBudget -= PerSlideCost;
3994 }
3995
3996 if (LinearBudget < 0)
3997 return SDValue();
3998
3999 assert((!VT.isFloatingPoint() ||
4000 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4001 "Illegal type which will result in reserved encoding");
4002
4003 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4004
4005 SDValue Vec;
4006 UndefCount = 0;
4007 for (SDValue V : Op->ops()) {
4008 if (V.isUndef()) {
4009 UndefCount++;
4010 continue;
4011 }
4012
4013 // Start our sequence with a TA splat in the hopes that hardware is able to
4014 // recognize there's no dependency on the prior value of our temporary
4015 // register.
4016 if (!Vec) {
4017 Vec = DAG.getSplatVector(VT, DL, Op: V);
4018 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
4019 UndefCount = 0;
4020 continue;
4021 }
4022
4023 if (UndefCount) {
4024 const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT());
4025 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
4026 Op: Vec, Offset, Mask, VL, Policy);
4027 UndefCount = 0;
4028 }
4029 auto OpCode =
4030 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4031 if (!VT.isFloatingPoint())
4032 V = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: V);
4033 Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec,
4034 N3: V, N4: Mask, N5: VL);
4035 }
4036 if (UndefCount) {
4037 const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT());
4038 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
4039 Op: Vec, Offset, Mask, VL, Policy);
4040 }
4041 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4042}
4043
4044static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4045 SDValue Lo, SDValue Hi, SDValue VL,
4046 SelectionDAG &DAG) {
4047 if (!Passthru)
4048 Passthru = DAG.getUNDEF(VT);
4049 if (isa<ConstantSDNode>(Val: Lo) && isa<ConstantSDNode>(Val: Hi)) {
4050 int32_t LoC = cast<ConstantSDNode>(Val&: Lo)->getSExtValue();
4051 int32_t HiC = cast<ConstantSDNode>(Val&: Hi)->getSExtValue();
4052 // If every bit of the Hi constant equals Lo's sign bit, lower this as a
4053 // custom node in order to try and match RVV vector/scalar instructions.
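    // For example (illustrative): Lo = -5 (0xFFFFFFFB) and Hi = -1
    // (0xFFFFFFFF) form the 64-bit value -5, so a vmv.v.x of the sign-extended
    // 32-bit scalar -5 produces the desired splat.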
4054 if ((LoC >> 31) == HiC)
4055 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4056
4057 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4058 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4059 // vlmax vsetvli or vsetivli to change the VL.
4060 // FIXME: Support larger constants?
4061 // FIXME: Support non-constant VLs by saturating?
4062 if (LoC == HiC) {
4063 SDValue NewVL;
4064 if (isAllOnesConstant(VL) ||
4065 (isa<RegisterSDNode>(VL) &&
4066 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4067 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4068 else if (isa<ConstantSDNode>(Val: VL) && isUInt<4>(x: VL->getAsZExtVal()))
4069 NewVL = DAG.getNode(Opcode: ISD::ADD, DL, VT: VL.getValueType(), N1: VL, N2: VL);
4070
4071 if (NewVL) {
4072 MVT InterVT =
4073 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4074 auto InterVec = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterVT,
4075 N1: DAG.getUNDEF(VT: InterVT), N2: Lo, N3: NewVL);
4076 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: InterVec);
4077 }
4078 }
4079 }
4080
4081 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4082 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(i: 0) == Lo &&
4083 isa<ConstantSDNode>(Val: Hi.getOperand(i: 1)) &&
4084 Hi.getConstantOperandVal(i: 1) == 31)
4085 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4086
4087 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4088 // even if it might be sign extended.
4089 if (Hi.isUndef())
4090 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4091
4092 // Fall back to a stack store and stride x0 vector load.
4093 return DAG.getNode(Opcode: RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, N1: Passthru, N2: Lo,
4094 N3: Hi, N4: VL);
4095}
4096
4097// Called by type legalization to handle splat of i64 on RV32.
4098// FIXME: We can optimize this when the type has sign or zero bits in one
4099// of the halves.
4100static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4101 SDValue Scalar, SDValue VL,
4102 SelectionDAG &DAG) {
4103 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4104 SDValue Lo, Hi;
4105 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4106 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4107}
4108
4109// This function lowers a splat of a scalar operand Splat with the vector
4110// length VL. It ensures the final sequence is type legal, which is useful when
4111// lowering a splat after type legalization.
4112static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4113 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4114 const RISCVSubtarget &Subtarget) {
4115 bool HasPassthru = Passthru && !Passthru.isUndef();
4116 if (!HasPassthru && !Passthru)
4117 Passthru = DAG.getUNDEF(VT);
4118 if (VT.isFloatingPoint())
4119 return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
4120
4121 MVT XLenVT = Subtarget.getXLenVT();
4122
4123 // Simplest case is that the operand needs to be promoted to XLenVT.
4124 if (Scalar.getValueType().bitsLE(VT: XLenVT)) {
4125 // If the operand is a constant, sign extend to increase our chances
4126 // of being able to use a .vi instruction. ANY_EXTEND would become a
4127 // zero extend and the simm5 check in isel would fail.
4128 // FIXME: Should we ignore the upper bits in isel instead?
4129 unsigned ExtOpc =
4130 isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4131 Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar);
4132 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
4133 }
4134
4135 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4136 "Unexpected scalar for splat lowering!");
4137
4138 if (isOneConstant(V: VL) && isNullConstant(V: Scalar))
4139 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru,
4140 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: VL);
4141
4142 // Otherwise use the more complicated splatting algorithm.
4143 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4144}
4145
4146// This function lowers an insert of a scalar operand Scalar into lane
4147// 0 of the vector regardless of the value of VL. The contents of the
4148// remaining lanes of the result vector are unspecified. VL is assumed
4149// to be non-zero.
4150static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4151 const SDLoc &DL, SelectionDAG &DAG,
4152 const RISCVSubtarget &Subtarget) {
4153 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4154
4155 const MVT XLenVT = Subtarget.getXLenVT();
4156 SDValue Passthru = DAG.getUNDEF(VT);
4157
4158 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4159 isNullConstant(V: Scalar.getOperand(i: 1))) {
4160 SDValue ExtractedVal = Scalar.getOperand(i: 0);
4161 // The element types must be the same.
4162 if (ExtractedVal.getValueType().getVectorElementType() ==
4163 VT.getVectorElementType()) {
4164 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4165 MVT ExtractedContainerVT = ExtractedVT;
4166 if (ExtractedContainerVT.isFixedLengthVector()) {
4167 ExtractedContainerVT = getContainerForFixedLengthVector(
4168 DAG, VT: ExtractedContainerVT, Subtarget);
4169 ExtractedVal = convertToScalableVector(VT: ExtractedContainerVT,
4170 V: ExtractedVal, DAG, Subtarget);
4171 }
4172 if (ExtractedContainerVT.bitsLE(VT))
4173 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru,
4174 N2: ExtractedVal, N3: DAG.getVectorIdxConstant(Val: 0, DL));
4175 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: ExtractedVal,
4176 N2: DAG.getVectorIdxConstant(Val: 0, DL));
4177 }
4178 }
4179
4180
4181 if (VT.isFloatingPoint())
4182 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT,
4183 N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL);
4184
4185 // Avoid the tricky legalization cases by falling back to using the
4186 // splat code which already handles it gracefully.
4187 if (!Scalar.getValueType().bitsLE(VT: XLenVT))
4188 return lowerScalarSplat(Passthru: DAG.getUNDEF(VT), Scalar,
4189 VL: DAG.getConstant(Val: 1, DL, VT: XLenVT),
4190 VT, DL, DAG, Subtarget);
4191
4192 // If the operand is a constant, sign extend to increase our chances
4193 // of being able to use a .vi instruction. ANY_EXTEND would become a
4194 // zero extend and the simm5 check in isel would fail.
4195 // FIXME: Should we ignore the upper bits in isel instead?
4196 unsigned ExtOpc =
4197 isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4198 Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar);
4199 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT,
4200 N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL);
4201}
4202
// Is this a shuffle that extracts either the even or odd elements of a
// vector? That is, specifically, either (a) or (b) below.
4205// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4206// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4207// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4208// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns true on success; the source vector is the common operand of the
// extracts and Mask[0] == 0 selects the even elements.
4210static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4211 SDValue V2, ArrayRef<int> Mask,
4212 const RISCVSubtarget &Subtarget) {
  // We need to be able to widen the elements to the next larger integer type.
4214 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4215 return false;
4216
  // Both inputs must be extracts.
4218 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4219 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4220 return false;
4221
4222 // Extracting from the same source.
4223 SDValue Src = V1.getOperand(i: 0);
4224 if (Src != V2.getOperand(i: 0))
4225 return false;
4226
4227 // Src needs to have twice the number of elements.
4228 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4229 return false;
4230
4231 // The extracts must extract the two halves of the source.
4232 if (V1.getConstantOperandVal(i: 1) != 0 ||
4233 V2.getConstantOperandVal(i: 1) != Mask.size())
4234 return false;
4235
4236 // First index must be the first even or odd element from V1.
4237 if (Mask[0] != 0 && Mask[0] != 1)
4238 return false;
4239
4240 // The others must increase by 2 each time.
4241 // TODO: Support undef elements?
4242 for (unsigned i = 1; i != Mask.size(); ++i)
4243 if (Mask[i] != Mask[i - 1] + 2)
4244 return false;
4245
4246 return true;
4247}
4248
4249/// Is this shuffle interleaving contiguous elements from one vector into the
4250/// even elements and contiguous elements from another vector into the odd
4251/// elements. \p EvenSrc will contain the element that should be in the first
4252/// even element. \p OddSrc will contain the element that should be in the first
4253/// odd element. These can be the first element in a source or the element half
4254/// way through the source.
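// For example (illustrative), with two v8i16 sources the mask
//   <0, 8, 1, 9, 2, 10, 3, 11>
// interleaves the low half of the first source (EvenSrc = 0) with the low
// half of the second source (OddSrc = 8), while the unary mask
//   <0, 4, 1, 5, 2, 6, 3, 7>
// interleaves the two halves of a single source (EvenSrc = 0, OddSrc = 4).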
4255static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4256 int &OddSrc, const RISCVSubtarget &Subtarget) {
4257 // We need to be able to widen elements to the next larger integer type.
4258 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4259 return false;
4260
4261 int Size = Mask.size();
4262 int NumElts = VT.getVectorNumElements();
4263 assert(Size == (int)NumElts && "Unexpected mask size");
4264
4265 SmallVector<unsigned, 2> StartIndexes;
4266 if (!ShuffleVectorInst::isInterleaveMask(Mask, Factor: 2, NumInputElts: Size * 2, StartIndexes))
4267 return false;
4268
4269 EvenSrc = StartIndexes[0];
4270 OddSrc = StartIndexes[1];
4271
4272 // One source should be low half of first vector.
4273 if (EvenSrc != 0 && OddSrc != 0)
4274 return false;
4275
  // Subvectors will be extracted either at the start of the two input
  // vectors, or at the start and middle of the first vector if it's a unary
  // interleave.
  // In both cases, HalfNumElts elements will be extracted.
4280 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4281 // we'll create an illegal extract_subvector.
4282 // FIXME: We could support other values using a slidedown first.
4283 int HalfNumElts = NumElts / 2;
4284 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4285}
4286
4287/// Match shuffles that concatenate two vectors, rotate the concatenation,
4288/// and then extract the original number of elements from the rotated result.
4289/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4290/// returned rotation amount is for a rotate right, where elements move from
4291/// higher elements to lower elements. \p LoSrc indicates the first source
4292/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4293/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4294/// 0 or 1 if a rotation is found.
4295///
4296/// NOTE: We talk about rotate to the right which matches how bit shift and
4297/// rotate instructions are described where LSBs are on the right, but LLVM IR
4298/// and the table below write vectors with the lowest elements on the left.
4299static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4300 int Size = Mask.size();
4301
4302 // We need to detect various ways of spelling a rotation:
4303 // [11, 12, 13, 14, 15, 0, 1, 2]
4304 // [-1, 12, 13, 14, -1, -1, 1, -1]
4305 // [-1, -1, -1, -1, -1, -1, 1, 2]
4306 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4307 // [-1, 4, 5, 6, -1, -1, 9, -1]
4308 // [-1, 4, 5, 6, -1, -1, -1, -1]
4309 int Rotation = 0;
4310 LoSrc = -1;
4311 HiSrc = -1;
4312 for (int i = 0; i != Size; ++i) {
4313 int M = Mask[i];
4314 if (M < 0)
4315 continue;
4316
4317 // Determine where a rotate vector would have started.
4318 int StartIdx = i - (M % Size);
4319 // The identity rotation isn't interesting, stop.
4320 if (StartIdx == 0)
4321 return -1;
4322
4323 // If we found the tail of a vector the rotation must be the missing
4324 // front. If we found the head of a vector, it must be how much of the
4325 // head.
4326 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4327
4328 if (Rotation == 0)
4329 Rotation = CandidateRotation;
4330 else if (Rotation != CandidateRotation)
4331 // The rotations don't match, so we can't match this mask.
4332 return -1;
4333
4334 // Compute which value this mask is pointing at.
4335 int MaskSrc = M < Size ? 0 : 1;
4336
4337 // Compute which of the two target values this index should be assigned to.
    // This reflects whether the high elements are remaining or the low
    // elements are remaining.
4340 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4341
4342 // Either set up this value if we've not encountered it before, or check
4343 // that it remains consistent.
4344 if (TargetSrc < 0)
4345 TargetSrc = MaskSrc;
4346 else if (TargetSrc != MaskSrc)
4347 // This may be a rotation, but it pulls from the inputs in some
4348 // unsupported interleaving.
4349 return -1;
4350 }
4351
4352 // Check that we successfully analyzed the mask, and normalize the results.
4353 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4354 assert((LoSrc >= 0 || HiSrc >= 0) &&
4355 "Failed to find a rotated input vector!");
4356
4357 return Rotation;
4358}
4359
4360// Lower a deinterleave shuffle to vnsrl.
4361// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4362// -> [p, q, r, s] (EvenElts == false)
4363// VT is the type of the vector to return, <[vscale x ]n x ty>
4364// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
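// Illustrative sketch (not necessarily the exact instructions isel produces):
// deinterleaving a v8i16 source conceptually views it as a v4i32 vector and
// narrowing-shifts each 32-bit element, roughly:
//   vnsrl.wi  v8, v16, 0    ; even elements  [a, b, c, d]
//   vnsrl.wi  v8, v16, 16   ; odd elements   [p, q, r, s]
// i.e. the shift amount is 0 for the even elements and SEW for the odd ones.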
4365static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4366 bool EvenElts,
4367 const RISCVSubtarget &Subtarget,
4368 SelectionDAG &DAG) {
4369 // The result is a vector of type <m x n x ty>
4370 MVT ContainerVT = VT;
4371 // Convert fixed vectors to scalable if needed
4372 if (ContainerVT.isFixedLengthVector()) {
4373 assert(Src.getSimpleValueType().isFixedLengthVector());
4374 ContainerVT = getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget);
4375
4376 // The source is a vector of type <m x n*2 x ty>
4377 MVT SrcContainerVT =
4378 MVT::getVectorVT(VT: ContainerVT.getVectorElementType(),
4379 EC: ContainerVT.getVectorElementCount() * 2);
4380 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
4381 }
4382
4383 auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
4384
4385 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4386 // This also converts FP to int.
4387 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4388 MVT WideSrcContainerVT = MVT::getVectorVT(
4389 VT: MVT::getIntegerVT(BitWidth: EltBits * 2), EC: ContainerVT.getVectorElementCount());
4390 Src = DAG.getBitcast(VT: WideSrcContainerVT, V: Src);
4391
4392 // The integer version of the container type.
4393 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4394
4395 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4396 // the original element size.
4397 unsigned Shift = EvenElts ? 0 : EltBits;
4398 SDValue SplatShift = DAG.getNode(
4399 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntContainerVT, N1: DAG.getUNDEF(VT: ContainerVT),
4400 N2: DAG.getConstant(Val: Shift, DL, VT: Subtarget.getXLenVT()), N3: VL);
4401 SDValue Res =
4402 DAG.getNode(Opcode: RISCVISD::VNSRL_VL, DL, VT: IntContainerVT, N1: Src, N2: SplatShift,
4403 N3: DAG.getUNDEF(VT: IntContainerVT), N4: TrueMask, N5: VL);
4404 // Cast back to FP if needed.
4405 Res = DAG.getBitcast(VT: ContainerVT, V: Res);
4406
4407 if (VT.isFixedLengthVector())
4408 Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
4409 return Res;
4410}
4411
4412// Lower the following shuffle to vslidedown.
4413// a)
4414// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4415// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4416// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4417// b)
4418// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4419// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4420// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4421// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4422// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4423// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
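// Illustrative outcome for case (a): both extracts come from t13 and the
// rebuilt mask <1,2,3,4,5,6,7,8> is consecutive, so this can be lowered as a
// single vslidedown of t13 by 1 followed by an extract of the low v8i8.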
4424static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4425 SDValue V1, SDValue V2,
4426 ArrayRef<int> Mask,
4427 const RISCVSubtarget &Subtarget,
4428 SelectionDAG &DAG) {
4429 auto findNonEXTRACT_SUBVECTORParent =
4430 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4431 uint64_t Offset = 0;
4432 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4433 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector, but we don't want to match that case.
4435 Parent.getOperand(i: 0).getSimpleValueType().isFixedLengthVector()) {
4436 Offset += Parent.getConstantOperandVal(i: 1);
4437 Parent = Parent.getOperand(i: 0);
4438 }
4439 return std::make_pair(x&: Parent, y&: Offset);
4440 };
4441
4442 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4443 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4444
4445 // Extracting from the same source.
4446 SDValue Src = V1Src;
4447 if (Src != V2Src)
4448 return SDValue();
4449
4450 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4451 SmallVector<int, 16> NewMask(Mask);
4452 for (size_t i = 0; i != NewMask.size(); ++i) {
4453 if (NewMask[i] == -1)
4454 continue;
4455
4456 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4457 NewMask[i] = NewMask[i] + V1IndexOffset;
4458 } else {
      // Subtracting NewMask.size() is needed; otherwise the (b) case would be
      // <5,6,7,12> instead of <5,6,7,8>.
4461 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4462 }
4463 }
4464
4465 // First index must be known and non-zero. It will be used as the slidedown
4466 // amount.
4467 if (NewMask[0] <= 0)
4468 return SDValue();
4469
  // NewMask must also be consecutive.
4471 for (unsigned i = 1; i != NewMask.size(); ++i)
4472 if (NewMask[i - 1] + 1 != NewMask[i])
4473 return SDValue();
4474
4475 MVT XLenVT = Subtarget.getXLenVT();
4476 MVT SrcVT = Src.getSimpleValueType();
4477 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
4478 auto [TrueMask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
4479 SDValue Slidedown =
4480 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
4481 Op: convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget),
4482 Offset: DAG.getConstant(Val: NewMask[0], DL, VT: XLenVT), Mask: TrueMask, VL);
4483 return DAG.getNode(
4484 Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT,
4485 N1: convertFromScalableVector(VT: SrcVT, V: Slidedown, DAG, Subtarget),
4486 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
4487}
4488
4489// Because vslideup leaves the destination elements at the start intact, we can
4490// use it to perform shuffles that insert subvectors:
4491//
4492// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4493// ->
4494// vsetvli zero, 8, e8, mf2, ta, ma
4495// vslideup.vi v8, v9, 4
4496//
4497// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4498// ->
4499// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
4501static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4502 SDValue V1, SDValue V2,
4503 ArrayRef<int> Mask,
4504 const RISCVSubtarget &Subtarget,
4505 SelectionDAG &DAG) {
4506 unsigned NumElts = VT.getVectorNumElements();
4507 int NumSubElts, Index;
4508 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumSrcElts: NumElts, NumSubElts,
4509 Index))
4510 return SDValue();
4511
4512 bool OpsSwapped = Mask[Index] < (int)NumElts;
4513 SDValue InPlace = OpsSwapped ? V2 : V1;
4514 SDValue ToInsert = OpsSwapped ? V1 : V2;
4515
4516 MVT XLenVT = Subtarget.getXLenVT();
4517 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4518 auto TrueMask = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).first;
4519 // We slide up by the index that the subvector is being inserted at, and set
4520 // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in-place vector, i.e. inserting right
4523 // up to the very end of it, then we don't actually care about the tail.
4524 if (NumSubElts + Index >= (int)NumElts)
4525 Policy |= RISCVII::TAIL_AGNOSTIC;
4526
4527 InPlace = convertToScalableVector(VT: ContainerVT, V: InPlace, DAG, Subtarget);
4528 ToInsert = convertToScalableVector(VT: ContainerVT, V: ToInsert, DAG, Subtarget);
4529 SDValue VL = DAG.getConstant(Val: NumSubElts + Index, DL, VT: XLenVT);
4530
4531 SDValue Res;
4532 // If we're inserting into the lowest elements, use a tail undisturbed
4533 // vmv.v.v.
4534 if (Index == 0)
4535 Res = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: InPlace, N2: ToInsert,
4536 N3: VL);
4537 else
4538 Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: InPlace, Op: ToInsert,
4539 Offset: DAG.getConstant(Val: Index, DL, VT: XLenVT), Mask: TrueMask, VL, Policy);
4540 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
4541}
4542
4543/// Match v(f)slide1up/down idioms. These operations involve sliding
4544/// N-1 elements to make room for an inserted scalar at one end.
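/// For example (illustrative), with V1 a splat build_vector of scalar x and
/// V2 a v4i32 value, the mask <0, 4, 5, 6> becomes vslide1up.vx (insert x at
/// the front of V2), and the mask <5, 6, 7, 0> becomes vslide1down.vx (append
/// x at the back of V2).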
4545static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4546 SDValue V1, SDValue V2,
4547 ArrayRef<int> Mask,
4548 const RISCVSubtarget &Subtarget,
4549 SelectionDAG &DAG) {
4550 bool OpsSwapped = false;
4551 if (!isa<BuildVectorSDNode>(Val: V1)) {
4552 if (!isa<BuildVectorSDNode>(Val: V2))
4553 return SDValue();
4554 std::swap(a&: V1, b&: V2);
4555 OpsSwapped = true;
4556 }
4557 SDValue Splat = cast<BuildVectorSDNode>(Val&: V1)->getSplatValue();
4558 if (!Splat)
4559 return SDValue();
4560
4561 // Return true if the mask could describe a slide of Mask.size() - 1
4562 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4563 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4564 const unsigned S = (Offset > 0) ? 0 : -Offset;
4565 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4566 for (unsigned i = S; i != E; ++i)
4567 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4568 return false;
4569 return true;
4570 };
4571
4572 const unsigned NumElts = VT.getVectorNumElements();
4573 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4574 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4575 return SDValue();
4576
4577 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // The inserted lane must come from the splat; an undef scalar is legal but
  // not profitable.
4579 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4580 return SDValue();
4581
4582 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4583 auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
4584 auto OpCode = IsVSlidedown ?
4585 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4586 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4587 if (!VT.isFloatingPoint())
4588 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Splat);
4589 auto Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT,
4590 N1: DAG.getUNDEF(VT: ContainerVT),
4591 N2: convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget),
4592 N3: Splat, N4: TrueMask, N5: VL);
4593 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4594}
4595
4596// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4597// to create an interleaved vector of <[vscale x] n*2 x ty>.
4598// This requires that the size of ty is less than the subtarget's maximum ELEN.
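// Illustrative example (register names are arbitrary): for EvenV = [a, b] and
// OddV = [x, y] with i16 elements, the result [a, x, b, y] is materialized
// (absent Zvbb) roughly as:
//   vwaddu.vv  v10, v8, v9    ; zext(Even) + zext(Odd) into i32 lanes
//   vwmaccu.vx v10, a0, v9    ; += 0xffff * zext(Odd), with a0 = -1
// so each i32 lane holds (Odd << 16) + Even, which reinterpreted as i16 lanes
// is the interleaved result.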
4599static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4600 const SDLoc &DL, SelectionDAG &DAG,
4601 const RISCVSubtarget &Subtarget) {
4602 MVT VecVT = EvenV.getSimpleValueType();
4603 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4604 // Convert fixed vectors to scalable if needed
4605 if (VecContainerVT.isFixedLengthVector()) {
4606 VecContainerVT = getContainerForFixedLengthVector(DAG, VT: VecVT, Subtarget);
4607 EvenV = convertToScalableVector(VT: VecContainerVT, V: EvenV, DAG, Subtarget);
4608 OddV = convertToScalableVector(VT: VecContainerVT, V: OddV, DAG, Subtarget);
4609 }
4610
4611 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4612
4613 // We're working with a vector of the same size as the resulting
4614 // interleaved vector, but with half the number of elements and
4615 // twice the SEW (Hence the restriction on not using the maximum
4616 // ELEN)
4617 MVT WideVT =
4618 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VecVT.getScalarSizeInBits() * 2),
4619 EC: VecVT.getVectorElementCount());
4620 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4621 if (WideContainerVT.isFixedLengthVector())
4622 WideContainerVT = getContainerForFixedLengthVector(DAG, VT: WideVT, Subtarget);
4623
4624 // Bitcast the input vectors to integers in case they are FP
4625 VecContainerVT = VecContainerVT.changeTypeToInteger();
4626 EvenV = DAG.getBitcast(VT: VecContainerVT, V: EvenV);
4627 OddV = DAG.getBitcast(VT: VecContainerVT, V: OddV);
4628
4629 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT: VecContainerVT, DL, DAG, Subtarget);
4630 SDValue Passthru = DAG.getUNDEF(VT: WideContainerVT);
4631
4632 SDValue Interleaved;
4633 if (OddV.isUndef()) {
4634 // If OddV is undef, this is a zero extend.
4635 // FIXME: Not only does this optimize the code, it fixes some correctness
4636 // issues because MIR does not have freeze.
4637 Interleaved =
4638 DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: EvenV, N2: Mask, N3: VL);
4639 } else if (Subtarget.hasStdExtZvbb()) {
4640 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4641 SDValue OffsetVec =
4642 DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: VecContainerVT);
4643 Interleaved = DAG.getNode(Opcode: RISCVISD::VWSLL_VL, DL, VT: WideContainerVT, N1: OddV,
4644 N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL);
4645 if (!EvenV.isUndef())
4646 Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_W_VL, DL, VT: WideContainerVT,
4647 N1: Interleaved, N2: EvenV, N3: Passthru, N4: Mask, N5: VL);
4648 } else if (EvenV.isUndef()) {
4649 Interleaved =
4650 DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: OddV, N2: Mask, N3: VL);
4651
4652 SDValue OffsetVec =
4653 DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: WideContainerVT);
4654 Interleaved = DAG.getNode(Opcode: RISCVISD::SHL_VL, DL, VT: WideContainerVT,
4655 N1: Interleaved, N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL);
4656 } else {
4657 // FIXME: We should freeze the odd vector here. We already handled the case
4658 // of provably undef/poison above.
4659
4660 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4661 // vwaddu.vv
4662 Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_VL, DL, VT: WideContainerVT, N1: EvenV,
4663 N2: OddV, N3: Passthru, N4: Mask, N5: VL);
4664
    // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. multiply
    // OddV by the all-ones value.
4666 SDValue AllOnesVec = DAG.getSplatVector(
4667 VT: VecContainerVT, DL, Op: DAG.getAllOnesConstant(DL, VT: Subtarget.getXLenVT()));
4668 SDValue OddsMul = DAG.getNode(Opcode: RISCVISD::VWMULU_VL, DL, VT: WideContainerVT,
4669 N1: OddV, N2: AllOnesVec, N3: Passthru, N4: Mask, N5: VL);
4670
4671 // Add the two together so we get
4672 // (OddV * 0xff...ff) + (OddV + EvenV)
4673 // = (OddV * 0x100...00) + EvenV
4674 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4676 Interleaved = DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: WideContainerVT,
4677 N1: Interleaved, N2: OddsMul, N3: Passthru, N4: Mask, N5: VL);
4678 }
4679
  // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
4681 MVT ResultContainerVT = MVT::getVectorVT(
4682 VT: VecVT.getVectorElementType(), // Make sure to use original type
4683 EC: VecContainerVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
4684 Interleaved = DAG.getBitcast(VT: ResultContainerVT, V: Interleaved);
4685
4686 // Convert back to a fixed vector if needed
4687 MVT ResultVT =
4688 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
4689 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
4690 if (ResultVT.isFixedLengthVector())
4691 Interleaved =
4692 convertFromScalableVector(VT: ResultVT, V: Interleaved, DAG, Subtarget);
4693
4694 return Interleaved;
4695}
4696
4697// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4698// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
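// A sketch of the non-exact case (illustrative): reversing a v4i1 mask goes
// through an i8 element, so the 4 mask bits end up in the high nibble after
// the bitreverse and are then shifted right by 8 - 4 = 4 to land back in the
// low nibble before being bitcast back to v4i1.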
4699static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4700 SelectionDAG &DAG,
4701 const RISCVSubtarget &Subtarget) {
4702 SDLoc DL(SVN);
4703 MVT VT = SVN->getSimpleValueType(ResNo: 0);
4704 SDValue V = SVN->getOperand(Num: 0);
4705 unsigned NumElts = VT.getVectorNumElements();
4706
4707 assert(VT.getVectorElementType() == MVT::i1);
4708
4709 if (!ShuffleVectorInst::isReverseMask(Mask: SVN->getMask(),
4710 NumSrcElts: SVN->getMask().size()) ||
4711 !SVN->getOperand(Num: 1).isUndef())
4712 return SDValue();
4713
4714 unsigned ViaEltSize = std::max(a: (uint64_t)8, b: PowerOf2Ceil(A: NumElts));
4715 EVT ViaVT = EVT::getVectorVT(
4716 Context&: *DAG.getContext(), VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ViaEltSize), NumElements: 1);
4717 EVT ViaBitVT =
4718 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4719
  // If we don't have Zvbb or the larger element type is wider than ELEN, the
  // operation will be illegal.
4722 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(Op: ISD::BITREVERSE,
4723 VT: ViaVT) ||
4724 !Subtarget.getTargetLowering()->isTypeLegal(VT: ViaBitVT))
4725 return SDValue();
4726
  // If the bit vector doesn't fit exactly into the larger element type, we
  // need to insert it into the larger vector and then shift the reversed bits
  // down afterwards to get rid of the gap introduced.
4730 if (ViaEltSize > NumElts)
4731 V = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ViaBitVT, N1: DAG.getUNDEF(VT: ViaBitVT),
4732 N2: V, N3: DAG.getVectorIdxConstant(Val: 0, DL));
4733
4734 SDValue Res =
4735 DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ViaVT, Operand: DAG.getBitcast(VT: ViaVT, V));
4736
  // Shift the reversed bits down if the vector didn't exactly fit into the
  // larger element type.
4739 if (ViaEltSize > NumElts)
4740 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: ViaVT, N1: Res,
4741 N2: DAG.getConstant(Val: ViaEltSize - NumElts, DL, VT: ViaVT));
4742
4743 Res = DAG.getBitcast(VT: ViaBitVT, V: Res);
4744
4745 if (ViaEltSize > NumElts)
4746 Res = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Res,
4747 N2: DAG.getVectorIdxConstant(Val: 0, DL));
4748 return Res;
4749}
4750
4751static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4752 SelectionDAG &DAG,
4753 const RISCVSubtarget &Subtarget,
4754 MVT &RotateVT, unsigned &RotateAmt) {
4755 SDLoc DL(SVN);
4756
4757 EVT VT = SVN->getValueType(ResNo: 0);
4758 unsigned NumElts = VT.getVectorNumElements();
4759 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4760 unsigned NumSubElts;
4761 if (!ShuffleVectorInst::isBitRotateMask(Mask: SVN->getMask(), EltSizeInBits, MinSubElts: 2,
4762 MaxSubElts: NumElts, NumSubElts, RotateAmt))
4763 return false;
4764 RotateVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSizeInBits * NumSubElts),
4765 NumElements: NumElts / NumSubElts);
4766
4767 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4768 return Subtarget.getTargetLowering()->isTypeLegal(VT: RotateVT);
4769}
4770
4771// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4772// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4773// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4774static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4775 SelectionDAG &DAG,
4776 const RISCVSubtarget &Subtarget) {
4777 SDLoc DL(SVN);
4778
4779 EVT VT = SVN->getValueType(ResNo: 0);
4780 unsigned RotateAmt;
4781 MVT RotateVT;
4782 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4783 return SDValue();
4784
4785 SDValue Op = DAG.getBitcast(VT: RotateVT, V: SVN->getOperand(Num: 0));
4786
4787 SDValue Rotate;
4788 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4789 // so canonicalize to vrev8.
4790 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4791 Rotate = DAG.getNode(Opcode: ISD::BSWAP, DL, VT: RotateVT, Operand: Op);
4792 else
4793 Rotate = DAG.getNode(Opcode: ISD::ROTL, DL, VT: RotateVT, N1: Op,
4794 N2: DAG.getConstant(Val: RotateAmt, DL, VT: RotateVT));
4795
4796 return DAG.getBitcast(VT, V: Rotate);
4797}
4798
4799// If compiling with an exactly known VLEN, see if we can split a
4800// shuffle on m2 or larger into a small number of m1 sized shuffles
// which write each destination register exactly once.
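// Illustrative example, assuming VLEN is known to be 128: a v8i64 shuffle
// (LMUL=4) whose mask only moves whole 128-bit chunks, e.g.
//   <2, 3, 0, 1, 4, 5, 6, 7>
// can be emitted as four independent m1 shuffles (here, whole-register moves
// that swap the first two registers of the group), instead of one large
// vrgather.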
4802static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4803 SelectionDAG &DAG,
4804 const RISCVSubtarget &Subtarget) {
4805 SDLoc DL(SVN);
4806 MVT VT = SVN->getSimpleValueType(ResNo: 0);
4807 SDValue V1 = SVN->getOperand(Num: 0);
4808 SDValue V2 = SVN->getOperand(Num: 1);
4809 ArrayRef<int> Mask = SVN->getMask();
4810 unsigned NumElts = VT.getVectorNumElements();
4811
4812 // If we don't know exact data layout, not much we can do. If this
4813 // is already m1 or smaller, no point in splitting further.
4814 const auto VLen = Subtarget.getRealVLen();
4815 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4816 return SDValue();
4817
4818 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4819 // expansion for.
4820 unsigned RotateAmt;
4821 MVT RotateVT;
4822 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4823 return SDValue();
4824
4825 MVT ElemVT = VT.getVectorElementType();
4826 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4827 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4828
4829 SmallVector<std::pair<int, SmallVector<int>>>
4830 OutMasks(VRegsPerSrc, {-1, {}});
4831
4832 // Check if our mask can be done as a 1-to-1 mapping from source
4833 // to destination registers in the group without needing to
4834 // write each destination more than once.
4835 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4836 int DstVecIdx = DstIdx / ElemsPerVReg;
4837 int DstSubIdx = DstIdx % ElemsPerVReg;
4838 int SrcIdx = Mask[DstIdx];
4839 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4840 continue;
4841 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4842 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4843 if (OutMasks[DstVecIdx].first == -1)
4844 OutMasks[DstVecIdx].first = SrcVecIdx;
4845 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4846 // Note: This case could easily be handled by keeping track of a chain
4847 // of source values and generating two element shuffles below. This is
4848 // less an implementation question, and more a profitability one.
4849 return SDValue();
4850
4851 OutMasks[DstVecIdx].second.resize(N: ElemsPerVReg, NV: -1);
4852 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4853 }
4854
4855 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4856 MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg);
4857 MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget);
4858 assert(M1VT == getLMUL1VT(M1VT));
4859 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4860 SDValue Vec = DAG.getUNDEF(VT: ContainerVT);
4861 // The following semantically builds up a fixed length concat_vector
4862 // of the component shuffle_vectors. We eagerly lower to scalable here
4863 // to avoid DAG combining it back to a large shuffle_vector again.
4864 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
4865 V2 = convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget);
4866 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4867 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4868 if (SrcVecIdx == -1)
4869 continue;
4870 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4871 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4872 SDValue SubVec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: SrcVec,
4873 N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL));
4874 SubVec = convertFromScalableVector(VT: OneRegVT, V: SubVec, DAG, Subtarget);
4875 SubVec = DAG.getVectorShuffle(VT: OneRegVT, dl: DL, N1: SubVec, N2: SubVec, Mask: SrcSubMask);
4876 SubVec = convertToScalableVector(VT: M1VT, V: SubVec, DAG, Subtarget);
4877 unsigned InsertIdx = DstVecIdx * NumOpElts;
4878 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubVec,
4879 N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL));
4880 }
4881 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4882}
4883
4884static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4885 const RISCVSubtarget &Subtarget) {
4886 SDValue V1 = Op.getOperand(i: 0);
4887 SDValue V2 = Op.getOperand(i: 1);
4888 SDLoc DL(Op);
4889 MVT XLenVT = Subtarget.getXLenVT();
4890 MVT VT = Op.getSimpleValueType();
4891 unsigned NumElts = VT.getVectorNumElements();
4892 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op.getNode());
4893
4894 if (VT.getVectorElementType() == MVT::i1) {
4895 // Lower to a vror.vi of a larger element type if possible before we promote
4896 // i1s to i8s.
4897 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4898 return V;
4899 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4900 return V;
4901
4902 // Promote i1 shuffle to i8 shuffle.
4903 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4904 V1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V1);
4905 V2 = V2.isUndef() ? DAG.getUNDEF(VT: WidenVT)
4906 : DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V2);
4907 SDValue Shuffled = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: V1, N2: V2, Mask: SVN->getMask());
4908 return DAG.getSetCC(DL, VT, LHS: Shuffled, RHS: DAG.getConstant(Val: 0, DL, VT: WidenVT),
4909 Cond: ISD::SETNE);
4910 }
4911
4912 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4913
4914 auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
4915
4916 if (SVN->isSplat()) {
4917 const int Lane = SVN->getSplatIndex();
4918 if (Lane >= 0) {
4919 MVT SVT = VT.getVectorElementType();
4920
4921 // Turn splatted vector load into a strided load with an X0 stride.
4922 SDValue V = V1;
4923 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4924 // with undef.
4925 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4926 int Offset = Lane;
4927 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4928 int OpElements =
4929 V.getOperand(i: 0).getSimpleValueType().getVectorNumElements();
4930 V = V.getOperand(i: Offset / OpElements);
4931 Offset %= OpElements;
4932 }
4933
4934 // We need to ensure the load isn't atomic or volatile.
4935 if (ISD::isNormalLoad(N: V.getNode()) && cast<LoadSDNode>(Val&: V)->isSimple()) {
4936 auto *Ld = cast<LoadSDNode>(Val&: V);
4937 Offset *= SVT.getStoreSize();
4938 SDValue NewAddr = DAG.getMemBasePlusOffset(
4939 Base: Ld->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: Offset), DL);
4940
4941 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4942 if (SVT.isInteger() && SVT.bitsGT(VT: XLenVT)) {
4943 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4944 SDValue IntID =
4945 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4946 SDValue Ops[] = {Ld->getChain(),
4947 IntID,
4948 DAG.getUNDEF(ContainerVT),
4949 NewAddr,
4950 DAG.getRegister(RISCV::X0, XLenVT),
4951 VL};
4952 SDValue NewLoad = DAG.getMemIntrinsicNode(
4953 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4954 DAG.getMachineFunction().getMachineMemOperand(
4955 MMO: Ld->getMemOperand(), Offset, Size: SVT.getStoreSize()));
4956 DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: NewLoad);
4957 return convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
4958 }
4959
4960 // Otherwise use a scalar load and splat. This will give the best
4961 // opportunity to fold a splat into the operation. ISel can turn it into
4962 // the x0 strided load if we aren't able to fold away the select.
4963 if (SVT.isFloatingPoint())
4964 V = DAG.getLoad(VT: SVT, dl: DL, Chain: Ld->getChain(), Ptr: NewAddr,
4965 PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset),
4966 Alignment: Ld->getOriginalAlign(),
4967 MMOFlags: Ld->getMemOperand()->getFlags());
4968 else
4969 V = DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl: DL, VT: XLenVT, Chain: Ld->getChain(), Ptr: NewAddr,
4970 PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset), MemVT: SVT,
4971 Alignment: Ld->getOriginalAlign(),
4972 MMOFlags: Ld->getMemOperand()->getFlags());
4973 DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: V);
4974
4975 unsigned Opc =
4976 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4977 SDValue Splat =
4978 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: V, N3: VL);
4979 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
4980 }
4981
4982 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
4983 assert(Lane < (int)NumElts && "Unexpected lane!");
4984 SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT,
4985 N1: V1, N2: DAG.getConstant(Val: Lane, DL, VT: XLenVT),
4986 N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL);
4987 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
4988 }
4989 }
4990
4991 // For exact VLEN m2 or greater, try to split to m1 operations if we
4992 // can split cleanly.
4993 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4994 return V;
4995
4996 ArrayRef<int> Mask = SVN->getMask();
4997
4998 if (SDValue V =
4999 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5000 return V;
5001
5002 if (SDValue V =
5003 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5004 return V;
5005
5006 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5007 // available.
5008 if (Subtarget.hasStdExtZvkb())
5009 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5010 return V;
5011
5012 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5013 // be undef which can be handled with a single SLIDEDOWN/UP.
5014 int LoSrc, HiSrc;
5015 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5016 if (Rotation > 0) {
5017 SDValue LoV, HiV;
5018 if (LoSrc >= 0) {
5019 LoV = LoSrc == 0 ? V1 : V2;
5020 LoV = convertToScalableVector(VT: ContainerVT, V: LoV, DAG, Subtarget);
5021 }
5022 if (HiSrc >= 0) {
5023 HiV = HiSrc == 0 ? V1 : V2;
5024 HiV = convertToScalableVector(VT: ContainerVT, V: HiV, DAG, Subtarget);
5025 }
5026
5027 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5028 // to slide LoV up by (NumElts - Rotation).
5029 unsigned InvRotate = NumElts - Rotation;
5030
5031 SDValue Res = DAG.getUNDEF(VT: ContainerVT);
5032 if (HiV) {
      // Even though we could use a smaller VL, don't do so, to avoid a
      // vsetivli toggle.
5035 Res = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: HiV,
5036 Offset: DAG.getConstant(Val: Rotation, DL, VT: XLenVT), Mask: TrueMask, VL);
5037 }
5038 if (LoV)
5039 Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: LoV,
5040 Offset: DAG.getConstant(Val: InvRotate, DL, VT: XLenVT), Mask: TrueMask, VL,
5041 Policy: RISCVII::TAIL_AGNOSTIC);
5042
5043 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
5044 }
5045
5046 // If this is a deinterleave and we can widen the vector, then we can use
5047 // vnsrl to deinterleave.
5048 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5049 return getDeinterleaveViaVNSRL(DL, VT, Src: V1.getOperand(i: 0), EvenElts: Mask[0] == 0,
5050 Subtarget, DAG);
5051 }
5052
5053 if (SDValue V =
5054 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5055 return V;
5056
5057 // Detect an interleave shuffle and lower to
  // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5059 int EvenSrc, OddSrc;
5060 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5061 // Extract the halves of the vectors.
5062 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5063
5064 int Size = Mask.size();
5065 SDValue EvenV, OddV;
5066 assert(EvenSrc >= 0 && "Undef source?");
5067 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5068 EvenV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: EvenV,
5069 N2: DAG.getVectorIdxConstant(Val: EvenSrc % Size, DL));
5070
5071 assert(OddSrc >= 0 && "Undef source?");
5072 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5073 OddV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: OddV,
5074 N2: DAG.getVectorIdxConstant(Val: OddSrc % Size, DL));
5075
5076 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5077 }
5078
  // Handle any remaining single-source shuffles.
5081 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5082 if (V2.isUndef()) {
5083 // We might be able to express the shuffle as a bitrotate. But even if we
5084 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5085 // shifts and a vor will have a higher throughput than a vrgather.
5086 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5087 return V;
5088
5089 if (VT.getScalarSizeInBits() == 8 &&
5090 any_of(Range&: Mask, P: [&](const auto &Idx) { return Idx > 255; })) {
5091 // On such a vector we're unable to use i8 as the index type.
5092 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5093 // may involve vector splitting if we're already at LMUL=8, or our
5094 // user-supplied maximum fixed-length LMUL.
5095 return SDValue();
5096 }
5097
5098 // Base case for the two operand recursion below - handle the worst case
5099 // single source shuffle.
5100 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5101 MVT IndexVT = VT.changeTypeToInteger();
5102 // Since we can't introduce illegal index types at this stage, use i16 and
5103 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5104 // than XLenVT.
5105 if (IndexVT.getScalarType().bitsGT(VT: XLenVT)) {
5106 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5107 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5108 }
5109
5110 // If the mask allows, we can do all the index computation in 16 bits. This
5111 // requires less work and less register pressure at high LMUL, and creates
5112 // smaller constants which may be cheaper to materialize.
5113 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5114 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5115 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5116 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5117 }
5118
5119 MVT IndexContainerVT =
5120 ContainerVT.changeVectorElementType(EltVT: IndexVT.getScalarType());
5121
5122 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
5123 SmallVector<SDValue> GatherIndicesLHS;
5124 for (int MaskIndex : Mask) {
5125 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5126 GatherIndicesLHS.push_back(Elt: IsLHSIndex
5127 ? DAG.getConstant(Val: MaskIndex, DL, VT: XLenVT)
5128 : DAG.getUNDEF(VT: XLenVT));
5129 }
5130 SDValue LHSIndices = DAG.getBuildVector(VT: IndexVT, DL, Ops: GatherIndicesLHS);
5131 LHSIndices = convertToScalableVector(VT: IndexContainerVT, V: LHSIndices, DAG,
5132 Subtarget);
5133 SDValue Gather = DAG.getNode(Opcode: GatherVVOpc, DL, VT: ContainerVT, N1: V1, N2: LHSIndices,
5134 N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL);
5135 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
5136 }
5137
5138 // By default we preserve the original operand order, and use a mask to
5139 // select LHS as true and RHS as false. However, since RVV vector selects may
5140 // feature splats but only on the LHS, we may choose to invert our mask and
5141 // instead select between RHS and LHS.
5142 bool SwapOps = DAG.isSplatValue(V: V2) && !DAG.isSplatValue(V: V1);
5143
5144 // Detect shuffles which can be re-expressed as vector selects; these are
5145 // shuffles in which each element in the destination is taken from an element
5146 // at the corresponding index in either source vectors.
5147 bool IsSelect = all_of(Range: enumerate(First&: Mask), P: [&](const auto &MaskIdx) {
5148 int MaskIndex = MaskIdx.value();
5149 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5150 });
5151 if (IsSelect) {
5152 // Now construct the mask that will be used by the vselect operation.
5153 SmallVector<SDValue> MaskVals;
5154 for (int MaskIndex : Mask) {
5155 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5156 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
5157 }
5158
5159 if (SwapOps)
5160 std::swap(a&: V1, b&: V2);
5161
5162 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5163 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5164 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
5165 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V1, N3: V2);
5166 }
5167
5168 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5169 // merged with a second vrgather.
5170 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5171 SmallVector<SDValue> MaskVals;
5172
5173 // Now construct the mask that will be used by the blended vrgather operation.
  // Also construct the appropriate indices into each vector.
5175 for (int MaskIndex : Mask) {
5176 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5177 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
5178 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5179 ShuffleMaskLHS.push_back(Elt: IsLHSOrUndefIndex && MaskIndex >= 0
5180 ? MaskIndex : -1);
5181 ShuffleMaskRHS.push_back(Elt: IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5182 }
5183
5184 if (SwapOps) {
5185 std::swap(a&: V1, b&: V2);
5186 std::swap(LHS&: ShuffleMaskLHS, RHS&: ShuffleMaskRHS);
5187 }
5188
5189 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5190 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5191 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
5192
5193 // Recursively invoke lowering for each operand if we had two
5194 // independent single source shuffles, and then combine the result via a
5195 // vselect. Note that the vselect will likely be folded back into the
5196 // second permute (vrgather, or other) by the post-isel combine.
5197 V1 = DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskLHS);
5198 V2 = DAG.getVectorShuffle(VT, dl: DL, N1: V2, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskRHS);
5199 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V2, N3: V1);
5200}
5201
5202bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5203 // Support splats for any type. These should type legalize well.
5204 if (ShuffleVectorSDNode::isSplatMask(Mask: M.data(), VT))
5205 return true;
5206
5207 // Only support legal VTs for other shuffles for now.
5208 if (!isTypeLegal(VT))
5209 return false;
5210
5211 MVT SVT = VT.getSimpleVT();
5212
5213 // Not for i1 vectors.
5214 if (SVT.getScalarType() == MVT::i1)
5215 return false;
5216
5217 int Dummy1, Dummy2;
5218 return (isElementRotate(LoSrc&: Dummy1, HiSrc&: Dummy2, Mask: M) > 0) ||
5219 isInterleaveShuffle(Mask: M, VT: SVT, EvenSrc&: Dummy1, OddSrc&: Dummy2, Subtarget);
5220}
5221
5222// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5223// the exponent.
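// For example (a scalar sketch of the idea): for ctlz_zero_undef(i32 16), the
// value 16 converts to a float whose biased exponent field is B + 4, where B
// is the exponent bias; subtracting that from Adjust = B + (EltSize - 1) =
// B + 31 gives 27, the number of leading zeros. For cttz_zero_undef the input
// is first reduced to its lowest set bit with x & -x, and only the bias is
// subtracted from the extracted exponent.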
5224SDValue
5225RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5226 SelectionDAG &DAG) const {
5227 MVT VT = Op.getSimpleValueType();
5228 unsigned EltSize = VT.getScalarSizeInBits();
5229 SDValue Src = Op.getOperand(i: 0);
5230 SDLoc DL(Op);
5231 MVT ContainerVT = VT;
5232
5233 SDValue Mask, VL;
5234 if (Op->isVPOpcode()) {
5235 Mask = Op.getOperand(i: 1);
5236 if (VT.isFixedLengthVector())
5237 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
5238 Subtarget);
5239 VL = Op.getOperand(i: 2);
5240 }
5241
  // We choose an FP type that can represent the value if possible. Otherwise,
  // we use a round-towards-zero conversion so the exponent of the result is
  // correct.
5244 // TODO: Use f16 for i8 when possible?
5245 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5246 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5247 FloatEltVT = MVT::f32;
5248 MVT FloatVT = MVT::getVectorVT(VT: FloatEltVT, EC: VT.getVectorElementCount());
5249
5250 // Legal types should have been checked in the RISCVTargetLowering
5251 // constructor.
5252 // TODO: Splitting may make sense in some cases.
5253 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5254 "Expected legal float type!");
5255
5256 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5257 // The trailing zero count is equal to log2 of this single bit value.
5258 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5259 SDValue Neg = DAG.getNegative(Val: Src, DL, VT);
5260 Src = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: Neg);
5261 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5262 SDValue Neg = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
5263 N2: Src, N3: Mask, N4: VL);
5264 Src = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Src, N2: Neg, N3: Mask, N4: VL);
5265 }
5266
5267 // We have a legal FP type, convert to it.
5268 SDValue FloatVal;
5269 if (FloatVT.bitsGT(VT)) {
5270 if (Op->isVPOpcode())
5271 FloatVal = DAG.getNode(Opcode: ISD::VP_UINT_TO_FP, DL, VT: FloatVT, N1: Src, N2: Mask, N3: VL);
5272 else
5273 FloatVal = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVT, Operand: Src);
5274 } else {
5275 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5276 if (VT.isFixedLengthVector()) {
5277 ContainerVT = getContainerForFixedLengthVector(VT);
5278 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
5279 }
5280 if (!Op->isVPOpcode())
5281 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
5282 SDValue RTZRM =
5283 DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT());
5284 MVT ContainerFloatVT =
5285 MVT::getVectorVT(VT: FloatEltVT, EC: ContainerVT.getVectorElementCount());
5286 FloatVal = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_F_XU_VL, DL, VT: ContainerFloatVT,
5287 N1: Src, N2: Mask, N3: RTZRM, N4: VL);
5288 if (VT.isFixedLengthVector())
5289 FloatVal = convertFromScalableVector(VT: FloatVT, V: FloatVal, DAG, Subtarget);
5290 }
5291 // Bitcast to integer and shift the exponent to the LSB.
5292 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5293 SDValue Bitcast = DAG.getBitcast(VT: IntVT, V: FloatVal);
5294 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5295
5296 SDValue Exp;
5297 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5298 if (Op->isVPOpcode()) {
5299 Exp = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT: IntVT, N1: Bitcast,
5300 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT), N3: Mask, N4: VL);
5301 Exp = DAG.getVPZExtOrTrunc(DL, VT, Op: Exp, Mask, EVL: VL);
5302 } else {
5303 Exp = DAG.getNode(Opcode: ISD::SRL, DL, VT: IntVT, N1: Bitcast,
5304 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT));
5305 if (IntVT.bitsLT(VT))
5306 Exp = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Exp);
5307 else if (IntVT.bitsGT(VT))
5308 Exp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Exp);
5309 }
5310
5311 // The exponent contains log2 of the value in biased form.
5312 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5313 // For trailing zeros, we just need to subtract the bias.
5314 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5315 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Exp,
5316 N2: DAG.getConstant(Val: ExponentBias, DL, VT));
5317 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5318 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Exp,
5319 N2: DAG.getConstant(Val: ExponentBias, DL, VT), N3: Mask, N4: VL);
5320
5321 // For leading zeros, we need to remove the bias and convert from log2 to
5322 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5323 unsigned Adjust = ExponentBias + (EltSize - 1);
5324 SDValue Res;
5325 if (Op->isVPOpcode())
5326 Res = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp,
5327 N3: Mask, N4: VL);
5328 else
5329 Res = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp);
5330
  // For a zero input, the result above equals Adjust, which is greater than
  // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5333 if (Op.getOpcode() == ISD::CTLZ)
5334 Res = DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: Res, N2: DAG.getConstant(Val: EltSize, DL, VT));
5335 else if (Op.getOpcode() == ISD::VP_CTLZ)
5336 Res = DAG.getNode(Opcode: ISD::VP_UMIN, DL, VT, N1: Res,
5337 N2: DAG.getConstant(Val: EltSize, DL, VT), N3: Mask, N4: VL);
5338 return Res;
5339}
5340
5341// While RVV has alignment restrictions, we should always be able to load as a
5342// legal equivalently-sized byte-typed vector instead. This method is
5343// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5344// the load is already correctly-aligned, it returns SDValue().
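// For example (illustrative): a v8i16 load with only 1-byte alignment is
// re-expressed as a v16i8 load of the same address and size, and the result
// is bitcast back to v8i16.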
5345SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5346 SelectionDAG &DAG) const {
5347 auto *Load = cast<LoadSDNode>(Val&: Op);
5348 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5349
5350 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
5351 VT: Load->getMemoryVT(),
5352 MMO: *Load->getMemOperand()))
5353 return SDValue();
5354
5355 SDLoc DL(Op);
5356 MVT VT = Op.getSimpleValueType();
5357 unsigned EltSizeBits = VT.getScalarSizeInBits();
5358 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5359 "Unexpected unaligned RVV load type");
5360 MVT NewVT =
5361 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5362 assert(NewVT.isValid() &&
5363 "Expecting equally-sized RVV vector types to be legal");
5364 SDValue L = DAG.getLoad(VT: NewVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(),
5365 PtrInfo: Load->getPointerInfo(), Alignment: Load->getOriginalAlign(),
5366 MMOFlags: Load->getMemOperand()->getFlags());
5367 return DAG.getMergeValues(Ops: {DAG.getBitcast(VT, V: L), L.getValue(R: 1)}, dl: DL);
5368}
5369
5370// While RVV has alignment restrictions, we should always be able to store as a
5371// legal equivalently-sized byte-typed vector instead. This method is
5372// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5373// returns SDValue() if the store is already correctly aligned.
5374SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5375 SelectionDAG &DAG) const {
5376 auto *Store = cast<StoreSDNode>(Val&: Op);
5377 assert(Store && Store->getValue().getValueType().isVector() &&
5378 "Expected vector store");
5379
5380 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
5381 VT: Store->getMemoryVT(),
5382 MMO: *Store->getMemOperand()))
5383 return SDValue();
5384
5385 SDLoc DL(Op);
5386 SDValue StoredVal = Store->getValue();
5387 MVT VT = StoredVal.getSimpleValueType();
5388 unsigned EltSizeBits = VT.getScalarSizeInBits();
5389 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5390 "Unexpected unaligned RVV store type");
5391 MVT NewVT =
5392 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5393 assert(NewVT.isValid() &&
5394 "Expecting equally-sized RVV vector types to be legal");
5395 StoredVal = DAG.getBitcast(VT: NewVT, V: StoredVal);
5396 return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: StoredVal, Ptr: Store->getBasePtr(),
5397 PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(),
5398 MMOFlags: Store->getMemOperand()->getFlags());
5399}
5400
5401static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5402 const RISCVSubtarget &Subtarget) {
5403 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5404
5405 int64_t Imm = cast<ConstantSDNode>(Val&: Op)->getSExtValue();
5406
5407 // All simm32 constants should be handled by isel.
5408 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5409 // this check redundant, but small immediates are common so this check
5410 // should have better compile time.
5411 if (isInt<32>(x: Imm))
5412 return Op;
5413
  // We only need to cost the immediate if constant pool lowering is enabled.
5415 if (!Subtarget.useConstantPoolForLargeInts())
5416 return Op;
5417
5418 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5419 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5420 return Op;
5421
5422 // Optimizations below are disabled for opt size. If we're optimizing for
5423 // size, use a constant pool.
5424 if (DAG.shouldOptForSize())
5425 return SDValue();
5426
  // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool. It will require an extra
  // temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
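  // For example (illustrative): with Zba, an immediate such as
  // 0xdeadbeefdeadbeef (identical halves, bits 31 and 63 set) can be built by
  // materializing the sign-extended low half X = 0xffffffffdeadbeef and then
  // computing ADD_UW X, (SLLI X, 32), since add.uw adds the zero-extended low
  // word of X onto the shifted copy.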
5432 unsigned ShiftAmt, AddOpc;
5433 RISCVMatInt::InstSeq SeqLo =
5434 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5435 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5436 return Op;
5437
5438 return SDValue();
5439}
5440
5441static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5442 const RISCVSubtarget &Subtarget) {
5443 SDLoc dl(Op);
5444 AtomicOrdering FenceOrdering =
5445 static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1));
5446 SyncScope::ID FenceSSID =
5447 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
5448
5449 if (Subtarget.hasStdExtZtso()) {
5450 // The only fence that needs an instruction is a sequentially-consistent
5451 // cross-thread fence.
5452 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5453 FenceSSID == SyncScope::System)
5454 return Op;
5455
5456 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5457 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5458 }
5459
5460 // singlethread fences only synchronize with signal handlers on the same
5461 // thread and thus only need to preserve instruction order, not actually
5462 // enforce memory ordering.
5463 if (FenceSSID == SyncScope::SingleThread)
5464 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5465 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5466
5467 return Op;
5468}
5469
5470static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5471 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5472 "Unexpected custom legalisation");
5473
5474 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
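 // For example (illustrative): saddsat i32 0x7fffffff, 1 widens to an i64 add
 // producing 0x80000000, which the smin against INT32_MAX clamps back to
 // 0x7fffffff before the truncate.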
5475 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5476 SDLoc DL(Op);
5477 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5478 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5479 SDValue Result =
5480 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5481
5482 APInt MinVal = APInt::getSignedMinValue(numBits: 32).sext(width: 64);
5483 APInt MaxVal = APInt::getSignedMaxValue(numBits: 32).sext(width: 64);
5484 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5485 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5486 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5487 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5488 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5489}
5490
5491static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5492 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5493 "Unexpected custom legalisation");
5494
5495 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5496 // sign extend allows overflow of the lower 32 bits to be detected in
5497 // the promoted type.
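 // For example (illustrative): uaddsat i32 0xffffffff, 1 becomes a 64-bit
 // uaddsat of 0xffffffffffffffff and 1, which saturates to all-ones; the
 // truncate then yields the expected 0xffffffff.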
5498 SDLoc DL(Op);
5499 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5500 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5501 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5502 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5503}
5504
5505// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
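// For example (illustrative): for saddo i32 0x7fffffff, 1 the 64-bit sum is
// 0x80000000, but sign-extending it from bit 31 gives 0xffffffff80000000, so
// the setne on the two values reports overflow.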
5506static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5507 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5508 "Unexpected custom legalisation");
5509 if (isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
5510 return SDValue();
5511
5512 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5513 SDLoc DL(Op);
5514 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5515 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5516 SDValue WideOp =
5517 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5518 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5519 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5520 DAG.getValueType(MVT::i32));
5521 SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: WideOp, RHS: SExt,
5522 Cond: ISD::SETNE);
5523 return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL);
5524}
5525
5526// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5527static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5528 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5529 "Unexpected custom legalisation");
5530 SDLoc DL(Op);
5531 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5532 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5533 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5534 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5535 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5536 DAG.getValueType(MVT::i32));
5537 SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: Mul, RHS: SExt,
5538 Cond: ISD::SETNE);
5539 return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL);
5540}
5541
5542SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5543 SelectionDAG &DAG) const {
5544 SDLoc DL(Op);
5545 MVT VT = Op.getSimpleValueType();
5546 MVT XLenVT = Subtarget.getXLenVT();
5547 unsigned Check = Op.getConstantOperandVal(i: 1);
5548 unsigned TDCMask = 0;
5549 if (Check & fcSNan)
5550 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5551 if (Check & fcQNan)
5552 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5553 if (Check & fcPosInf)
5554 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5555 if (Check & fcNegInf)
5556 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5557 if (Check & fcPosNormal)
5558 TDCMask |= RISCV::FPMASK_Positive_Normal;
5559 if (Check & fcNegNormal)
5560 TDCMask |= RISCV::FPMASK_Negative_Normal;
5561 if (Check & fcPosSubnormal)
5562 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5563 if (Check & fcNegSubnormal)
5564 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5565 if (Check & fcPosZero)
5566 TDCMask |= RISCV::FPMASK_Positive_Zero;
5567 if (Check & fcNegZero)
5568 TDCMask |= RISCV::FPMASK_Negative_Zero;
5569
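 // At this point TDCMask mirrors the 10-bit class mask produced by the
 // fclass/vfclass.v instructions (bit 0 = negative infinity, ..., bit 9 =
 // quiet NaN), so the class test reduces to an AND (or, for a single class,
 // an equality check) against that mask.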
5570 bool IsOneBitMask = isPowerOf2_32(Value: TDCMask);
5571
5572 SDValue TDCMaskV = DAG.getConstant(Val: TDCMask, DL, VT: XLenVT);
5573
5574 if (VT.isVector()) {
5575 SDValue Op0 = Op.getOperand(i: 0);
5576 MVT VT0 = Op.getOperand(i: 0).getSimpleValueType();
5577
5578 if (VT.isScalableVector()) {
5579 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5580 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT0, DL, DAG, Subtarget);
5581 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5582 Mask = Op.getOperand(i: 2);
5583 VL = Op.getOperand(i: 3);
5584 }
5585 SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: DstVT, N1: Op0, N2: Mask,
5586 N3: VL, Flags: Op->getFlags());
5587 if (IsOneBitMask)
5588 return DAG.getSetCC(DL, VT, LHS: FPCLASS,
5589 RHS: DAG.getConstant(Val: TDCMask, DL, VT: DstVT),
5590 Cond: ISD::CondCode::SETEQ);
5591 SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: DstVT, N1: FPCLASS,
5592 N2: DAG.getConstant(Val: TDCMask, DL, VT: DstVT));
5593 return DAG.getSetCC(DL, VT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: DstVT),
5594 Cond: ISD::SETNE);
5595 }
5596
5597 MVT ContainerVT0 = getContainerForFixedLengthVector(VT: VT0);
5598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5599 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5600 auto [Mask, VL] = getDefaultVLOps(VecVT: VT0, ContainerVT: ContainerVT0, DL, DAG, Subtarget);
5601 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5602 Mask = Op.getOperand(i: 2);
5603 MVT MaskContainerVT =
5604 getContainerForFixedLengthVector(VT: Mask.getSimpleValueType());
5605 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
5606 VL = Op.getOperand(i: 3);
5607 }
5608 Op0 = convertToScalableVector(VT: ContainerVT0, V: Op0, DAG, Subtarget);
5609
5610 SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: ContainerDstVT, N1: Op0,
5611 N2: Mask, N3: VL, Flags: Op->getFlags());
5612
5613 TDCMaskV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT,
5614 N1: DAG.getUNDEF(VT: ContainerDstVT), N2: TDCMaskV, N3: VL);
5615 if (IsOneBitMask) {
5616 SDValue VMSEQ =
5617 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
5618 Ops: {FPCLASS, TDCMaskV, DAG.getCondCode(Cond: ISD::SETEQ),
5619 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5620 return convertFromScalableVector(VT, V: VMSEQ, DAG, Subtarget);
5621 }
5622 SDValue AND = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerDstVT, N1: FPCLASS,
5623 N2: TDCMaskV, N3: DAG.getUNDEF(VT: ContainerDstVT), N4: Mask, N5: VL);
5624
5625 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
5626 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT,
5627 N1: DAG.getUNDEF(VT: ContainerDstVT), N2: SplatZero, N3: VL);
5628
5629 SDValue VMSNE = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
5630 Ops: {AND, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
5631 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5632 return convertFromScalableVector(VT, V: VMSNE, DAG, Subtarget);
5633 }
5634
5635 SDValue FCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS, DL, VT: XLenVT, Operand: Op.getOperand(i: 0));
5636 SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: FCLASS, N2: TDCMaskV);
5637 SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT),
5638 Cond: ISD::CondCode::SETNE);
5639 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res);
5640}
5641
5642// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5643// operations propagate nans.
5644static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5645 const RISCVSubtarget &Subtarget) {
5646 SDLoc DL(Op);
5647 MVT VT = Op.getSimpleValueType();
5648
5649 SDValue X = Op.getOperand(i: 0);
5650 SDValue Y = Op.getOperand(i: 1);
5651
5652 if (!VT.isVector()) {
5653 MVT XLenVT = Subtarget.getXLenVT();
5654
5655 // If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
5656 // ensures that when one input is a NaN, the other will also be a NaN,
5657 // allowing the NaN to propagate. If both inputs are NaN, this will swap the
5658 // inputs, which is harmless.
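 // For example (illustrative): for fmaximum(NaN, 1.0), X fails the ordered
 // self-compare, so NewY becomes X (NaN); Y is ordered, so NewX remains X
 // (also NaN), and fmax then returns the canonical NaN that fmaximum requires.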
5659
5660 SDValue NewY = Y;
5661 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: X)) {
5662 SDValue XIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: X, RHS: X, Cond: ISD::SETOEQ);
5663 NewY = DAG.getSelect(DL, VT, Cond: XIsNonNan, LHS: Y, RHS: X);
5664 }
5665
5666 SDValue NewX = X;
5667 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: Y)) {
5668 SDValue YIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: Y, RHS: Y, Cond: ISD::SETOEQ);
5669 NewX = DAG.getSelect(DL, VT, Cond: YIsNonNan, LHS: X, RHS: Y);
5670 }
5671
5672 unsigned Opc =
5673 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5674 return DAG.getNode(Opcode: Opc, DL, VT, N1: NewX, N2: NewY);
5675 }
5676
5677 // Check for known-non-NaN inputs before converting the fixed-length vectors to scalable ones.
5678 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: X);
5679 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: Y);
5680
5681 MVT ContainerVT = VT;
5682 if (VT.isFixedLengthVector()) {
5683 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5684 X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget);
5685 Y = convertToScalableVector(VT: ContainerVT, V: Y, DAG, Subtarget);
5686 }
5687
5688 SDValue Mask, VL;
5689 if (Op->isVPOpcode()) {
5690 Mask = Op.getOperand(i: 2);
5691 if (VT.isFixedLengthVector())
5692 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
5693 Subtarget);
5694 VL = Op.getOperand(i: 3);
5695 } else {
5696 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
5697 }
5698
5699 SDValue NewY = Y;
5700 if (!XIsNeverNan) {
5701 SDValue XIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
5702 Ops: {X, X, DAG.getCondCode(Cond: ISD::SETOEQ),
5703 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5704 NewY = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: XIsNonNan, N2: Y, N3: X,
5705 N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
5706 }
5707
5708 SDValue NewX = X;
5709 if (!YIsNeverNan) {
5710 SDValue YIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
5711 Ops: {Y, Y, DAG.getCondCode(Cond: ISD::SETOEQ),
5712 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
5713 NewX = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: YIsNonNan, N2: X, N3: Y,
5714 N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
5715 }
5716
5717 unsigned Opc =
5718 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5719 ? RISCVISD::VFMAX_VL
5720 : RISCVISD::VFMIN_VL;
5721 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: NewX, N2: NewY,
5722 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
5723 if (VT.isFixedLengthVector())
5724 Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
5725 return Res;
5726}
5727
5728/// Get the RISC-V target-specific VL op for a given SDNode.
5729static unsigned getRISCVVLOp(SDValue Op) {
5730#define OP_CASE(NODE) \
5731 case ISD::NODE: \
5732 return RISCVISD::NODE##_VL;
5733#define VP_CASE(NODE) \
5734 case ISD::VP_##NODE: \
5735 return RISCVISD::NODE##_VL;
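// For example, OP_CASE(ADD) expands to "case ISD::ADD: return
// RISCVISD::ADD_VL;" and VP_CASE(ADD) expands to "case ISD::VP_ADD: return
// RISCVISD::ADD_VL;".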
5736 // clang-format off
5737 switch (Op.getOpcode()) {
5738 default:
5739 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5740 OP_CASE(ADD)
5741 OP_CASE(SUB)
5742 OP_CASE(MUL)
5743 OP_CASE(MULHS)
5744 OP_CASE(MULHU)
5745 OP_CASE(SDIV)
5746 OP_CASE(SREM)
5747 OP_CASE(UDIV)
5748 OP_CASE(UREM)
5749 OP_CASE(SHL)
5750 OP_CASE(SRA)
5751 OP_CASE(SRL)
5752 OP_CASE(ROTL)
5753 OP_CASE(ROTR)
5754 OP_CASE(BSWAP)
5755 OP_CASE(CTTZ)
5756 OP_CASE(CTLZ)
5757 OP_CASE(CTPOP)
5758 OP_CASE(BITREVERSE)
5759 OP_CASE(SADDSAT)
5760 OP_CASE(UADDSAT)
5761 OP_CASE(SSUBSAT)
5762 OP_CASE(USUBSAT)
5763 OP_CASE(AVGFLOORU)
5764 OP_CASE(AVGCEILU)
5765 OP_CASE(FADD)
5766 OP_CASE(FSUB)
5767 OP_CASE(FMUL)
5768 OP_CASE(FDIV)
5769 OP_CASE(FNEG)
5770 OP_CASE(FABS)
5771 OP_CASE(FSQRT)
5772 OP_CASE(SMIN)
5773 OP_CASE(SMAX)
5774 OP_CASE(UMIN)
5775 OP_CASE(UMAX)
5776 OP_CASE(STRICT_FADD)
5777 OP_CASE(STRICT_FSUB)
5778 OP_CASE(STRICT_FMUL)
5779 OP_CASE(STRICT_FDIV)
5780 OP_CASE(STRICT_FSQRT)
5781 VP_CASE(ADD) // VP_ADD
5782 VP_CASE(SUB) // VP_SUB
5783 VP_CASE(MUL) // VP_MUL
5784 VP_CASE(SDIV) // VP_SDIV
5785 VP_CASE(SREM) // VP_SREM
5786 VP_CASE(UDIV) // VP_UDIV
5787 VP_CASE(UREM) // VP_UREM
5788 VP_CASE(SHL) // VP_SHL
5789 VP_CASE(FADD) // VP_FADD
5790 VP_CASE(FSUB) // VP_FSUB
5791 VP_CASE(FMUL) // VP_FMUL
5792 VP_CASE(FDIV) // VP_FDIV
5793 VP_CASE(FNEG) // VP_FNEG
5794 VP_CASE(FABS) // VP_FABS
5795 VP_CASE(SMIN) // VP_SMIN
5796 VP_CASE(SMAX) // VP_SMAX
5797 VP_CASE(UMIN) // VP_UMIN
5798 VP_CASE(UMAX) // VP_UMAX
5799 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5800 VP_CASE(SETCC) // VP_SETCC
5801 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5802 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5803 VP_CASE(BITREVERSE) // VP_BITREVERSE
5804 VP_CASE(SADDSAT) // VP_SADDSAT
5805 VP_CASE(UADDSAT) // VP_UADDSAT
5806 VP_CASE(SSUBSAT) // VP_SSUBSAT
5807 VP_CASE(USUBSAT) // VP_USUBSAT
5808 VP_CASE(BSWAP) // VP_BSWAP
5809 VP_CASE(CTLZ) // VP_CTLZ
5810 VP_CASE(CTTZ) // VP_CTTZ
5811 VP_CASE(CTPOP) // VP_CTPOP
5812 case ISD::CTLZ_ZERO_UNDEF:
5813 case ISD::VP_CTLZ_ZERO_UNDEF:
5814 return RISCVISD::CTLZ_VL;
5815 case ISD::CTTZ_ZERO_UNDEF:
5816 case ISD::VP_CTTZ_ZERO_UNDEF:
5817 return RISCVISD::CTTZ_VL;
5818 case ISD::FMA:
5819 case ISD::VP_FMA:
5820 return RISCVISD::VFMADD_VL;
5821 case ISD::STRICT_FMA:
5822 return RISCVISD::STRICT_VFMADD_VL;
5823 case ISD::AND:
5824 case ISD::VP_AND:
5825 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5826 return RISCVISD::VMAND_VL;
5827 return RISCVISD::AND_VL;
5828 case ISD::OR:
5829 case ISD::VP_OR:
5830 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5831 return RISCVISD::VMOR_VL;
5832 return RISCVISD::OR_VL;
5833 case ISD::XOR:
5834 case ISD::VP_XOR:
5835 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5836 return RISCVISD::VMXOR_VL;
5837 return RISCVISD::XOR_VL;
5838 case ISD::VP_SELECT:
5839 case ISD::VP_MERGE:
5840 return RISCVISD::VMERGE_VL;
5841 case ISD::VP_ASHR:
5842 return RISCVISD::SRA_VL;
5843 case ISD::VP_LSHR:
5844 return RISCVISD::SRL_VL;
5845 case ISD::VP_SQRT:
5846 return RISCVISD::FSQRT_VL;
5847 case ISD::VP_SIGN_EXTEND:
5848 return RISCVISD::VSEXT_VL;
5849 case ISD::VP_ZERO_EXTEND:
5850 return RISCVISD::VZEXT_VL;
5851 case ISD::VP_FP_TO_SINT:
5852 return RISCVISD::VFCVT_RTZ_X_F_VL;
5853 case ISD::VP_FP_TO_UINT:
5854 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5855 case ISD::FMINNUM:
5856 case ISD::VP_FMINNUM:
5857 return RISCVISD::VFMIN_VL;
5858 case ISD::FMAXNUM:
5859 case ISD::VP_FMAXNUM:
5860 return RISCVISD::VFMAX_VL;
5861 case ISD::LRINT:
5862 case ISD::VP_LRINT:
5863 case ISD::LLRINT:
5864 case ISD::VP_LLRINT:
5865 return RISCVISD::VFCVT_X_F_VL;
5866 }
5867 // clang-format on
5868#undef OP_CASE
5869#undef VP_CASE
5870}
5871
5872/// Return true if a RISC-V target-specific op has a merge operand.
5873static bool hasMergeOp(unsigned Opcode) {
5874 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5875 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5876 "not a RISC-V target specific op");
5877 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5878 126 &&
5879 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5880 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5881 21 &&
5882 "adding target specific op should update this function");
5883 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5884 return true;
5885 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5886 return true;
5887 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5888 return true;
5889 if (Opcode == RISCVISD::SETCC_VL)
5890 return true;
5891 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5892 return true;
5893 if (Opcode == RISCVISD::VMERGE_VL)
5894 return true;
5895 return false;
5896}
5897
5898/// Return true if a RISC-V target-specific op has a mask operand.
5899static bool hasMaskOp(unsigned Opcode) {
5900 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5901 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5902 "not a RISC-V target specific op");
5903 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5904 126 &&
5905 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5906 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5907 21 &&
5908 "adding target specific op should update this function");
5909 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5910 return true;
5911 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5912 return true;
5913 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5914 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5915 return true;
5916 return false;
5917}
5918
5919static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5920 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType());
5921 SDLoc DL(Op);
5922
5923 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5924 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5925
5926 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5927 if (!Op.getOperand(i: j).getValueType().isVector()) {
5928 LoOperands[j] = Op.getOperand(i: j);
5929 HiOperands[j] = Op.getOperand(i: j);
5930 continue;
5931 }
5932 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
5933 DAG.SplitVector(N: Op.getOperand(i: j), DL);
5934 }
5935
5936 SDValue LoRes =
5937 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags());
5938 SDValue HiRes =
5939 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags());
5940
5941 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes);
5942}
5943
5944static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5945 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5946 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType());
5947 SDLoc DL(Op);
5948
5949 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5950 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5951
5952 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5953 if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == j) {
5954 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
5955 DAG.SplitEVL(N: Op.getOperand(i: j), VecVT: Op.getValueType(), DL);
5956 continue;
5957 }
5958 if (!Op.getOperand(i: j).getValueType().isVector()) {
5959 LoOperands[j] = Op.getOperand(i: j);
5960 HiOperands[j] = Op.getOperand(i: j);
5961 continue;
5962 }
5963 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
5964 DAG.SplitVector(N: Op.getOperand(i: j), DL);
5965 }
5966
5967 SDValue LoRes =
5968 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags());
5969 SDValue HiRes =
5970 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags());
5971
5972 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes);
5973}
5974
5975static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5976 SDLoc DL(Op);
5977
5978 auto [Lo, Hi] = DAG.SplitVector(N: Op.getOperand(i: 1), DL);
5979 auto [MaskLo, MaskHi] = DAG.SplitVector(N: Op.getOperand(i: 2), DL);
5980 auto [EVLLo, EVLHi] =
5981 DAG.SplitEVL(N: Op.getOperand(i: 3), VecVT: Op.getOperand(i: 1).getValueType(), DL);
5982
5983 SDValue ResLo =
5984 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
5985 Ops: {Op.getOperand(i: 0), Lo, MaskLo, EVLLo}, Flags: Op->getFlags());
5986 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
5987 Ops: {ResLo, Hi, MaskHi, EVLHi}, Flags: Op->getFlags());
5988}
5989
5990static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
5991
5992 assert(Op->isStrictFPOpcode());
5993
5994 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op->getValueType(ResNo: 0));
5995
5996 SDVTList LoVTs = DAG.getVTList(VT1: LoVT, VT2: Op->getValueType(ResNo: 1));
5997 SDVTList HiVTs = DAG.getVTList(VT1: HiVT, VT2: Op->getValueType(ResNo: 1));
5998
5999 SDLoc DL(Op);
6000
6001 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6002 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6003
6004 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6005 if (!Op.getOperand(i: j).getValueType().isVector()) {
6006 LoOperands[j] = Op.getOperand(i: j);
6007 HiOperands[j] = Op.getOperand(i: j);
6008 continue;
6009 }
6010 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
6011 DAG.SplitVector(N: Op.getOperand(i: j), DL);
6012 }
6013
6014 SDValue LoRes =
6015 DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: LoVTs, Ops: LoOperands, Flags: Op->getFlags());
6016 HiOperands[0] = LoRes.getValue(R: 1);
6017 SDValue HiRes =
6018 DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: HiVTs, Ops: HiOperands, Flags: Op->getFlags());
6019
6020 SDValue V = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op->getValueType(ResNo: 0),
6021 N1: LoRes.getValue(R: 0), N2: HiRes.getValue(R: 0));
6022 return DAG.getMergeValues(Ops: {V, HiRes.getValue(R: 1)}, dl: DL);
6023}
6024
6025SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6026 SelectionDAG &DAG) const {
6027 switch (Op.getOpcode()) {
6028 default:
6029 report_fatal_error(reason: "unimplemented operand");
6030 case ISD::ATOMIC_FENCE:
6031 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6032 case ISD::GlobalAddress:
6033 return lowerGlobalAddress(Op, DAG);
6034 case ISD::BlockAddress:
6035 return lowerBlockAddress(Op, DAG);
6036 case ISD::ConstantPool:
6037 return lowerConstantPool(Op, DAG);
6038 case ISD::JumpTable:
6039 return lowerJumpTable(Op, DAG);
6040 case ISD::GlobalTLSAddress:
6041 return lowerGlobalTLSAddress(Op, DAG);
6042 case ISD::Constant:
6043 return lowerConstant(Op, DAG, Subtarget);
6044 case ISD::SELECT:
6045 return lowerSELECT(Op, DAG);
6046 case ISD::BRCOND:
6047 return lowerBRCOND(Op, DAG);
6048 case ISD::VASTART:
6049 return lowerVASTART(Op, DAG);
6050 case ISD::FRAMEADDR:
6051 return lowerFRAMEADDR(Op, DAG);
6052 case ISD::RETURNADDR:
6053 return lowerRETURNADDR(Op, DAG);
6054 case ISD::SADDO:
6055 case ISD::SSUBO:
6056 return lowerSADDO_SSUBO(Op, DAG);
6057 case ISD::SMULO:
6058 return lowerSMULO(Op, DAG);
6059 case ISD::SHL_PARTS:
6060 return lowerShiftLeftParts(Op, DAG);
6061 case ISD::SRA_PARTS:
6062 return lowerShiftRightParts(Op, DAG, IsSRA: true);
6063 case ISD::SRL_PARTS:
6064 return lowerShiftRightParts(Op, DAG, IsSRA: false);
6065 case ISD::ROTL:
6066 case ISD::ROTR:
6067 if (Op.getValueType().isFixedLengthVector()) {
6068 assert(Subtarget.hasStdExtZvkb());
6069 return lowerToScalableOp(Op, DAG);
6070 }
6071 assert(Subtarget.hasVendorXTHeadBb() &&
6072 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6073 "Unexpected custom legalization");
6074 // XTHeadBb only supports rotate by constant.
6075 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
6076 return SDValue();
6077 return Op;
6078 case ISD::BITCAST: {
6079 SDLoc DL(Op);
6080 EVT VT = Op.getValueType();
6081 SDValue Op0 = Op.getOperand(i: 0);
6082 EVT Op0VT = Op0.getValueType();
6083 MVT XLenVT = Subtarget.getXLenVT();
6084 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6085 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6086 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0);
6087 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6088 return FPConv;
6089 }
6090 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6091 Subtarget.hasStdExtZfbfmin()) {
6092 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0);
6093 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6094 return FPConv;
6095 }
6096 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6097 Subtarget.hasStdExtFOrZfinx()) {
6098 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6099 SDValue FPConv =
6100 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6101 return FPConv;
6102 }
6103 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6104 SDValue Lo, Hi;
6105 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6106 SDValue RetReg =
6107 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6108 return RetReg;
6109 }
6110
6111 // Consider other scalar<->scalar casts as legal if the types are legal.
6112 // Otherwise expand them.
6113 if (!VT.isVector() && !Op0VT.isVector()) {
6114 if (isTypeLegal(VT) && isTypeLegal(VT: Op0VT))
6115 return Op;
6116 return SDValue();
6117 }
6118
6119 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6120 "Unexpected types");
6121
6122 if (VT.isFixedLengthVector()) {
6123 // We can handle fixed length vector bitcasts with a simple replacement
6124 // in isel.
6125 if (Op0VT.isFixedLengthVector())
6126 return Op;
6127 // When bitcasting from a scalar to a fixed-length vector, insert the
6128 // scalar into a one-element vector of the scalar's type, then bitcast
6129 // that vector to the result type.
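 // For example (illustrative): bitcast i64 %x to v4i16 becomes an
 // insert_vector_elt of %x into an undef v1i64 followed by a v1i64 -> v4i16
 // bitcast.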
6130 if (!Op0VT.isVector()) {
6131 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: Op0VT, NumElements: 1);
6132 if (!isTypeLegal(VT: BVT))
6133 return SDValue();
6134 return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: BVT,
6135 N1: DAG.getUNDEF(VT: BVT), N2: Op0,
6136 N3: DAG.getVectorIdxConstant(Val: 0, DL)));
6137 }
6138 return SDValue();
6139 }
6140 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6141 // thus: bitcast the vector to a one-element vector type whose element type
6142 // is the same as the result type, and extract the first element.
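 // For example (illustrative): bitcast v4i16 %v to i64 becomes a v4i16 ->
 // v1i64 bitcast followed by an extract_vector_elt of element 0.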
6143 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6144 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1);
6145 if (!isTypeLegal(VT: BVT))
6146 return SDValue();
6147 SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0);
6148 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec,
6149 N2: DAG.getVectorIdxConstant(Val: 0, DL));
6150 }
6151 return SDValue();
6152 }
6153 case ISD::INTRINSIC_WO_CHAIN:
6154 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6155 case ISD::INTRINSIC_W_CHAIN:
6156 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6157 case ISD::INTRINSIC_VOID:
6158 return LowerINTRINSIC_VOID(Op, DAG);
6159 case ISD::IS_FPCLASS:
6160 return LowerIS_FPCLASS(Op, DAG);
6161 case ISD::BITREVERSE: {
6162 MVT VT = Op.getSimpleValueType();
6163 if (VT.isFixedLengthVector()) {
6164 assert(Subtarget.hasStdExtZvbb());
6165 return lowerToScalableOp(Op, DAG);
6166 }
6167 SDLoc DL(Op);
6168 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6169 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6170 // Expand bitreverse to a bswap(rev8) followed by brev8.
6171 SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: Op.getOperand(i: 0));
6172 return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: BSwap);
6173 }
6174 case ISD::TRUNCATE:
6175 // Only custom-lower vector truncates
6176 if (!Op.getSimpleValueType().isVector())
6177 return Op;
6178 return lowerVectorTruncLike(Op, DAG);
6179 case ISD::ANY_EXTEND:
6180 case ISD::ZERO_EXTEND:
6181 if (Op.getOperand(0).getValueType().isVector() &&
6182 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6183 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: 1);
6184 return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VZEXT_VL);
6185 case ISD::SIGN_EXTEND:
6186 if (Op.getOperand(0).getValueType().isVector() &&
6187 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6188 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: -1);
6189 return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VSEXT_VL);
6190 case ISD::SPLAT_VECTOR_PARTS:
6191 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6192 case ISD::INSERT_VECTOR_ELT:
6193 return lowerINSERT_VECTOR_ELT(Op, DAG);
6194 case ISD::EXTRACT_VECTOR_ELT:
6195 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6196 case ISD::SCALAR_TO_VECTOR: {
6197 MVT VT = Op.getSimpleValueType();
6198 SDLoc DL(Op);
6199 SDValue Scalar = Op.getOperand(i: 0);
6200 if (VT.getVectorElementType() == MVT::i1) {
6201 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6202 SDValue V = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: WideVT, Operand: Scalar);
6203 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: V);
6204 }
6205 MVT ContainerVT = VT;
6206 if (VT.isFixedLengthVector())
6207 ContainerVT = getContainerForFixedLengthVector(VT);
6208 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
6209 Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Scalar);
6210 SDValue V = DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: ContainerVT,
6211 N1: DAG.getUNDEF(VT: ContainerVT), N2: Scalar, N3: VL);
6212 if (VT.isFixedLengthVector())
6213 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6214 return V;
6215 }
6216 case ISD::VSCALE: {
6217 MVT XLenVT = Subtarget.getXLenVT();
6218 MVT VT = Op.getSimpleValueType();
6219 SDLoc DL(Op);
6220 SDValue Res = DAG.getNode(Opcode: RISCVISD::READ_VLENB, DL, VT: XLenVT);
6221 // We define our scalable vector types for lmul=1 to use a 64-bit known
6222 // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
6223 // vscale as VLENB / 8.
6224 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6225 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6226 report_fatal_error(reason: "Support for VLEN==32 is incomplete.");
6227 // We assume VLENB is a multiple of 8. We manually choose the best shift
6228 // here because SimplifyDemandedBits isn't always able to simplify it.
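 // For example (illustrative): a multiply by vscale x 4 needs only
 // srli(VLENB, 1), since (VLENB / 8) * 4 == VLENB >> 1, while vscale x 24
 // takes the multiple-of-8 path below and becomes VLENB * 3 with no shift.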
6229 uint64_t Val = Op.getConstantOperandVal(i: 0);
6230 if (isPowerOf2_64(Value: Val)) {
6231 uint64_t Log2 = Log2_64(Value: Val);
6232 if (Log2 < 3)
6233 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res,
6234 N2: DAG.getConstant(Val: 3 - Log2, DL, VT));
6235 else if (Log2 > 3)
6236 Res = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: Res,
6237 N2: DAG.getConstant(Val: Log2 - 3, DL, VT: XLenVT));
6238 } else if ((Val % 8) == 0) {
6239 // If the multiplier is a multiple of 8, scale it down to avoid needing
6240 // to shift the VLENB value.
6241 Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: Res,
6242 N2: DAG.getConstant(Val: Val / 8, DL, VT: XLenVT));
6243 } else {
6244 SDValue VScale = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res,
6245 N2: DAG.getConstant(Val: 3, DL, VT: XLenVT));
6246 Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: VScale,
6247 N2: DAG.getConstant(Val, DL, VT: XLenVT));
6248 }
6249 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res);
6250 }
6251 case ISD::FPOWI: {
6252 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6253 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6254 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6255 Op.getOperand(1).getValueType() == MVT::i32) {
6256 SDLoc DL(Op);
6257 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6258 SDValue Powi =
6259 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6260 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6261 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6262 }
6263 return SDValue();
6264 }
6265 case ISD::FMAXIMUM:
6266 case ISD::FMINIMUM:
6267 if (Op.getValueType() == MVT::nxv32f16 &&
6268 (Subtarget.hasVInstructionsF16Minimal() &&
6269 !Subtarget.hasVInstructionsF16()))
6270 return SplitVectorOp(Op, DAG);
6271 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6272 case ISD::FP_EXTEND: {
6273 SDLoc DL(Op);
6274 EVT VT = Op.getValueType();
6275 SDValue Op0 = Op.getOperand(i: 0);
6276 EVT Op0VT = Op0.getValueType();
6277 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6278 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6279 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6280 SDValue FloatVal =
6281 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6282 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6283 }
6284
6285 if (!Op.getValueType().isVector())
6286 return Op;
6287 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6288 }
6289 case ISD::FP_ROUND: {
6290 SDLoc DL(Op);
6291 EVT VT = Op.getValueType();
6292 SDValue Op0 = Op.getOperand(i: 0);
6293 EVT Op0VT = Op0.getValueType();
6294 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6295 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6296 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6297 Subtarget.hasStdExtDOrZdinx()) {
6298 SDValue FloatVal =
6299 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6300 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6301 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6302 }
6303
6304 if (!Op.getValueType().isVector())
6305 return Op;
6306 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6307 }
6308 case ISD::STRICT_FP_ROUND:
6309 case ISD::STRICT_FP_EXTEND:
6310 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6311 case ISD::SINT_TO_FP:
6312 case ISD::UINT_TO_FP:
6313 if (Op.getValueType().isVector() &&
6314 Op.getValueType().getScalarType() == MVT::f16 &&
6315 (Subtarget.hasVInstructionsF16Minimal() &&
6316 !Subtarget.hasVInstructionsF16())) {
6317 if (Op.getValueType() == MVT::nxv32f16)
6318 return SplitVectorOp(Op, DAG);
6319 // int -> f32
6320 SDLoc DL(Op);
6321 MVT NVT =
6322 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6323 SDValue NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops());
6324 // f32 -> f16
6325 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC,
6326 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
6327 }
6328 [[fallthrough]];
6329 case ISD::FP_TO_SINT:
6330 case ISD::FP_TO_UINT:
6331 if (SDValue Op1 = Op.getOperand(i: 0);
6332 Op1.getValueType().isVector() &&
6333 Op1.getValueType().getScalarType() == MVT::f16 &&
6334 (Subtarget.hasVInstructionsF16Minimal() &&
6335 !Subtarget.hasVInstructionsF16())) {
6336 if (Op1.getValueType() == MVT::nxv32f16)
6337 return SplitVectorOp(Op, DAG);
6338 // f16 -> f32
6339 SDLoc DL(Op);
6340 MVT NVT = MVT::getVectorVT(MVT::f32,
6341 Op1.getValueType().getVectorElementCount());
6342 SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1);
6343 // f32 -> int
6344 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), Operand: WidenVec);
6345 }
6346 [[fallthrough]];
6347 case ISD::STRICT_FP_TO_SINT:
6348 case ISD::STRICT_FP_TO_UINT:
6349 case ISD::STRICT_SINT_TO_FP:
6350 case ISD::STRICT_UINT_TO_FP: {
6351 // RVV can only do fp<->int conversions to types half/double the size of
6352 // the source. We custom-lower any conversion that would require two hops
6353 // into a two-step sequence.
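 // For example (illustrative): an nxv2f16 -> nxv2i64 FP_TO_SINT is emitted as
 // an fp_extend to nxv2f32 followed by a single-hop f32 -> i64 conversion.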
6354 MVT VT = Op.getSimpleValueType();
6355 if (!VT.isVector())
6356 return Op;
6357 SDLoc DL(Op);
6358 bool IsStrict = Op->isStrictFPOpcode();
6359 SDValue Src = Op.getOperand(i: 0 + IsStrict);
6360 MVT EltVT = VT.getVectorElementType();
6361 MVT SrcVT = Src.getSimpleValueType();
6362 MVT SrcEltVT = SrcVT.getVectorElementType();
6363 unsigned EltSize = EltVT.getSizeInBits();
6364 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6365 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6366 "Unexpected vector element types");
6367
6368 bool IsInt2FP = SrcEltVT.isInteger();
6369 // Widening conversions
6370 if (EltSize > (2 * SrcEltSize)) {
6371 if (IsInt2FP) {
6372 // Do a regular integer sign/zero extension then convert to float.
6373 MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSize / 2),
6374 EC: VT.getVectorElementCount());
6375 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6376 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6377 ? ISD::ZERO_EXTEND
6378 : ISD::SIGN_EXTEND;
6379 SDValue Ext = DAG.getNode(Opcode: ExtOpcode, DL, VT: IVecVT, Operand: Src);
6380 if (IsStrict)
6381 return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(),
6382 N1: Op.getOperand(i: 0), N2: Ext);
6383 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: Ext);
6384 }
6385 // FP2Int
6386 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6387 // Do one doubling fp_extend then complete the operation by converting
6388 // to int.
6389 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6390 if (IsStrict) {
6391 auto [FExt, Chain] =
6392 DAG.getStrictFPExtendOrRound(Op: Src, Chain: Op.getOperand(i: 0), DL, VT: InterimFVT);
6393 return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(), N1: Chain, N2: FExt);
6394 }
6395 SDValue FExt = DAG.getFPExtendOrRound(Op: Src, DL, VT: InterimFVT);
6396 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: FExt);
6397 }
6398
6399 // Narrowing conversions
6400 if (SrcEltSize > (2 * EltSize)) {
6401 if (IsInt2FP) {
6402 // One narrowing int_to_fp, then an fp_round.
6403 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6404 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6405 if (IsStrict) {
6406 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6407 DAG.getVTList(InterimFVT, MVT::Other),
6408 Op.getOperand(0), Src);
6409 SDValue Chain = Int2FP.getValue(R: 1);
6410 return DAG.getStrictFPExtendOrRound(Op: Int2FP, Chain, DL, VT).first;
6411 }
6412 SDValue Int2FP = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: InterimFVT, Operand: Src);
6413 return DAG.getFPExtendOrRound(Op: Int2FP, DL, VT);
6414 }
6415 // FP2Int
6416 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6417 // representable by the integer, the result is poison.
6418 MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
6419 EC: VT.getVectorElementCount());
6420 if (IsStrict) {
6421 SDValue FP2Int =
6422 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6423 Op.getOperand(0), Src);
6424 SDValue Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int);
6425 return DAG.getMergeValues(Ops: {Res, FP2Int.getValue(R: 1)}, dl: DL);
6426 }
6427 SDValue FP2Int = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: IVecVT, Operand: Src);
6428 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int);
6429 }
6430
6431 // Scalable vectors can exit here: isel patterns handle equally-sized
6432 // conversions as well as halving/doubling ones.
6433 if (!VT.isFixedLengthVector())
6434 return Op;
6435
6436 // For fixed-length vectors we lower to a custom "VL" node.
6437 unsigned RVVOpc = 0;
6438 switch (Op.getOpcode()) {
6439 default:
6440 llvm_unreachable("Impossible opcode");
6441 case ISD::FP_TO_SINT:
6442 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6443 break;
6444 case ISD::FP_TO_UINT:
6445 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6446 break;
6447 case ISD::SINT_TO_FP:
6448 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6449 break;
6450 case ISD::UINT_TO_FP:
6451 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6452 break;
6453 case ISD::STRICT_FP_TO_SINT:
6454 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6455 break;
6456 case ISD::STRICT_FP_TO_UINT:
6457 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6458 break;
6459 case ISD::STRICT_SINT_TO_FP:
6460 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6461 break;
6462 case ISD::STRICT_UINT_TO_FP:
6463 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6464 break;
6465 }
6466
6467 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6468 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
6469 assert(ContainerVT.getVectorElementCount() ==
6470 SrcContainerVT.getVectorElementCount() && "Expected same element count");
6471
6472 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
6473
6474 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
6475 if (IsStrict) {
6476 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6477 Op.getOperand(0), Src, Mask, VL);
6478 SDValue SubVec = convertFromScalableVector(VT, V: Src, DAG, Subtarget);
6479 return DAG.getMergeValues(Ops: {SubVec, Src.getValue(R: 1)}, dl: DL);
6480 }
6481 Src = DAG.getNode(Opcode: RVVOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
6482 return convertFromScalableVector(VT, V: Src, DAG, Subtarget);
6483 }
6484 case ISD::FP_TO_SINT_SAT:
6485 case ISD::FP_TO_UINT_SAT:
6486 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6487 case ISD::FP_TO_BF16: {
6488 // Custom lower to ensure the libcall return is passed in an FPR on hard
6489 // float ABIs.
6490 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6491 SDLoc DL(Op);
6492 MakeLibCallOptions CallOptions;
6493 RTLIB::Libcall LC =
6494 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6495 SDValue Res =
6496 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6497 if (Subtarget.is64Bit() && !RV64LegalI32)
6498 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6499 return DAG.getBitcast(MVT::i32, Res);
6500 }
6501 case ISD::BF16_TO_FP: {
6502 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6503 MVT VT = Op.getSimpleValueType();
6504 SDLoc DL(Op);
6505 Op = DAG.getNode(
6506 Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0),
6507 N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL));
6508 SDValue Res = Subtarget.is64Bit()
6509 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6510 : DAG.getBitcast(MVT::f32, Op);
6511 // fp_extend if the target VT is bigger than f32.
6512 if (VT != MVT::f32)
6513 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
6514 return Res;
6515 }
6516 case ISD::FP_TO_FP16: {
6517 // Custom lower to ensure the libcall return is passed in an FPR on hard
6518 // float ABIs.
6519 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6520 SDLoc DL(Op);
6521 MakeLibCallOptions CallOptions;
6522 RTLIB::Libcall LC =
6523 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6524 SDValue Res =
6525 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6526 if (Subtarget.is64Bit() && !RV64LegalI32)
6527 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6528 return DAG.getBitcast(MVT::i32, Res);
6529 }
6530 case ISD::FP16_TO_FP: {
6531 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6532 // float ABIs.
6533 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6534 SDLoc DL(Op);
6535 MakeLibCallOptions CallOptions;
6536 SDValue Arg = Subtarget.is64Bit()
6537 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6538 Op.getOperand(0))
6539 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6540 SDValue Res =
6541 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6542 .first;
6543 return Res;
6544 }
6545 case ISD::FTRUNC:
6546 case ISD::FCEIL:
6547 case ISD::FFLOOR:
6548 case ISD::FNEARBYINT:
6549 case ISD::FRINT:
6550 case ISD::FROUND:
6551 case ISD::FROUNDEVEN:
6552 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6553 case ISD::LRINT:
6554 case ISD::LLRINT:
6555 return lowerVectorXRINT(Op, DAG, Subtarget);
6556 case ISD::VECREDUCE_ADD:
6557 case ISD::VECREDUCE_UMAX:
6558 case ISD::VECREDUCE_SMAX:
6559 case ISD::VECREDUCE_UMIN:
6560 case ISD::VECREDUCE_SMIN:
6561 return lowerVECREDUCE(Op, DAG);
6562 case ISD::VECREDUCE_AND:
6563 case ISD::VECREDUCE_OR:
6564 case ISD::VECREDUCE_XOR:
6565 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6566 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6567 return lowerVECREDUCE(Op, DAG);
6568 case ISD::VECREDUCE_FADD:
6569 case ISD::VECREDUCE_SEQ_FADD:
6570 case ISD::VECREDUCE_FMIN:
6571 case ISD::VECREDUCE_FMAX:
6572 case ISD::VECREDUCE_FMAXIMUM:
6573 case ISD::VECREDUCE_FMINIMUM:
6574 return lowerFPVECREDUCE(Op, DAG);
6575 case ISD::VP_REDUCE_ADD:
6576 case ISD::VP_REDUCE_UMAX:
6577 case ISD::VP_REDUCE_SMAX:
6578 case ISD::VP_REDUCE_UMIN:
6579 case ISD::VP_REDUCE_SMIN:
6580 case ISD::VP_REDUCE_FADD:
6581 case ISD::VP_REDUCE_SEQ_FADD:
6582 case ISD::VP_REDUCE_FMIN:
6583 case ISD::VP_REDUCE_FMAX:
6584 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6585 (Subtarget.hasVInstructionsF16Minimal() &&
6586 !Subtarget.hasVInstructionsF16()))
6587 return SplitVectorReductionOp(Op, DAG);
6588 return lowerVPREDUCE(Op, DAG);
6589 case ISD::VP_REDUCE_AND:
6590 case ISD::VP_REDUCE_OR:
6591 case ISD::VP_REDUCE_XOR:
6592 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6593 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6594 return lowerVPREDUCE(Op, DAG);
6595 case ISD::UNDEF: {
6596 MVT ContainerVT = getContainerForFixedLengthVector(VT: Op.getSimpleValueType());
6597 return convertFromScalableVector(VT: Op.getSimpleValueType(),
6598 V: DAG.getUNDEF(VT: ContainerVT), DAG, Subtarget);
6599 }
6600 case ISD::INSERT_SUBVECTOR:
6601 return lowerINSERT_SUBVECTOR(Op, DAG);
6602 case ISD::EXTRACT_SUBVECTOR:
6603 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6604 case ISD::VECTOR_DEINTERLEAVE:
6605 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6606 case ISD::VECTOR_INTERLEAVE:
6607 return lowerVECTOR_INTERLEAVE(Op, DAG);
6608 case ISD::STEP_VECTOR:
6609 return lowerSTEP_VECTOR(Op, DAG);
6610 case ISD::VECTOR_REVERSE:
6611 return lowerVECTOR_REVERSE(Op, DAG);
6612 case ISD::VECTOR_SPLICE:
6613 return lowerVECTOR_SPLICE(Op, DAG);
6614 case ISD::BUILD_VECTOR:
6615 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6616 case ISD::SPLAT_VECTOR:
6617 if (Op.getValueType().getScalarType() == MVT::f16 &&
6618 (Subtarget.hasVInstructionsF16Minimal() &&
6619 !Subtarget.hasVInstructionsF16())) {
6620 if (Op.getValueType() == MVT::nxv32f16)
6621 return SplitVectorOp(Op, DAG);
6622 SDLoc DL(Op);
6623 SDValue NewScalar =
6624 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6625 SDValue NewSplat = DAG.getNode(
6626 ISD::SPLAT_VECTOR, DL,
6627 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6628 NewScalar);
6629 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NewSplat,
6630 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
6631 }
6632 if (Op.getValueType().getVectorElementType() == MVT::i1)
6633 return lowerVectorMaskSplat(Op, DAG);
6634 return SDValue();
6635 case ISD::VECTOR_SHUFFLE:
6636 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6637 case ISD::CONCAT_VECTORS: {
6638 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6639 // better than going through the stack, as the default expansion does.
6640 SDLoc DL(Op);
6641 MVT VT = Op.getSimpleValueType();
6642 MVT ContainerVT = VT;
6643 if (VT.isFixedLengthVector())
6644 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6645
6646 // Recursively split concat_vectors with more than 2 operands:
6647 //
6648 // concat_vector op1, op2, op3, op4
6649 // ->
6650 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6651 //
6652 // This reduces the length of the chain of vslideups and allows us to
6653 // perform the vslideups at a smaller LMUL, limited to MF2.
6654 if (Op.getNumOperands() > 2 &&
6655 ContainerVT.bitsGE(VT: getLMUL1VT(VT: ContainerVT))) {
6656 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6657 assert(isPowerOf2_32(Op.getNumOperands()));
6658 size_t HalfNumOps = Op.getNumOperands() / 2;
6659 SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
6660 Ops: Op->ops().take_front(N: HalfNumOps));
6661 SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
6662 Ops: Op->ops().drop_front(N: HalfNumOps));
6663 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi);
6664 }
6665
6666 unsigned NumOpElts =
6667 Op.getOperand(i: 0).getSimpleValueType().getVectorMinNumElements();
6668 SDValue Vec = DAG.getUNDEF(VT);
6669 for (const auto &OpIdx : enumerate(First: Op->ops())) {
6670 SDValue SubVec = OpIdx.value();
6671 // Don't insert undef subvectors.
6672 if (SubVec.isUndef())
6673 continue;
6674 Vec =
6675 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Vec, N2: SubVec,
6676 N3: DAG.getVectorIdxConstant(Val: OpIdx.index() * NumOpElts, DL));
6677 }
6678 return Vec;
6679 }
6680 case ISD::LOAD:
6681 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6682 return V;
6683 if (Op.getValueType().isFixedLengthVector())
6684 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6685 return Op;
6686 case ISD::STORE:
6687 if (auto V = expandUnalignedRVVStore(Op, DAG))
6688 return V;
6689 if (Op.getOperand(i: 1).getValueType().isFixedLengthVector())
6690 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6691 return Op;
6692 case ISD::MLOAD:
6693 case ISD::VP_LOAD:
6694 return lowerMaskedLoad(Op, DAG);
6695 case ISD::MSTORE:
6696 case ISD::VP_STORE:
6697 return lowerMaskedStore(Op, DAG);
6698 case ISD::SELECT_CC: {
6699 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6700 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6701 // it into a separate SETCC+SELECT, just as LegalizeDAG would.
6702 SDValue Tmp1 = Op.getOperand(i: 0);
6703 SDValue Tmp2 = Op.getOperand(i: 1);
6704 SDValue True = Op.getOperand(i: 2);
6705 SDValue False = Op.getOperand(i: 3);
6706 EVT VT = Op.getValueType();
6707 SDValue CC = Op.getOperand(i: 4);
6708 EVT CmpVT = Tmp1.getValueType();
6709 EVT CCVT =
6710 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: CmpVT);
6711 SDLoc DL(Op);
6712 SDValue Cond =
6713 DAG.getNode(Opcode: ISD::SETCC, DL, VT: CCVT, N1: Tmp1, N2: Tmp2, N3: CC, Flags: Op->getFlags());
6714 return DAG.getSelect(DL, VT, Cond, LHS: True, RHS: False);
6715 }
6716 case ISD::SETCC: {
6717 MVT OpVT = Op.getOperand(i: 0).getSimpleValueType();
6718 if (OpVT.isScalarInteger()) {
6719 MVT VT = Op.getSimpleValueType();
6720 SDValue LHS = Op.getOperand(i: 0);
6721 SDValue RHS = Op.getOperand(i: 1);
6722 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
6723 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6724 "Unexpected CondCode");
6725
6726 SDLoc DL(Op);
6727
6728 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6729 // convert this to the equivalent of (set(u)ge X, C+1) by using
6730 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6731 // in a register.
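 // For example (illustrative): (setugt X, 5) becomes (xori (sltiu X, 6), 1),
 // i.e. "X >= 6 unsigned", with the 6 folded into the sltiu immediate.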
6732 if (isa<ConstantSDNode>(Val: RHS)) {
6733 int64_t Imm = cast<ConstantSDNode>(Val&: RHS)->getSExtValue();
6734 if (Imm != 0 && isInt<12>(x: (uint64_t)Imm + 1)) {
6735 // If this is an unsigned compare and the constant is -1, incrementing
6736 // the constant would change behavior. The result should be false.
6737 if (CCVal == ISD::SETUGT && Imm == -1)
6738 return DAG.getConstant(Val: 0, DL, VT);
6739 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6740 CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal);
6741 SDValue SetCC = DAG.getSetCC(
6742 DL, VT, LHS, RHS: DAG.getConstant(Val: Imm + 1, DL, VT: OpVT), Cond: CCVal);
6743 return DAG.getLogicalNOT(DL, Val: SetCC, VT);
6744 }
6745 }
6746
6747 // Not a constant we could handle, swap the operands and condition code to
6748 // SETLT/SETULT.
6749 CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal);
6750 return DAG.getSetCC(DL, VT, LHS: RHS, RHS: LHS, Cond: CCVal);
6751 }
6752
6753 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6754 (Subtarget.hasVInstructionsF16Minimal() &&
6755 !Subtarget.hasVInstructionsF16()))
6756 return SplitVectorOp(Op, DAG);
6757
6758 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6759 }
6760 case ISD::ADD:
6761 case ISD::SUB:
6762 case ISD::MUL:
6763 case ISD::MULHS:
6764 case ISD::MULHU:
6765 case ISD::AND:
6766 case ISD::OR:
6767 case ISD::XOR:
6768 case ISD::SDIV:
6769 case ISD::SREM:
6770 case ISD::UDIV:
6771 case ISD::UREM:
6772 case ISD::BSWAP:
6773 case ISD::CTPOP:
6774 return lowerToScalableOp(Op, DAG);
6775 case ISD::SHL:
6776 case ISD::SRA:
6777 case ISD::SRL:
6778 if (Op.getSimpleValueType().isFixedLengthVector())
6779 return lowerToScalableOp(Op, DAG);
6780 // This can be called for an i32 shift amount that needs to be promoted.
6781 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6782 "Unexpected custom legalisation");
6783 return SDValue();
6784 case ISD::FADD:
6785 case ISD::FSUB:
6786 case ISD::FMUL:
6787 case ISD::FDIV:
6788 case ISD::FNEG:
6789 case ISD::FABS:
6790 case ISD::FSQRT:
6791 case ISD::FMA:
6792 case ISD::FMINNUM:
6793 case ISD::FMAXNUM:
6794 if (Op.getValueType() == MVT::nxv32f16 &&
6795 (Subtarget.hasVInstructionsF16Minimal() &&
6796 !Subtarget.hasVInstructionsF16()))
6797 return SplitVectorOp(Op, DAG);
6798 [[fallthrough]];
6799 case ISD::AVGFLOORU:
6800 case ISD::AVGCEILU:
6801 case ISD::SMIN:
6802 case ISD::SMAX:
6803 case ISD::UMIN:
6804 case ISD::UMAX:
6805 return lowerToScalableOp(Op, DAG);
6806 case ISD::UADDSAT:
6807 case ISD::USUBSAT:
6808 if (!Op.getValueType().isVector())
6809 return lowerUADDSAT_USUBSAT(Op, DAG);
6810 return lowerToScalableOp(Op, DAG);
6811 case ISD::SADDSAT:
6812 case ISD::SSUBSAT:
6813 if (!Op.getValueType().isVector())
6814 return lowerSADDSAT_SSUBSAT(Op, DAG);
6815 return lowerToScalableOp(Op, DAG);
6816 case ISD::ABDS:
6817 case ISD::ABDU: {
6818 SDLoc dl(Op);
6819 EVT VT = Op->getValueType(ResNo: 0);
6820 SDValue LHS = DAG.getFreeze(V: Op->getOperand(Num: 0));
6821 SDValue RHS = DAG.getFreeze(V: Op->getOperand(Num: 1));
6822 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6823
6824 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6825 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6826 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6827 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6828 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
6829 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
6830 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
6831 }
6832 case ISD::ABS:
6833 case ISD::VP_ABS:
6834 return lowerABS(Op, DAG);
6835 case ISD::CTLZ:
6836 case ISD::CTLZ_ZERO_UNDEF:
6837 case ISD::CTTZ:
6838 case ISD::CTTZ_ZERO_UNDEF:
6839 if (Subtarget.hasStdExtZvbb())
6840 return lowerToScalableOp(Op, DAG);
6841 assert(Op.getOpcode() != ISD::CTTZ);
6842 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6843 case ISD::VSELECT:
6844 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6845 case ISD::FCOPYSIGN:
6846 if (Op.getValueType() == MVT::nxv32f16 &&
6847 (Subtarget.hasVInstructionsF16Minimal() &&
6848 !Subtarget.hasVInstructionsF16()))
6849 return SplitVectorOp(Op, DAG);
6850 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6851 case ISD::STRICT_FADD:
6852 case ISD::STRICT_FSUB:
6853 case ISD::STRICT_FMUL:
6854 case ISD::STRICT_FDIV:
6855 case ISD::STRICT_FSQRT:
6856 case ISD::STRICT_FMA:
6857 if (Op.getValueType() == MVT::nxv32f16 &&
6858 (Subtarget.hasVInstructionsF16Minimal() &&
6859 !Subtarget.hasVInstructionsF16()))
6860 return SplitStrictFPVectorOp(Op, DAG);
6861 return lowerToScalableOp(Op, DAG);
6862 case ISD::STRICT_FSETCC:
6863 case ISD::STRICT_FSETCCS:
6864 return lowerVectorStrictFSetcc(Op, DAG);
6865 case ISD::STRICT_FCEIL:
6866 case ISD::STRICT_FRINT:
6867 case ISD::STRICT_FFLOOR:
6868 case ISD::STRICT_FTRUNC:
6869 case ISD::STRICT_FNEARBYINT:
6870 case ISD::STRICT_FROUND:
6871 case ISD::STRICT_FROUNDEVEN:
6872 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6873 case ISD::MGATHER:
6874 case ISD::VP_GATHER:
6875 return lowerMaskedGather(Op, DAG);
6876 case ISD::MSCATTER:
6877 case ISD::VP_SCATTER:
6878 return lowerMaskedScatter(Op, DAG);
6879 case ISD::GET_ROUNDING:
6880 return lowerGET_ROUNDING(Op, DAG);
6881 case ISD::SET_ROUNDING:
6882 return lowerSET_ROUNDING(Op, DAG);
6883 case ISD::EH_DWARF_CFA:
6884 return lowerEH_DWARF_CFA(Op, DAG);
6885 case ISD::VP_SELECT:
6886 case ISD::VP_MERGE:
6887 case ISD::VP_ADD:
6888 case ISD::VP_SUB:
6889 case ISD::VP_MUL:
6890 case ISD::VP_SDIV:
6891 case ISD::VP_UDIV:
6892 case ISD::VP_SREM:
6893 case ISD::VP_UREM:
6894 case ISD::VP_UADDSAT:
6895 case ISD::VP_USUBSAT:
6896 case ISD::VP_SADDSAT:
6897 case ISD::VP_SSUBSAT:
6898 case ISD::VP_LRINT:
6899 case ISD::VP_LLRINT:
6900 return lowerVPOp(Op, DAG);
6901 case ISD::VP_AND:
6902 case ISD::VP_OR:
6903 case ISD::VP_XOR:
6904 return lowerLogicVPOp(Op, DAG);
6905 case ISD::VP_FADD:
6906 case ISD::VP_FSUB:
6907 case ISD::VP_FMUL:
6908 case ISD::VP_FDIV:
6909 case ISD::VP_FNEG:
6910 case ISD::VP_FABS:
6911 case ISD::VP_SQRT:
6912 case ISD::VP_FMA:
6913 case ISD::VP_FMINNUM:
6914 case ISD::VP_FMAXNUM:
6915 case ISD::VP_FCOPYSIGN:
6916 if (Op.getValueType() == MVT::nxv32f16 &&
6917 (Subtarget.hasVInstructionsF16Minimal() &&
6918 !Subtarget.hasVInstructionsF16()))
6919 return SplitVPOp(Op, DAG);
6920 [[fallthrough]];
6921 case ISD::VP_ASHR:
6922 case ISD::VP_LSHR:
6923 case ISD::VP_SHL:
6924 return lowerVPOp(Op, DAG);
6925 case ISD::VP_IS_FPCLASS:
6926 return LowerIS_FPCLASS(Op, DAG);
6927 case ISD::VP_SIGN_EXTEND:
6928 case ISD::VP_ZERO_EXTEND:
6929 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6930 return lowerVPExtMaskOp(Op, DAG);
6931 return lowerVPOp(Op, DAG);
6932 case ISD::VP_TRUNCATE:
6933 return lowerVectorTruncLike(Op, DAG);
6934 case ISD::VP_FP_EXTEND:
6935 case ISD::VP_FP_ROUND:
6936 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6937 case ISD::VP_SINT_TO_FP:
6938 case ISD::VP_UINT_TO_FP:
6939 if (Op.getValueType().isVector() &&
6940 Op.getValueType().getScalarType() == MVT::f16 &&
6941 (Subtarget.hasVInstructionsF16Minimal() &&
6942 !Subtarget.hasVInstructionsF16())) {
6943 if (Op.getValueType() == MVT::nxv32f16)
6944 return SplitVPOp(Op, DAG);
6945 // int -> f32
6946 SDLoc DL(Op);
6947 MVT NVT =
6948 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6949 auto NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops());
6950 // f32 -> f16
6951 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC,
6952 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
6953 }
6954 [[fallthrough]];
6955 case ISD::VP_FP_TO_SINT:
6956 case ISD::VP_FP_TO_UINT:
6957 if (SDValue Op1 = Op.getOperand(i: 0);
6958 Op1.getValueType().isVector() &&
6959 Op1.getValueType().getScalarType() == MVT::f16 &&
6960 (Subtarget.hasVInstructionsF16Minimal() &&
6961 !Subtarget.hasVInstructionsF16())) {
6962 if (Op1.getValueType() == MVT::nxv32f16)
6963 return SplitVPOp(Op, DAG);
6964 // f16 -> f32
6965 SDLoc DL(Op);
6966 MVT NVT = MVT::getVectorVT(MVT::f32,
6967 Op1.getValueType().getVectorElementCount());
6968 SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1);
6969 // f32 -> int
6970 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
6971 Ops: {WidenVec, Op.getOperand(i: 1), Op.getOperand(i: 2)});
6972 }
6973 return lowerVPFPIntConvOp(Op, DAG);
6974 case ISD::VP_SETCC:
6975 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6976 (Subtarget.hasVInstructionsF16Minimal() &&
6977 !Subtarget.hasVInstructionsF16()))
6978 return SplitVPOp(Op, DAG);
6979 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6980 return lowerVPSetCCMaskOp(Op, DAG);
6981 [[fallthrough]];
6982 case ISD::VP_SMIN:
6983 case ISD::VP_SMAX:
6984 case ISD::VP_UMIN:
6985 case ISD::VP_UMAX:
6986 case ISD::VP_BITREVERSE:
6987 case ISD::VP_BSWAP:
6988 return lowerVPOp(Op, DAG);
6989 case ISD::VP_CTLZ:
6990 case ISD::VP_CTLZ_ZERO_UNDEF:
6991 if (Subtarget.hasStdExtZvbb())
6992 return lowerVPOp(Op, DAG);
6993 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6994 case ISD::VP_CTTZ:
6995 case ISD::VP_CTTZ_ZERO_UNDEF:
6996 if (Subtarget.hasStdExtZvbb())
6997 return lowerVPOp(Op, DAG);
6998 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6999 case ISD::VP_CTPOP:
7000 return lowerVPOp(Op, DAG);
7001 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7002 return lowerVPStridedLoad(Op, DAG);
7003 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7004 return lowerVPStridedStore(Op, DAG);
7005 case ISD::VP_FCEIL:
7006 case ISD::VP_FFLOOR:
7007 case ISD::VP_FRINT:
7008 case ISD::VP_FNEARBYINT:
7009 case ISD::VP_FROUND:
7010 case ISD::VP_FROUNDEVEN:
7011 case ISD::VP_FROUNDTOZERO:
7012 if (Op.getValueType() == MVT::nxv32f16 &&
7013 (Subtarget.hasVInstructionsF16Minimal() &&
7014 !Subtarget.hasVInstructionsF16()))
7015 return SplitVPOp(Op, DAG);
7016 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7017 case ISD::VP_FMAXIMUM:
7018 case ISD::VP_FMINIMUM:
7019 if (Op.getValueType() == MVT::nxv32f16 &&
7020 (Subtarget.hasVInstructionsF16Minimal() &&
7021 !Subtarget.hasVInstructionsF16()))
7022 return SplitVPOp(Op, DAG);
7023 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7024 case ISD::EXPERIMENTAL_VP_SPLICE:
7025 return lowerVPSpliceExperimental(Op, DAG);
7026 case ISD::EXPERIMENTAL_VP_REVERSE:
7027 return lowerVPReverseExperimental(Op, DAG);
7028 }
7029}
7030
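// Helper overloads for getAddr below: wrap each kind of symbol-bearing node
// (global address, block address, constant pool, jump table) in its
// target-specific equivalent, attaching the requested relocation flags
// (e.g. RISCVII::MO_HI/MO_LO).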
7031static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7032 SelectionDAG &DAG, unsigned Flags) {
7033 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
7034}
7035
7036static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7037 SelectionDAG &DAG, unsigned Flags) {
7038 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
7039 TargetFlags: Flags);
7040}
7041
7042static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7043 SelectionDAG &DAG, unsigned Flags) {
7044 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
7045 Offset: N->getOffset(), TargetFlags: Flags);
7046}
7047
7048static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7049 SelectionDAG &DAG, unsigned Flags) {
7050 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
7051}
7052
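// Materialize the address of a symbol node N. Depending on position
// independence, HWASAN-tagged globals, the code model and the symbol's
// linkage, this emits a PC-relative LLA, a GOT-indirect LGA load, or an
// absolute lui/addi pair.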
7053template <class NodeTy>
7054SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7055 bool IsLocal, bool IsExternWeak) const {
7056 SDLoc DL(N);
7057 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7058
7059 // When HWASAN is used and tagging of global variables is enabled, the
7060 // globals should be accessed via the GOT, since the tagged address of a
7061 // global is incompatible with existing code models. This also applies to
7062 // non-PIC mode.
7063 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7064 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7065 if (IsLocal && !Subtarget.allowTaggedGlobals())
7066 // Use PC-relative addressing to access the symbol. This generates the
7067 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7068 // %pcrel_lo(auipc)).
7069 return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr);
7070
7071 // Use PC-relative addressing to access the GOT for this symbol, then load
7072 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7073 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7074 SDValue Load =
7075 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7076 MachineFunction &MF = DAG.getMachineFunction();
7077 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7078 PtrInfo: MachinePointerInfo::getGOT(MF),
7079 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7080 MachineMemOperand::MOInvariant,
7081 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
7082 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
7083 return Load;
7084 }
7085
7086 switch (getTargetMachine().getCodeModel()) {
7087 default:
7088 report_fatal_error(reason: "Unsupported code model for lowering");
7089 case CodeModel::Small: {
7090 // Generate a sequence for accessing addresses within the first 2 GiB of
7091 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7092 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7093 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7094 SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi);
7095 return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNHi, N2: AddrLo);
7096 }
7097 case CodeModel::Medium: {
7098 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7099 if (IsExternWeak) {
7100 // An extern weak symbol may be undefined, i.e. have value 0, which may
7101 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7102 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7103 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7104 SDValue Load =
7105 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7106 MachineFunction &MF = DAG.getMachineFunction();
7107 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7108 PtrInfo: MachinePointerInfo::getGOT(MF),
7109 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7110 MachineMemOperand::MOInvariant,
7111 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
7112 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
7113 return Load;
7114 }
7115
7116 // Generate a sequence for accessing addresses within any 2GiB range within
7117 // the address space. This generates the pattern (PseudoLLA sym), which
7118 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7119 return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr);
7120 }
7121 }
7122}
7123
7124SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7125 SelectionDAG &DAG) const {
7126 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
7127 assert(N->getOffset() == 0 && "unexpected offset in global node");
7128 const GlobalValue *GV = N->getGlobal();
7129 return getAddr(N, DAG, IsLocal: GV->isDSOLocal(), IsExternWeak: GV->hasExternalWeakLinkage());
7130}
7131
7132SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7133 SelectionDAG &DAG) const {
7134 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Val&: Op);
7135
7136 return getAddr(N, DAG);
7137}
7138
7139SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7140 SelectionDAG &DAG) const {
7141 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Val&: Op);
7142
7143 return getAddr(N, DAG);
7144}
7145
7146SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7147 SelectionDAG &DAG) const {
7148 JumpTableSDNode *N = cast<JumpTableSDNode>(Val&: Op);
7149
7150 return getAddr(N, DAG);
7151}
7152
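// Lower a TLS address for the local-exec model (UseGOT == false), which
// materializes the tp-relative offset with %tprel_hi/%tprel_add/%tprel_lo, or
// the initial-exec model (UseGOT == true), which loads the offset from the
// GOT; both then add the thread pointer (x4).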
7153SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7154 SelectionDAG &DAG,
7155 bool UseGOT) const {
7156 SDLoc DL(N);
7157 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7158 const GlobalValue *GV = N->getGlobal();
7159 MVT XLenVT = Subtarget.getXLenVT();
7160
7161 if (UseGOT) {
7162 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7163 // load the address from the GOT and add the thread pointer. This generates
7164 // the pattern (PseudoLA_TLS_IE sym), which expands to
7165 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7166 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
7167 SDValue Load =
7168 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7169 MachineFunction &MF = DAG.getMachineFunction();
7170 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7171 PtrInfo: MachinePointerInfo::getGOT(MF),
7172 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7173 MachineMemOperand::MOInvariant,
7174 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
7175 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
7176
7177 // Add the thread pointer.
7178 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7179 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Load, N2: TPReg);
7180 }
7181
7182 // Generate a sequence for accessing the address relative to the thread
7183 // pointer, with the appropriate adjustment for the thread pointer offset.
7184 // This generates the pattern
7185 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7186 SDValue AddrHi =
7187 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_HI);
7188 SDValue AddrAdd =
7189 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_ADD);
7190 SDValue AddrLo =
7191 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_LO);
7192
7193 SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi);
7194 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7195 SDValue MNAdd =
7196 DAG.getNode(Opcode: RISCVISD::ADD_TPREL, DL, VT: Ty, N1: MNHi, N2: TPReg, N3: AddrAdd);
7197 return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNAdd, N2: AddrLo);
7198}
7199
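// Lower a TLS address under the general-dynamic model: compute the address of
// the symbol's GOT entry with PseudoLA_TLS_GD and pass it to a call to
// __tls_get_addr.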
7200SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7201 SelectionDAG &DAG) const {
7202 SDLoc DL(N);
7203 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7204 IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());
7205 const GlobalValue *GV = N->getGlobal();
7206
7207 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7208 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7209 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7210 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
7211 SDValue Load =
7212 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7213
7214 // Prepare argument list to generate call.
7215 ArgListTy Args;
7216 ArgListEntry Entry;
7217 Entry.Node = Load;
7218 Entry.Ty = CallTy;
7219 Args.push_back(x: Entry);
7220
7221 // Set up the call to __tls_get_addr.
7222 TargetLowering::CallLoweringInfo CLI(DAG);
7223 CLI.setDebugLoc(DL)
7224 .setChain(DAG.getEntryNode())
7225 .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
7226 Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
7227 ArgsList: std::move(Args));
7228
7229 return LowerCallTo(CLI).first;
7230}
7231
7232SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7233 SelectionDAG &DAG) const {
7234 SDLoc DL(N);
7235 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
7236 const GlobalValue *GV = N->getGlobal();
7237
7238 // Use a PC-relative addressing mode to access this symbol's TLS descriptor.
7239 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7240 //
7241 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7242 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7243 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7244 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7245 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
7246 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7247}
7248
7249SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7250 SelectionDAG &DAG) const {
7251 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
7252 assert(N->getOffset() == 0 && "unexpected offset in global node");
7253
7254 if (DAG.getTarget().useEmulatedTLS())
7255 return LowerToTLSEmulatedModel(GA: N, DAG);
7256
7257 TLSModel::Model Model = getTargetMachine().getTLSModel(GV: N->getGlobal());
7258
7259 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7260 CallingConv::GHC)
7261 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
7262
7263 SDValue Addr;
7264 switch (Model) {
7265 case TLSModel::LocalExec:
7266 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7267 break;
7268 case TLSModel::InitialExec:
7269 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7270 break;
7271 case TLSModel::LocalDynamic:
7272 case TLSModel::GeneralDynamic:
7273 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7274 : getDynamicTLSAddr(N, DAG);
7275 break;
7276 }
7277
7278 return Addr;
7279}
7280
7281// Return true if Val is equal to (setcc LHS, RHS, CC).
7282// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7283// Otherwise, return std::nullopt.
7284static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7285 ISD::CondCode CC, SDValue Val) {
7286 assert(Val->getOpcode() == ISD::SETCC);
7287 SDValue LHS2 = Val.getOperand(i: 0);
7288 SDValue RHS2 = Val.getOperand(i: 1);
7289 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get();
7290
7291 if (LHS == LHS2 && RHS == RHS2) {
7292 if (CC == CC2)
7293 return true;
7294 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
7295 return false;
7296 } else if (LHS == RHS2 && RHS == LHS2) {
7297 CC2 = ISD::getSetCCSwappedOperands(Operation: CC2);
7298 if (CC == CC2)
7299 return true;
7300 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
7301 return false;
7302 }
7303
7304 return std::nullopt;
7305}
7306
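// Try to replace an integer select with cheap bitwise arithmetic: when one arm
// is the constant 0 or -1, when the two arms are bitwise complements of each
// other, or when both arms are setccs over the same operands as the condition.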
7307static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7308 const RISCVSubtarget &Subtarget) {
7309 SDValue CondV = N->getOperand(Num: 0);
7310 SDValue TrueV = N->getOperand(Num: 1);
7311 SDValue FalseV = N->getOperand(Num: 2);
7312 MVT VT = N->getSimpleValueType(ResNo: 0);
7313 SDLoc DL(N);
7314
7315 if (!Subtarget.hasConditionalMoveFusion()) {
7316 // (select c, -1, y) -> -c | y
7317 if (isAllOnesConstant(V: TrueV)) {
7318 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
7319 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
7320 }
7321 // (select c, y, -1) -> (c-1) | y
7322 if (isAllOnesConstant(V: FalseV)) {
7323 SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV,
7324 N2: DAG.getAllOnesConstant(DL, VT));
7325 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
7326 }
7327
7328 // (select c, 0, y) -> (c-1) & y
7329 if (isNullConstant(V: TrueV)) {
7330 SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV,
7331 N2: DAG.getAllOnesConstant(DL, VT));
7332 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
7333 }
7334 // (select c, y, 0) -> -c & y
7335 if (isNullConstant(V: FalseV)) {
7336 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
7337 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
7338 }
7339 }
7340
7341 // select c, ~x, x --> xor -c, x
7342 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
7343 const APInt &TrueVal = TrueV->getAsAPIntVal();
7344 const APInt &FalseVal = FalseV->getAsAPIntVal();
7345 if (~TrueVal == FalseVal) {
7346 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
7347 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV);
7348 }
7349 }
7350
7351 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7352 // when both truev and falsev are also setcc.
7353 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7354 FalseV.getOpcode() == ISD::SETCC) {
7355 SDValue LHS = CondV.getOperand(i: 0);
7356 SDValue RHS = CondV.getOperand(i: 1);
7357 ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
7358
7359 // (select x, x, y) -> x | y
7360 // (select !x, x, y) -> x & y
7361 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) {
7362 return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV,
7363 N2: DAG.getFreeze(V: FalseV));
7364 }
7365 // (select x, y, x) -> x & y
7366 // (select !x, y, x) -> x | y
7367 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) {
7368 return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT,
7369 N1: DAG.getFreeze(V: TrueV), N2: FalseV);
7370 }
7371 }
7372
7373 return SDValue();
7374}
7375
7376// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7377// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7378// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7379// being `0` or `-1`. In such cases we can replace `select` with `and`.
7380// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7381// than `c0`?
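// For example, with c0 == -1, c1 == 1 and an ADD:
//   (add (select cond, x, -1), 1) -> (select cond, (add x, 1), 0)
// and the resulting select of 0 can then be lowered to a simple AND.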
7382static SDValue
7383foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7384 const RISCVSubtarget &Subtarget) {
7385 if (Subtarget.hasShortForwardBranchOpt())
7386 return SDValue();
7387
7388 unsigned SelOpNo = 0;
7389 SDValue Sel = BO->getOperand(Num: 0);
7390 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7391 SelOpNo = 1;
7392 Sel = BO->getOperand(Num: 1);
7393 }
7394
7395 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7396 return SDValue();
7397
7398 unsigned ConstSelOpNo = 1;
7399 unsigned OtherSelOpNo = 2;
7400 if (!isa<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) {
7401 ConstSelOpNo = 2;
7402 OtherSelOpNo = 1;
7403 }
7404 SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo);
7405 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp);
7406 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7407 return SDValue();
7408
7409 SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1);
7410 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp);
7411 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7412 return SDValue();
7413
7414 SDLoc DL(Sel);
7415 EVT VT = BO->getValueType(ResNo: 0);
7416
7417 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7418 if (SelOpNo == 1)
7419 std::swap(a&: NewConstOps[0], b&: NewConstOps[1]);
7420
7421 SDValue NewConstOp =
7422 DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps);
7423 if (!NewConstOp)
7424 return SDValue();
7425
7426 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7427 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7428 return SDValue();
7429
7430 SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo);
7431 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7432 if (SelOpNo == 1)
7433 std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]);
7434 SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps);
7435
7436 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7437 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7438 return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF);
7439}
7440
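// Lower SELECT: vector selects become VSELECT by splatting the condition,
// scalar integer selects prefer Zicond/XVentanaCondOps conditional-zero
// sequences when available, and everything else falls back to a
// RISCVISD::SELECT_CC (compare-and-branch) node.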
7441SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7442 SDValue CondV = Op.getOperand(i: 0);
7443 SDValue TrueV = Op.getOperand(i: 1);
7444 SDValue FalseV = Op.getOperand(i: 2);
7445 SDLoc DL(Op);
7446 MVT VT = Op.getSimpleValueType();
7447 MVT XLenVT = Subtarget.getXLenVT();
7448
7449 // Lower vector SELECTs to VSELECTs by splatting the condition.
7450 if (VT.isVector()) {
7451 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7452 SDValue CondSplat = DAG.getSplat(VT: SplatCondVT, DL, Op: CondV);
7453 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: CondSplat, N2: TrueV, N3: FalseV);
7454 }
7455
7456 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7457 // nodes to implement the SELECT. Performing the lowering here allows for
7458 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7459 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7460 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7461 VT.isScalarInteger()) {
7462 // (select c, t, 0) -> (czero_eqz t, c)
7463 if (isNullConstant(V: FalseV))
7464 return DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV);
7465 // (select c, 0, f) -> (czero_nez f, c)
7466 if (isNullConstant(V: TrueV))
7467 return DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV);
7468
7469 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7470 if (TrueV.getOpcode() == ISD::AND &&
7471 (TrueV.getOperand(i: 0) == FalseV || TrueV.getOperand(i: 1) == FalseV))
7472 return DAG.getNode(
7473 Opcode: ISD::OR, DL, VT, N1: TrueV,
7474 N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV));
7475 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7476 if (FalseV.getOpcode() == ISD::AND &&
7477 (FalseV.getOperand(i: 0) == TrueV || FalseV.getOperand(i: 1) == TrueV))
7478 return DAG.getNode(
7479 Opcode: ISD::OR, DL, VT, N1: FalseV,
7480 N2: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV));
7481
7482 // Try some other optimizations before falling back to generic lowering.
7483 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
7484 return V;
7485
7486 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7487 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
7488 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
7489 const APInt &TrueVal = TrueV->getAsAPIntVal();
7490 const APInt &FalseVal = FalseV->getAsAPIntVal();
7491 const int TrueValCost = RISCVMatInt::getIntMatCost(
7492 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7493 const int FalseValCost = RISCVMatInt::getIntMatCost(
7494 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7495 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7496 SDValue LHSVal = DAG.getConstant(
7497 Val: IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7498 SDValue RHSVal =
7499 DAG.getConstant(Val: IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7500 SDValue CMOV =
7501 DAG.getNode(Opcode: IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7502 DL, VT, N1: LHSVal, N2: CondV);
7503 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CMOV, N2: RHSVal);
7504 }
7505
7506 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7507 // Unless we have the short forward branch optimization.
7508 if (!Subtarget.hasConditionalMoveFusion())
7509 return DAG.getNode(
7510 Opcode: ISD::OR, DL, VT,
7511 N1: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV),
7512 N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV));
7513 }
7514
7515 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
7516 return V;
7517
7518 if (Op.hasOneUse()) {
7519 unsigned UseOpc = Op->use_begin()->getOpcode();
7520 if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) {
7521 SDNode *BinOp = *Op->use_begin();
7522 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: BinOp,
7523 DAG, Subtarget)) {
7524 DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel);
7525 return lowerSELECT(Op: NewSel, DAG);
7526 }
7527 }
7528 }
7529
7530 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7531 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7532 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(Val&: TrueV);
7533 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(Val&: FalseV);
7534 if (FPTV && FPFV) {
7535 if (FPTV->isExactlyValue(V: 1.0) && FPFV->isExactlyValue(V: 0.0))
7536 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: CondV);
7537 if (FPTV->isExactlyValue(V: 0.0) && FPFV->isExactlyValue(V: 1.0)) {
7538 SDValue XOR = DAG.getNode(Opcode: ISD::XOR, DL, VT: XLenVT, N1: CondV,
7539 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
7540 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: XOR);
7541 }
7542 }
7543
7544 // If the condition is not an integer SETCC which operates on XLenVT, we need
7545 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7546 // (select condv, truev, falsev)
7547 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7548 if (CondV.getOpcode() != ISD::SETCC ||
7549 CondV.getOperand(i: 0).getSimpleValueType() != XLenVT) {
7550 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
7551 SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE);
7552
7553 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7554
7555 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops);
7556 }
7557
7558 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7559 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7560 // advantage of the integer compare+branch instructions. i.e.:
7561 // (select (setcc lhs, rhs, cc), truev, falsev)
7562 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7563 SDValue LHS = CondV.getOperand(i: 0);
7564 SDValue RHS = CondV.getOperand(i: 1);
7565 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
7566
7567 // Special case for a select of 2 constants that have a difference of 1.
7568 // Normally this is done by DAGCombine, but if the select is introduced by
7569 // type legalization or op legalization, we miss it. Restricting to SETLT
7570 // case for now because that is what signed saturating add/sub need.
7571 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7572 // but we would probably want to swap the true/false values if the condition
7573 // is SETGE/SETLE to avoid an XORI.
7574 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
7575 CCVal == ISD::SETLT) {
7576 const APInt &TrueVal = TrueV->getAsAPIntVal();
7577 const APInt &FalseVal = FalseV->getAsAPIntVal();
7578 if (TrueVal - 1 == FalseVal)
7579 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV);
7580 if (TrueVal + 1 == FalseVal)
7581 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV);
7582 }
7583
7584 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
7585 // 1 < x ? x : 1 -> 0 < x ? x : 1
7586 if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7587 RHS == TrueV && LHS == FalseV) {
7588 LHS = DAG.getConstant(Val: 0, DL, VT);
7589 // 0 <u x is the same as x != 0.
7590 if (CCVal == ISD::SETULT) {
7591 std::swap(a&: LHS, b&: RHS);
7592 CCVal = ISD::SETNE;
7593 }
7594 }
7595
7596 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7597 if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7598 RHS == FalseV) {
7599 RHS = DAG.getConstant(Val: 0, DL, VT);
7600 }
7601
7602 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
7603
7604 if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) {
7605 // (select (setcc lhs, rhs, CC), constant, falsev)
7606 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7607 std::swap(a&: TrueV, b&: FalseV);
7608 TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()));
7609 }
7610
7611 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7612 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops);
7613}
7614
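// Lower BRCOND to RISCVISD::BR_CC, folding an integer SETCC condition directly
// into the branch when possible; otherwise compare the condition against zero
// with SETNE.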
7615SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7616 SDValue CondV = Op.getOperand(i: 1);
7617 SDLoc DL(Op);
7618 MVT XLenVT = Subtarget.getXLenVT();
7619
7620 if (CondV.getOpcode() == ISD::SETCC &&
7621 CondV.getOperand(i: 0).getValueType() == XLenVT) {
7622 SDValue LHS = CondV.getOperand(i: 0);
7623 SDValue RHS = CondV.getOperand(i: 1);
7624 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
7625
7626 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
7627
7628 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
7629 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0),
7630 N2: LHS, N3: RHS, N4: TargetCC, N5: Op.getOperand(i: 2));
7631 }
7632
7633 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0),
7634 N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: XLenVT),
7635 N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2));
7636}
7637
7638SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7639 MachineFunction &MF = DAG.getMachineFunction();
7640 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7641
7642 SDLoc DL(Op);
7643 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
7644 VT: getPointerTy(DL: MF.getDataLayout()));
7645
7646 // vastart just stores the address of the VarArgsFrameIndex slot into the
7647 // memory location argument.
7648 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
7649 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
7650 PtrInfo: MachinePointerInfo(SV));
7651}
7652
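// Lower FRAMEADDR by copying the frame register and, for non-zero depths,
// repeatedly loading the caller's saved frame pointer, which this lowering
// locates at offset -2*XLEN bytes from the current frame pointer.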
7653SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7654 SelectionDAG &DAG) const {
7655 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7656 MachineFunction &MF = DAG.getMachineFunction();
7657 MachineFrameInfo &MFI = MF.getFrameInfo();
7658 MFI.setFrameAddressIsTaken(true);
7659 Register FrameReg = RI.getFrameRegister(MF);
7660 int XLenInBytes = Subtarget.getXLen() / 8;
7661
7662 EVT VT = Op.getValueType();
7663 SDLoc DL(Op);
7664 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
7665 unsigned Depth = Op.getConstantOperandVal(i: 0);
7666 while (Depth--) {
7667 int Offset = -(XLenInBytes * 2);
7668 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
7669 N2: DAG.getIntPtrConstant(Val: Offset, DL));
7670 FrameAddr =
7671 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
7672 }
7673 return FrameAddr;
7674}
7675
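// Lower RETURNADDR: depth 0 returns the return address register as an implicit
// live-in; deeper frames load the saved return address at offset -XLEN bytes
// from the frame address computed by lowerFRAMEADDR.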
7676SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7677 SelectionDAG &DAG) const {
7678 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7679 MachineFunction &MF = DAG.getMachineFunction();
7680 MachineFrameInfo &MFI = MF.getFrameInfo();
7681 MFI.setReturnAddressIsTaken(true);
7682 MVT XLenVT = Subtarget.getXLenVT();
7683 int XLenInBytes = Subtarget.getXLen() / 8;
7684
7685 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7686 return SDValue();
7687
7688 EVT VT = Op.getValueType();
7689 SDLoc DL(Op);
7690 unsigned Depth = Op.getConstantOperandVal(i: 0);
7691 if (Depth) {
7692 int Off = -XLenInBytes;
7693 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7694 SDValue Offset = DAG.getConstant(Val: Off, DL, VT);
7695 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
7696 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
7697 PtrInfo: MachinePointerInfo());
7698 }
7699
7700 // Return the value of the return address register, marking it an implicit
7701 // live-in.
7702 Register Reg = MF.addLiveIn(PReg: RI.getRARegister(), RC: getRegClassFor(VT: XLenVT));
7703 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg, VT: XLenVT);
7704}
7705
7706SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7707 SelectionDAG &DAG) const {
7708 SDLoc DL(Op);
7709 SDValue Lo = Op.getOperand(i: 0);
7710 SDValue Hi = Op.getOperand(i: 1);
7711 SDValue Shamt = Op.getOperand(i: 2);
7712 EVT VT = Lo.getValueType();
7713
7714 // if Shamt-XLEN < 0: // Shamt < XLEN
7715 // Lo = Lo << Shamt
7716 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7717 // else:
7718 // Lo = 0
7719 // Hi = Lo << (Shamt-XLEN)
7720
7721 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
7722 SDValue One = DAG.getConstant(Val: 1, DL, VT);
7723 SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT);
7724 SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT);
7725 SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen);
7726 SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt);
7727
7728 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
7729 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
7730 SDValue ShiftRightLo =
7731 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: XLenMinus1Shamt);
7732 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
7733 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
7734 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusXLen);
7735
7736 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT);
7737
7738 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
7739 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
7740
7741 SDValue Parts[2] = {Lo, Hi};
7742 return DAG.getMergeValues(Ops: Parts, dl: DL);
7743}
7744
7745SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7746 bool IsSRA) const {
7747 SDLoc DL(Op);
7748 SDValue Lo = Op.getOperand(i: 0);
7749 SDValue Hi = Op.getOperand(i: 1);
7750 SDValue Shamt = Op.getOperand(i: 2);
7751 EVT VT = Lo.getValueType();
7752
7753 // SRA expansion:
7754 // if Shamt-XLEN < 0: // Shamt < XLEN
7755 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7756 // Hi = Hi >>s Shamt
7757 // else:
7758 // Lo = Hi >>s (Shamt-XLEN);
7759 // Hi = Hi >>s (XLEN-1)
7760 //
7761 // SRL expansion:
7762 // if Shamt-XLEN < 0: // Shamt < XLEN
7763 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7764 // Hi = Hi >>u Shamt
7765 // else:
7766 // Lo = Hi >>u (Shamt-XLEN);
7767 // Hi = 0;
7768
7769 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7770
7771 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
7772 SDValue One = DAG.getConstant(Val: 1, DL, VT);
7773 SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT);
7774 SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT);
7775 SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen);
7776 SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt);
7777
7778 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
7779 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
7780 SDValue ShiftLeftHi =
7781 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: XLenMinus1Shamt);
7782 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
7783 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
7784 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusXLen);
7785 SDValue HiFalse =
7786 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: XLenMinus1) : Zero;
7787
7788 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT);
7789
7790 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
7791 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
7792
7793 SDValue Parts[2] = {Lo, Hi};
7794 return DAG.getMergeValues(Ops: Parts, dl: DL);
7795}
7796
7797// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7798// legal equivalently-sized i8 type, so we can use that as a go-between.
7799SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7800 SelectionDAG &DAG) const {
7801 SDLoc DL(Op);
7802 MVT VT = Op.getSimpleValueType();
7803 SDValue SplatVal = Op.getOperand(i: 0);
7804 // All-zeros or all-ones splats are handled specially.
7805 if (ISD::isConstantSplatVectorAllOnes(N: Op.getNode())) {
7806 SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second;
7807 return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT, Operand: VL);
7808 }
7809 if (ISD::isConstantSplatVectorAllZeros(N: Op.getNode())) {
7810 SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second;
7811 return DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT, Operand: VL);
7812 }
7813 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7814 SplatVal = DAG.getNode(Opcode: ISD::AND, DL, VT: SplatVal.getValueType(), N1: SplatVal,
7815 N2: DAG.getConstant(Val: 1, DL, VT: SplatVal.getValueType()));
7816 SDValue LHS = DAG.getSplatVector(VT: InterVT, DL, Op: SplatVal);
7817 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: InterVT);
7818 return DAG.getSetCC(DL, VT, LHS, RHS: Zero, Cond: ISD::SETNE);
7819}
7820
7821// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7822// illegal (currently only vXi64 RV32).
7823// FIXME: We could also catch non-constant sign-extended i32 values and lower
7824// them to VMV_V_X_VL.
7825SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7826 SelectionDAG &DAG) const {
7827 SDLoc DL(Op);
7828 MVT VecVT = Op.getSimpleValueType();
7829 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7830 "Unexpected SPLAT_VECTOR_PARTS lowering");
7831
7832 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7833 SDValue Lo = Op.getOperand(i: 0);
7834 SDValue Hi = Op.getOperand(i: 1);
7835
7836 MVT ContainerVT = VecVT;
7837 if (VecVT.isFixedLengthVector())
7838 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
7839
7840 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7841
7842 SDValue Res =
7843 splatPartsI64WithVL(DL, VT: ContainerVT, Passthru: SDValue(), Lo, Hi, VL, DAG);
7844
7845 if (VecVT.isFixedLengthVector())
7846 Res = convertFromScalableVector(VT: VecVT, V: Res, DAG, Subtarget);
7847
7848 return Res;
7849}
7850
7851// Custom-lower extensions from mask vectors by using a vselect either with 1
7852// for zero/any-extension or -1 for sign-extension:
7853// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7854// Note that any-extension is lowered identically to zero-extension.
7855SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7856 int64_t ExtTrueVal) const {
7857 SDLoc DL(Op);
7858 MVT VecVT = Op.getSimpleValueType();
7859 SDValue Src = Op.getOperand(i: 0);
7860 // Only custom-lower extensions from mask types
7861 assert(Src.getValueType().isVector() &&
7862 Src.getValueType().getVectorElementType() == MVT::i1);
7863
7864 if (VecVT.isScalableVector()) {
7865 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: VecVT);
7866 SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: VecVT);
7867 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecVT, N1: Src, N2: SplatTrueVal, N3: SplatZero);
7868 }
7869
7870 MVT ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
7871 MVT I1ContainerVT =
7872 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7873
7874 SDValue CC = convertToScalableVector(VT: I1ContainerVT, V: Src, DAG, Subtarget);
7875
7876 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7877
7878 MVT XLenVT = Subtarget.getXLenVT();
7879 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
7880 SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: XLenVT);
7881
7882 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7883 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL);
7884 SplatTrueVal = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7885 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatTrueVal, N3: VL);
7886 SDValue Select =
7887 DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: SplatTrueVal,
7888 N3: SplatZero, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
7889
7890 return convertFromScalableVector(VT: VecVT, V: Select, DAG, Subtarget);
7891}
7892
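// Lower a fixed-length vector extension by converting the source to its
// scalable container type, emitting the VL-predicated extend node ExtendOpc,
// and converting the result back to the original fixed-length type.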
7893SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7894 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7895 MVT ExtVT = Op.getSimpleValueType();
7896 // Only custom-lower extensions from fixed-length vector types.
7897 if (!ExtVT.isFixedLengthVector())
7898 return Op;
7899 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
7900 // Grab the canonical container type for the extended type. Infer the smaller
7901 // type from that to ensure the same number of vector elements, as we know
7902 // the LMUL will be sufficient to hold the smaller type.
7903 MVT ContainerExtVT = getContainerForFixedLengthVector(VT: ExtVT);
7904 // Build the source container type manually to ensure the same number of
7905 // vector elements between source and dest.
7906 MVT ContainerVT = MVT::getVectorVT(VT: VT.getVectorElementType(),
7907 EC: ContainerExtVT.getVectorElementCount());
7908
7909 SDValue Op1 =
7910 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
7911
7912 SDLoc DL(Op);
7913 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
7914
7915 SDValue Ext = DAG.getNode(Opcode: ExtendOpc, DL, VT: ContainerExtVT, N1: Op1, N2: Mask, N3: VL);
7916
7917 return convertFromScalableVector(VT: ExtVT, V: Ext, DAG, Subtarget);
7918}
7919
7920// Custom-lower truncations from vectors to mask vectors by using a mask and a
7921// setcc operation:
7922// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7923SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7924 SelectionDAG &DAG) const {
7925 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7926 SDLoc DL(Op);
7927 EVT MaskVT = Op.getValueType();
7928 // Only expect to custom-lower truncations to mask types
7929 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7930 "Unexpected type for vector mask lowering");
7931 SDValue Src = Op.getOperand(i: 0);
7932 MVT VecVT = Src.getSimpleValueType();
7933 SDValue Mask, VL;
7934 if (IsVPTrunc) {
7935 Mask = Op.getOperand(i: 1);
7936 VL = Op.getOperand(i: 2);
7937 }
7938 // If this is a fixed vector, we need to convert it to a scalable vector.
7939 MVT ContainerVT = VecVT;
7940
7941 if (VecVT.isFixedLengthVector()) {
7942 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
7943 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
7944 if (IsVPTrunc) {
7945 MVT MaskContainerVT =
7946 getContainerForFixedLengthVector(VT: Mask.getSimpleValueType());
7947 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
7948 }
7949 }
7950
7951 if (!IsVPTrunc) {
7952 std::tie(args&: Mask, args&: VL) =
7953 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7954 }
7955
7956 SDValue SplatOne = DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT());
7957 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT());
7958
7959 SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7960 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatOne, N3: VL);
7961 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
7962 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL);
7963
7964 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7965 SDValue Trunc = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerVT, N1: Src, N2: SplatOne,
7966 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
7967 Trunc = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskContainerVT,
7968 Ops: {Trunc, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
7969 DAG.getUNDEF(VT: MaskContainerVT), Mask, VL});
7970 if (MaskVT.isFixedLengthVector())
7971 Trunc = convertFromScalableVector(VT: MaskVT, V: Trunc, DAG, Subtarget);
7972 return Trunc;
7973}
7974
7975SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7976 SelectionDAG &DAG) const {
7977 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7978 SDLoc DL(Op);
7979
7980 MVT VT = Op.getSimpleValueType();
7981 // Only custom-lower vector truncates
7982 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7983
7984 // Truncates to mask types are handled differently
7985 if (VT.getVectorElementType() == MVT::i1)
7986 return lowerVectorMaskTruncLike(Op, DAG);
7987
7988 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7989 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7990 // truncate by one power of two at a time.
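// For example, an i64 -> i8 element truncate is emitted as three
// TRUNCATE_VECTOR_VL nodes: i64 -> i32 -> i16 -> i8.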
7991 MVT DstEltVT = VT.getVectorElementType();
7992
7993 SDValue Src = Op.getOperand(i: 0);
7994 MVT SrcVT = Src.getSimpleValueType();
7995 MVT SrcEltVT = SrcVT.getVectorElementType();
7996
7997 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7998 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7999 "Unexpected vector truncate lowering");
8000
8001 MVT ContainerVT = SrcVT;
8002 SDValue Mask, VL;
8003 if (IsVPTrunc) {
8004 Mask = Op.getOperand(i: 1);
8005 VL = Op.getOperand(i: 2);
8006 }
8007 if (SrcVT.isFixedLengthVector()) {
8008 ContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
8009 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
8010 if (IsVPTrunc) {
8011 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
8012 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
8013 }
8014 }
8015
8016 SDValue Result = Src;
8017 if (!IsVPTrunc) {
8018 std::tie(args&: Mask, args&: VL) =
8019 getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
8020 }
8021
8022 LLVMContext &Context = *DAG.getContext();
8023 const ElementCount Count = ContainerVT.getVectorElementCount();
8024 do {
8025 SrcEltVT = MVT::getIntegerVT(BitWidth: SrcEltVT.getSizeInBits() / 2);
8026 EVT ResultVT = EVT::getVectorVT(Context, VT: SrcEltVT, EC: Count);
8027 Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: ResultVT, N1: Result,
8028 N2: Mask, N3: VL);
8029 } while (SrcEltVT != DstEltVT);
8030
8031 if (SrcVT.isFixedLengthVector())
8032 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
8033
8034 return Result;
8035}
8036
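// Lower STRICT_FP_EXTEND and STRICT_FP_ROUND on vectors. Conversions directly
// between f64 and f16 go through an f32 intermediate step, using round-to-odd
// for the narrowing direction to avoid double rounding.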
8037SDValue
8038RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8039 SelectionDAG &DAG) const {
8040 SDLoc DL(Op);
8041 SDValue Chain = Op.getOperand(i: 0);
8042 SDValue Src = Op.getOperand(i: 1);
8043 MVT VT = Op.getSimpleValueType();
8044 MVT SrcVT = Src.getSimpleValueType();
8045 MVT ContainerVT = VT;
8046 if (VT.isFixedLengthVector()) {
8047 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
8048 ContainerVT =
8049 SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType());
8050 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
8051 }
8052
8053 auto [Mask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
8054
8055 // RVV can only widen/truncate fp to types double/half the size of the source.
8056 if ((VT.getVectorElementType() == MVT::f64 &&
8057 SrcVT.getVectorElementType() == MVT::f16) ||
8058 (VT.getVectorElementType() == MVT::f16 &&
8059 SrcVT.getVectorElementType() == MVT::f64)) {
8060 // For double rounding, the intermediate rounding should be round-to-odd.
8061 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8062 ? RISCVISD::STRICT_FP_EXTEND_VL
8063 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8064 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8065 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8066 Chain, Src, Mask, VL);
8067 Chain = Src.getValue(R: 1);
8068 }
8069
8070 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8071 ? RISCVISD::STRICT_FP_EXTEND_VL
8072 : RISCVISD::STRICT_FP_ROUND_VL;
8073 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8074 Chain, Src, Mask, VL);
8075 if (VT.isFixedLengthVector()) {
8076 // StrictFP operations have two result values. Their lowered result should
8077 // have the same number of results.
8078 SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
8079 Res = DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL);
8080 }
8081 return Res;
8082}
8083
8084SDValue
8085RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8086 SelectionDAG &DAG) const {
8087 bool IsVP =
8088 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8089 bool IsExtend =
8090 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8091 // RVV can only truncate fp to types half the size of the source. We
8092 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8093 // conversion instruction.
8094 SDLoc DL(Op);
8095 MVT VT = Op.getSimpleValueType();
8096
8097 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8098
8099 SDValue Src = Op.getOperand(i: 0);
8100 MVT SrcVT = Src.getSimpleValueType();
8101
8102 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8103 SrcVT.getVectorElementType() != MVT::f16);
8104 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8105 SrcVT.getVectorElementType() != MVT::f64);
8106
8107 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8108
8109 // Prepare any fixed-length vector operands.
8110 MVT ContainerVT = VT;
8111 SDValue Mask, VL;
8112 if (IsVP) {
8113 Mask = Op.getOperand(i: 1);
8114 VL = Op.getOperand(i: 2);
8115 }
8116 if (VT.isFixedLengthVector()) {
8117 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
8118 ContainerVT =
8119 SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType());
8120 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
8121 if (IsVP) {
8122 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
8123 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
8124 }
8125 }
8126
8127 if (!IsVP)
8128 std::tie(args&: Mask, args&: VL) =
8129 getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
8130
8131 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8132
8133 if (IsDirectConv) {
8134 Src = DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
8135 if (VT.isFixedLengthVector())
8136 Src = convertFromScalableVector(VT, V: Src, DAG, Subtarget);
8137 return Src;
8138 }
8139
8140 unsigned InterConvOpc =
8141 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8142
8143 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8144 SDValue IntermediateConv =
8145 DAG.getNode(Opcode: InterConvOpc, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL);
8146 SDValue Result =
8147 DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: IntermediateConv, N2: Mask, N3: VL);
8148 if (VT.isFixedLengthVector())
8149 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
8150 return Result;
8151}
8152
8153// Given a scalable vector type and an index into it, returns the type for the
8154// smallest subvector that the index fits in. This can be used to reduce LMUL
8155// for operations like vslidedown.
8156//
8157// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8158static std::optional<MVT>
8159getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8160 const RISCVSubtarget &Subtarget) {
8161 assert(VecVT.isScalableVector());
8162 const unsigned EltSize = VecVT.getScalarSizeInBits();
8163 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8164 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8165 MVT SmallerVT;
8166 if (MaxIdx < MinVLMAX)
8167 SmallerVT = getLMUL1VT(VT: VecVT);
8168 else if (MaxIdx < MinVLMAX * 2)
8169 SmallerVT = getLMUL1VT(VT: VecVT).getDoubleNumVectorElementsVT();
8170 else if (MaxIdx < MinVLMAX * 4)
8171 SmallerVT = getLMUL1VT(VT: VecVT)
8172 .getDoubleNumVectorElementsVT()
8173 .getDoubleNumVectorElementsVT();
8174 if (!SmallerVT.isValid() || !VecVT.bitsGT(VT: SmallerVT))
8175 return std::nullopt;
8176 return SmallerVT;
8177}
8178
8179// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8180// first position of a vector, and that vector is slid up to the insert index.
8181// By limiting the active vector length to index+1 and merging with the
8182// original vector (with an undisturbed tail policy for elements >= VL), we
8183// achieve the desired result of leaving all elements untouched except the one
8184// at VL-1, which is replaced with the desired value.
8185SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8186 SelectionDAG &DAG) const {
8187 SDLoc DL(Op);
8188 MVT VecVT = Op.getSimpleValueType();
8189 SDValue Vec = Op.getOperand(i: 0);
8190 SDValue Val = Op.getOperand(i: 1);
8191 SDValue Idx = Op.getOperand(i: 2);
8192
8193 if (VecVT.getVectorElementType() == MVT::i1) {
8194 // FIXME: For now we just promote to an i8 vector and insert into that,
8195 // but this is probably not optimal.
8196 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8197 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec);
8198 Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: WideVT, N1: Vec, N2: Val, N3: Idx);
8199 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Vec);
8200 }
8201
8202 MVT ContainerVT = VecVT;
8203 // If the operand is a fixed-length vector, convert to a scalable one.
8204 if (VecVT.isFixedLengthVector()) {
8205 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
8206 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
8207 }
8208
8209 // If we know the index we're going to insert at, we can shrink Vec so that
8210 // we're performing the scalar inserts and slideup on a smaller LMUL.
8211 MVT OrigContainerVT = ContainerVT;
8212 SDValue OrigVec = Vec;
8213 SDValue AlignedIdx;
8214 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx)) {
8215 const unsigned OrigIdx = IdxC->getZExtValue();
8216 // Do we know an upper bound on LMUL?
8217 if (auto ShrunkVT = getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: OrigIdx,
8218 DL, DAG, Subtarget)) {
8219 ContainerVT = *ShrunkVT;
8220 AlignedIdx = DAG.getVectorIdxConstant(Val: 0, DL);
8221 }
8222
8223 // If we're compiling for an exact VLEN value, we can always perform
8224 // the insert in m1 as we can determine the register corresponding to
8225 // the index in the register group.
8226 const MVT M1VT = getLMUL1VT(VT: ContainerVT);
8227 if (auto VLEN = Subtarget.getRealVLen();
8228 VLEN && ContainerVT.bitsGT(VT: M1VT)) {
8229 EVT ElemVT = VecVT.getVectorElementType();
8230 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8231 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8232 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8233 unsigned ExtractIdx =
8234 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8235 AlignedIdx = DAG.getVectorIdxConstant(Val: ExtractIdx, DL);
8236 Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL);
8237 ContainerVT = M1VT;
8238 }
8239
8240 if (AlignedIdx)
8241 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec,
8242 N2: AlignedIdx);
8243 }
8244
8245 MVT XLenVT = Subtarget.getXLenVT();
8246
8247 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8248 // Even i64-element vectors on RV32 can be lowered without scalar
8249 // legalization if the most-significant 32 bits of the value are not affected
8250 // by the sign-extension of the lower 32 bits.
8251 // TODO: We could also catch sign extensions of a 32-bit value.
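// For example, an i64 constant of -1 or 0x7fffffff passes the isInt<32> check
// below: its lower 32 bits sign-extend back to the full value, so a 32-bit
// materialization plus the SEW>XLEN sign-extension is enough. A constant of
// 0x80000000 does not pass and must take the split lo/hi path instead.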
8252 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8253 const auto *CVal = cast<ConstantSDNode>(Val);
8254 if (isInt<32>(x: CVal->getSExtValue())) {
8255 IsLegalInsert = true;
8256 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8257 }
8258 }
8259
8260 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8261
8262 SDValue ValInVec;
8263
8264 if (IsLegalInsert) {
8265 unsigned Opc =
8266 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8267 if (isNullConstant(V: Idx)) {
8268 if (!VecVT.isFloatingPoint())
8269 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Val);
8270 Vec = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: Vec, N2: Val, N3: VL);
8271
8272 if (AlignedIdx)
8273 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec,
8274 N2: Vec, N3: AlignedIdx);
8275 if (!VecVT.isFixedLengthVector())
8276 return Vec;
8277 return convertFromScalableVector(VT: VecVT, V: Vec, DAG, Subtarget);
8278 }
8279 ValInVec = lowerScalarInsert(Scalar: Val, VL, VT: ContainerVT, DL, DAG, Subtarget);
8280 } else {
8281 // On RV32, i64-element vectors must be specially handled to place the
8282 // value at element 0, by using two vslide1down instructions in sequence on
8283 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8284 // this.
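// Concretely, with the VL of 2 used below, the first vslide1down writes Lo
// into i32 element 1, and the second shifts it down to element 0 while
// writing Hi into element 1; the (Lo, Hi) pair then reads back as the
// desired i64 in element 0 after the bitcast to the i64 container type.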
8285 SDValue ValLo, ValHi;
8286 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8287 MVT I32ContainerVT =
8288 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8289 SDValue I32Mask =
8290 getDefaultScalableVLOps(VecVT: I32ContainerVT, DL, DAG, Subtarget).first;
8291 // Limit the active VL to two.
8292 SDValue InsertI64VL = DAG.getConstant(Val: 2, DL, VT: XLenVT);
8293 // If the Idx is 0 we can insert directly into the vector.
8294 if (isNullConstant(V: Idx)) {
// First slide in the lo value, then slide the hi value in above it. We use
// slide1down to avoid the register group overlap constraint of vslide1up.
8297 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8298 N1: Vec, N2: Vec, N3: ValLo, N4: I32Mask, N5: InsertI64VL);
// If the source vector is undef, don't pass along the tail elements from
// the previous slide1down.
8301 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8302 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8303 N1: Tail, N2: ValInVec, N3: ValHi, N4: I32Mask, N5: InsertI64VL);
8304 // Bitcast back to the right container type.
8305 ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec);
8306
8307 if (AlignedIdx)
8308 ValInVec =
8309 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec,
8310 N2: ValInVec, N3: AlignedIdx);
8311 if (!VecVT.isFixedLengthVector())
8312 return ValInVec;
8313 return convertFromScalableVector(VT: VecVT, V: ValInVec, DAG, Subtarget);
8314 }
8315
// First slide in the lo value, then slide the hi value in above it. We use
// slide1down to avoid the register group overlap constraint of vslide1up.
8318 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8319 N1: DAG.getUNDEF(VT: I32ContainerVT),
8320 N2: DAG.getUNDEF(VT: I32ContainerVT), N3: ValLo,
8321 N4: I32Mask, N5: InsertI64VL);
8322 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
8323 N1: DAG.getUNDEF(VT: I32ContainerVT), N2: ValInVec, N3: ValHi,
8324 N4: I32Mask, N5: InsertI64VL);
8325 // Bitcast back to the right container type.
8326 ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec);
8327 }
8328
8329 // Now that the value is in a vector, slide it into position.
8330 SDValue InsertVL =
8331 DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: Idx, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
8332
8333 // Use tail agnostic policy if Idx is the last index of Vec.
8334 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8335 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Val: Idx) &&
8336 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8337 Policy = RISCVII::TAIL_AGNOSTIC;
8338 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: ValInVec,
8339 Offset: Idx, Mask, VL: InsertVL, Policy);
8340
8341 if (AlignedIdx)
8342 Slideup = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec,
8343 N2: Slideup, N3: AlignedIdx);
8344 if (!VecVT.isFixedLengthVector())
8345 return Slideup;
8346 return convertFromScalableVector(VT: VecVT, V: Slideup, DAG, Subtarget);
8347}
8348
8349// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8350// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8351// types this is done using VMV_X_S to allow us to glean information about the
8352// sign bits of the result.
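//
// Roughly, (extractelement <4 x i32> %v, i32 2) becomes something like the
// following sketch (the VL of 1 used below keeps the slide from touching
// more elements than needed):
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vi v8, v8, 2
//   vmv.x.s a0, v8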
8353SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8354 SelectionDAG &DAG) const {
8355 SDLoc DL(Op);
8356 SDValue Idx = Op.getOperand(i: 1);
8357 SDValue Vec = Op.getOperand(i: 0);
8358 EVT EltVT = Op.getValueType();
8359 MVT VecVT = Vec.getSimpleValueType();
8360 MVT XLenVT = Subtarget.getXLenVT();
8361
8362 if (VecVT.getVectorElementType() == MVT::i1) {
8363 // Use vfirst.m to extract the first bit.
8364 if (isNullConstant(V: Idx)) {
8365 MVT ContainerVT = VecVT;
8366 if (VecVT.isFixedLengthVector()) {
8367 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
8368 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
8369 }
8370 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8371 SDValue Vfirst =
8372 DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
8373 SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: Vfirst,
8374 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ);
8375 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res);
8376 }
8377 if (VecVT.isFixedLengthVector()) {
8378 unsigned NumElts = VecVT.getVectorNumElements();
8379 if (NumElts >= 8) {
8380 MVT WideEltVT;
8381 unsigned WidenVecLen;
8382 SDValue ExtractElementIdx;
8383 SDValue ExtractBitIdx;
8384 unsigned MaxEEW = Subtarget.getELen();
8385 MVT LargestEltVT = MVT::getIntegerVT(
8386 BitWidth: std::min(a: MaxEEW, b: unsigned(XLenVT.getSizeInBits())));
8387 if (NumElts <= LargestEltVT.getSizeInBits()) {
8388 assert(isPowerOf2_32(NumElts) &&
8389 "the number of elements should be power of 2");
8390 WideEltVT = MVT::getIntegerVT(BitWidth: NumElts);
8391 WidenVecLen = 1;
8392 ExtractElementIdx = DAG.getConstant(Val: 0, DL, VT: XLenVT);
8393 ExtractBitIdx = Idx;
8394 } else {
8395 WideEltVT = LargestEltVT;
8396 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8397 // extract element index = index / element width
8398 ExtractElementIdx = DAG.getNode(
8399 Opcode: ISD::SRL, DL, VT: XLenVT, N1: Idx,
8400 N2: DAG.getConstant(Val: Log2_64(Value: WideEltVT.getSizeInBits()), DL, VT: XLenVT));
8401 // mask bit index = index % element width
8402 ExtractBitIdx = DAG.getNode(
8403 Opcode: ISD::AND, DL, VT: XLenVT, N1: Idx,
8404 N2: DAG.getConstant(Val: WideEltVT.getSizeInBits() - 1, DL, VT: XLenVT));
8405 }
8406 MVT WideVT = MVT::getVectorVT(VT: WideEltVT, NumElements: WidenVecLen);
8407 Vec = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Vec);
8408 SDValue ExtractElt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: XLenVT,
8409 N1: Vec, N2: ExtractElementIdx);
8410 // Extract the bit from GPR.
8411 SDValue ShiftRight =
8412 DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: ExtractElt, N2: ExtractBitIdx);
8413 SDValue Res = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: ShiftRight,
8414 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
8415 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res);
8416 }
8417 }
8418 // Otherwise, promote to an i8 vector and extract from that.
8419 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8420 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec);
8421 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec, N2: Idx);
8422 }
8423
8424 // If this is a fixed vector, we need to convert it to a scalable vector.
8425 MVT ContainerVT = VecVT;
8426 if (VecVT.isFixedLengthVector()) {
8427 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
8428 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
8429 }
8430
8431 // If we're compiling for an exact VLEN value and we have a known
8432 // constant index, we can always perform the extract in m1 (or
8433 // smaller) as we can determine the register corresponding to
8434 // the index in the register group.
8435 const auto VLen = Subtarget.getRealVLen();
8436 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx);
8437 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8438 MVT M1VT = getLMUL1VT(VT: ContainerVT);
8439 unsigned OrigIdx = IdxC->getZExtValue();
8440 EVT ElemVT = VecVT.getVectorElementType();
8441 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8442 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8443 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8444 unsigned ExtractIdx =
8445 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8446 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec,
8447 N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL));
8448 Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL);
8449 ContainerVT = M1VT;
8450 }
8451
8452 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8453 // contains our index.
8454 std::optional<uint64_t> MaxIdx;
8455 if (VecVT.isFixedLengthVector())
8456 MaxIdx = VecVT.getVectorNumElements() - 1;
8457 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx))
8458 MaxIdx = IdxC->getZExtValue();
8459 if (MaxIdx) {
8460 if (auto SmallerVT =
8461 getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: *MaxIdx, DL, DAG, Subtarget)) {
8462 ContainerVT = *SmallerVT;
8463 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec,
8464 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT));
8465 }
8466 }
8467
// If, after narrowing, the required slide is still greater than LMUL2,
// fall back to generic expansion and go through the stack. This is done
8470 // for a subtle reason: extracting *all* elements out of a vector is
8471 // widely expected to be linear in vector size, but because vslidedown
8472 // is linear in LMUL, performing N extracts using vslidedown becomes
8473 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8474 // seems to have the same problem (the store is linear in LMUL), but the
8475 // generic expansion *memoizes* the store, and thus for many extracts of
8476 // the same vector we end up with one store and a bunch of loads.
8477 // TODO: We don't have the same code for insert_vector_elt because we
8478 // have BUILD_VECTOR and handle the degenerate case there. Should we
8479 // consider adding an inverse BUILD_VECTOR node?
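// To make the cost concern above concrete: extracting all 16 elements of an
// LMUL8 vector with vslidedown issues 16 slides that each pay the LMUL8
// cost, whereas the stack path pays for one LMUL8 store plus 16 cheap scalar
// loads.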
8480 MVT LMUL2VT = getLMUL1VT(VT: ContainerVT).getDoubleNumVectorElementsVT();
8481 if (ContainerVT.bitsGT(VT: LMUL2VT) && VecVT.isFixedLengthVector())
8482 return SDValue();
8483
8484 // If the index is 0, the vector is already in the right position.
8485 if (!isNullConstant(V: Idx)) {
8486 // Use a VL of 1 to avoid processing more elements than we need.
8487 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget);
8488 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
8489 Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL);
8490 }
8491
8492 if (!EltVT.isInteger()) {
8493 // Floating-point extracts are handled in TableGen.
8494 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec,
8495 N2: DAG.getVectorIdxConstant(Val: 0, DL));
8496 }
8497
8498 SDValue Elt0 = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
8499 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Elt0);
8500}
8501
8502// Some RVV intrinsics may claim that they want an integer operand to be
8503// promoted or expanded.
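// For example, on RV64 an i8 or i32 scalar operand of a .vx intrinsic is
// promoted to i64 (XLenVT) here, while on RV32 an i64 scalar operand of a
// SEW=64 operation is either truncated when it is already sign-extended,
// split into two SEW=32 slide1up/slide1down steps, or turned into a splat
// via splatSplitI64WithVL.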
8504static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8505 const RISCVSubtarget &Subtarget) {
8506 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8507 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8508 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8509 "Unexpected opcode");
8510
8511 if (!Subtarget.hasVInstructions())
8512 return SDValue();
8513
8514 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8515 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8516 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
8517
8518 SDLoc DL(Op);
8519
8520 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8521 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8522 if (!II || !II->hasScalarOperand())
8523 return SDValue();
8524
8525 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8526 assert(SplatOp < Op.getNumOperands());
8527
8528 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8529 SDValue &ScalarOp = Operands[SplatOp];
8530 MVT OpVT = ScalarOp.getSimpleValueType();
8531 MVT XLenVT = Subtarget.getXLenVT();
8532
8533 // If this isn't a scalar, or its type is XLenVT we're done.
8534 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8535 return SDValue();
8536
8537 // Simplest case is that the operand needs to be promoted to XLenVT.
8538 if (OpVT.bitsLT(VT: XLenVT)) {
8539 // If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
8542 // FIXME: Should we ignore the upper bits in isel instead?
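// E.g. a vXi8 intrinsic with the constant operand 0xFF: sign-extending gives
// -1, which fits simm5 and can select a .vi form, while a zero extension
// would produce 255 and force the constant into a register.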
8543 unsigned ExtOpc =
8544 isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8545 ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp);
8546 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
8547 }
8548
8549 // Use the previous operand to get the vXi64 VT. The result might be a mask
8550 // VT for compares. Using the previous operand assumes that the previous
8551 // operand will never have a smaller element size than a scalar operand and
8552 // that a widening operation never uses SEW=64.
8553 // NOTE: If this fails the below assert, we can probably just find the
8554 // element count from any operand or result and use it to construct the VT.
8555 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8556 MVT VT = Op.getOperand(i: SplatOp - 1).getSimpleValueType();
8557
8558 // The more complex case is when the scalar is larger than XLenVT.
8559 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8560 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8561
8562 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8563 // instruction to sign-extend since SEW>XLEN.
8564 if (DAG.ComputeNumSignBits(Op: ScalarOp) > 32) {
8565 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8566 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
8567 }
8568
8569 switch (IntNo) {
8570 case Intrinsic::riscv_vslide1up:
8571 case Intrinsic::riscv_vslide1down:
8572 case Intrinsic::riscv_vslide1up_mask:
8573 case Intrinsic::riscv_vslide1down_mask: {
8574 // We need to special case these when the scalar is larger than XLen.
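// The idea: reinterpret the SEW=64 source as a SEW=32 vector with twice the
// element count and twice the VL, then perform two SEW=32 slide1up (hi, then
// lo) or slide1down (lo, then hi) steps so the two halves of the scalar land
// as a single adjacent i64 element.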
8575 unsigned NumOps = Op.getNumOperands();
8576 bool IsMasked = NumOps == 7;
8577
8578 // Convert the vector source to the equivalent nxvXi32 vector.
8579 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8580 SDValue Vec = DAG.getBitcast(VT: I32VT, V: Operands[2]);
8581 SDValue ScalarLo, ScalarHi;
8582 std::tie(ScalarLo, ScalarHi) =
8583 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8584
8585 // Double the VL since we halved SEW.
8586 SDValue AVL = getVLOperand(Op);
8587 SDValue I32VL;
8588
8589 // Optimize for constant AVL
8590 if (isa<ConstantSDNode>(Val: AVL)) {
8591 const auto [MinVLMAX, MaxVLMAX] =
8592 RISCVTargetLowering::computeVLMAXBounds(VecVT: VT, Subtarget);
8593
8594 uint64_t AVLInt = AVL->getAsZExtVal();
8595 if (AVLInt <= MinVLMAX) {
8596 I32VL = DAG.getConstant(Val: 2 * AVLInt, DL, VT: XLenVT);
8597 } else if (AVLInt >= 2 * MaxVLMAX) {
8598 // Just set vl to VLMAX in this situation
8599 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT: I32VT);
8600 SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT);
8601 unsigned Sew = RISCVVType::encodeSEW(SEW: I32VT.getScalarSizeInBits());
8602 SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT);
8603 SDValue SETVLMAX = DAG.getTargetConstant(
8604 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8605 I32VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVLMAX, N2: SEW,
8606 N3: LMUL);
8607 } else {
// For AVL in (MinVLMAX, 2 * MaxVLMAX), the actual working VL depends on
// the hardware implementation, so let the non-constant path below compute
// it with a vsetvli.
8611 }
8612 }
8613 if (!I32VL) {
8614 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8615 SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT);
8616 unsigned Sew = RISCVVType::encodeSEW(SEW: VT.getScalarSizeInBits());
8617 SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT);
8618 SDValue SETVL =
8619 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
// Use a vsetvli to query the length that will actually be used, which
// depends on the hardware implementation.
8622 SDValue VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVL, N2: AVL,
8623 N3: SEW, N4: LMUL);
8624 I32VL =
8625 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: VL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
8626 }
8627
8628 SDValue I32Mask = getAllOnesMask(VecVT: I32VT, VL: I32VL, DL, DAG);
8629
8630 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8631 // instructions.
8632 SDValue Passthru;
8633 if (IsMasked)
8634 Passthru = DAG.getUNDEF(VT: I32VT);
8635 else
8636 Passthru = DAG.getBitcast(VT: I32VT, V: Operands[1]);
8637
8638 if (IntNo == Intrinsic::riscv_vslide1up ||
8639 IntNo == Intrinsic::riscv_vslide1up_mask) {
8640 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8641 N3: ScalarHi, N4: I32Mask, N5: I32VL);
8642 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8643 N3: ScalarLo, N4: I32Mask, N5: I32VL);
8644 } else {
8645 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8646 N3: ScalarLo, N4: I32Mask, N5: I32VL);
8647 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
8648 N3: ScalarHi, N4: I32Mask, N5: I32VL);
8649 }
8650
8651 // Convert back to nxvXi64.
8652 Vec = DAG.getBitcast(VT, V: Vec);
8653
8654 if (!IsMasked)
8655 return Vec;
8656 // Apply mask after the operation.
8657 SDValue Mask = Operands[NumOps - 3];
8658 SDValue MaskedOff = Operands[1];
8659 // Assume Policy operand is the last operand.
8660 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8661 // We don't need to select maskedoff if it's undef.
8662 if (MaskedOff.isUndef())
8663 return Vec;
// TAMU: tail agnostic, mask undisturbed.
8665 if (Policy == RISCVII::TAIL_AGNOSTIC)
8666 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff,
8667 N4: DAG.getUNDEF(VT), N5: AVL);
// TUMA or TUMU: currently we always emit a TUMU policy regardless of TUMA.
// That is fine because vmerge does not care about the mask policy.
8670 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff,
8671 N4: MaskedOff, N5: AVL);
8672 }
8673 }
8674
8675 // We need to convert the scalar to a splat vector.
8676 SDValue VL = getVLOperand(Op);
8677 assert(VL.getValueType() == XLenVT);
8678 ScalarOp = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar: ScalarOp, VL, DAG);
8679 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
8680}
8681
8682// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8683// scalable vector llvm.get.vector.length for now.
8684//
8685// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8686// (vscale * VF). The vscale and VF are independent of element width. We use
8687// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8690// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8691// SEW and LMUL are better for the surrounding vector instructions.
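//
// For example, with RVVBitsPerBlock = 64, a request for VF = 4 maps to SEW=8
// with LMUL = 1/2: VLMax is then (VLEN/8)/2 = (VLEN/64) * 4 = vscale * 4, as
// required.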
8692static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8693 const RISCVSubtarget &Subtarget) {
8694 MVT XLenVT = Subtarget.getXLenVT();
8695
8696 // The smallest LMUL is only valid for the smallest element width.
8697 const unsigned ElementWidth = 8;
8698
8699 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8700 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8701 // We don't support VF==1 with ELEN==32.
8702 [[maybe_unused]] unsigned MinVF =
8703 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8704
8705 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(Num: 2);
8706 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8707 "Unexpected VF");
8708
8709 bool Fractional = VF < LMul1VF;
8710 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8711 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMUL: LMulVal, Fractional);
8712 unsigned VSEW = RISCVVType::encodeSEW(SEW: ElementWidth);
8713
8714 SDLoc DL(N);
8715
8716 SDValue LMul = DAG.getTargetConstant(Val: VLMUL, DL, VT: XLenVT);
8717 SDValue Sew = DAG.getTargetConstant(Val: VSEW, DL, VT: XLenVT);
8718
8719 SDValue AVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 1));
8720
8721 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8722 SDValue Res =
8723 DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: ID, N2: AVL, N3: Sew, N4: LMul);
8724 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res);
8725}
8726
8727static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8728 const RISCVSubtarget &Subtarget) {
8729 SDValue Op0 = N->getOperand(Num: 1);
8730 MVT OpVT = Op0.getSimpleValueType();
8731 MVT ContainerVT = OpVT;
8732 if (OpVT.isFixedLengthVector()) {
8733 ContainerVT = getContainerForFixedLengthVector(DAG, VT: OpVT, Subtarget);
8734 Op0 = convertToScalableVector(VT: ContainerVT, V: Op0, DAG, Subtarget);
8735 }
8736 MVT XLenVT = Subtarget.getXLenVT();
8737 SDLoc DL(N);
8738 auto [Mask, VL] = getDefaultVLOps(VecVT: OpVT, ContainerVT, DL, DAG, Subtarget);
8739 SDValue Res = DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Op0, N2: Mask, N3: VL);
8740 if (isOneConstant(V: N->getOperand(Num: 2)))
8741 return Res;
8742
8743 // Convert -1 to VL.
8744 SDValue Setcc =
8745 DAG.getSetCC(DL, VT: XLenVT, LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETLT);
8746 VL = DAG.getElementCount(DL, VT: XLenVT, EC: OpVT.getVectorElementCount());
8747 return DAG.getSelect(DL, VT: XLenVT, Cond: Setcc, LHS: VL, RHS: Res);
8748}
8749
8750static inline void promoteVCIXScalar(const SDValue &Op,
8751 SmallVectorImpl<SDValue> &Operands,
8752 SelectionDAG &DAG) {
8753 const RISCVSubtarget &Subtarget =
8754 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8755
8756 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8757 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8758 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
8759 SDLoc DL(Op);
8760
8761 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8762 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8763 if (!II || !II->hasScalarOperand())
8764 return;
8765
8766 unsigned SplatOp = II->ScalarOperand + 1;
8767 assert(SplatOp < Op.getNumOperands());
8768
8769 SDValue &ScalarOp = Operands[SplatOp];
8770 MVT OpVT = ScalarOp.getSimpleValueType();
8771 MVT XLenVT = Subtarget.getXLenVT();
8772
8773 // The code below is partially copied from lowerVectorIntrinsicScalars.
8774 // If this isn't a scalar, or its type is XLenVT we're done.
8775 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8776 return;
8777
// Manually emit the promotion of the scalar operand.
8779 if (OpVT.bitsLT(VT: XLenVT)) {
8780 unsigned ExtOpc =
8781 isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8782 ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp);
8783 }
8784
8785 return;
8786}
8787
8788static void processVCIXOperands(SDValue &OrigOp,
8789 SmallVectorImpl<SDValue> &Operands,
8790 SelectionDAG &DAG) {
8791 promoteVCIXScalar(Op: OrigOp, Operands, DAG);
8792 const RISCVSubtarget &Subtarget =
8793 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8794 for (SDValue &V : Operands) {
8795 EVT ValType = V.getValueType();
8796 if (ValType.isVector() && ValType.isFloatingPoint()) {
8797 MVT InterimIVT =
8798 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ValType.getScalarSizeInBits()),
8799 EC: ValType.getVectorElementCount());
8800 V = DAG.getBitcast(VT: InterimIVT, V);
8801 }
8802 if (ValType.isFixedLengthVector()) {
8803 MVT OpContainerVT = getContainerForFixedLengthVector(
8804 DAG, VT: V.getSimpleValueType(), Subtarget);
8805 V = convertToScalableVector(VT: OpContainerVT, V, DAG, Subtarget);
8806 }
8807 }
8808}
8809
8810// LMUL * VLEN should be greater than or equal to EGS * SEW
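// E.g. with VLEN = 128 and SEW = 32, an LMUL1 type such as nxv2i32 gives
// LMUL * VLEN = 128 >= 4 * 32, so an EGS=4 element group fits, whereas the
// fractional nxv1i32 (LMUL = 1/2) provides only 64 bits and is rejected.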
8811static inline bool isValidEGW(int EGS, EVT VT,
8812 const RISCVSubtarget &Subtarget) {
8813 return (Subtarget.getRealMinVLen() *
8814 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8815 EGS * VT.getScalarSizeInBits();
8816}
8817
8818SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8819 SelectionDAG &DAG) const {
8820 unsigned IntNo = Op.getConstantOperandVal(i: 0);
8821 SDLoc DL(Op);
8822 MVT XLenVT = Subtarget.getXLenVT();
8823
8824 switch (IntNo) {
8825 default:
8826 break; // Don't custom lower most intrinsics.
8827 case Intrinsic::thread_pointer: {
8828 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8829 return DAG.getRegister(RISCV::X4, PtrVT);
8830 }
8831 case Intrinsic::riscv_orc_b:
8832 case Intrinsic::riscv_brev8:
8833 case Intrinsic::riscv_sha256sig0:
8834 case Intrinsic::riscv_sha256sig1:
8835 case Intrinsic::riscv_sha256sum0:
8836 case Intrinsic::riscv_sha256sum1:
8837 case Intrinsic::riscv_sm3p0:
8838 case Intrinsic::riscv_sm3p1: {
8839 unsigned Opc;
8840 switch (IntNo) {
8841 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8842 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8843 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8844 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8845 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8846 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8847 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8848 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8849 }
8850
8851 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8852 SDValue NewOp =
8853 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8854 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8855 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8856 }
8857
8858 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
8859 }
8860 case Intrinsic::riscv_sm4ks:
8861 case Intrinsic::riscv_sm4ed: {
8862 unsigned Opc =
8863 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8864
8865 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8866 SDValue NewOp0 =
8867 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8868 SDValue NewOp1 =
8869 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8870 SDValue Res =
8871 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8873 }
8874
8875 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
8876 N3: Op.getOperand(i: 3));
8877 }
8878 case Intrinsic::riscv_zip:
8879 case Intrinsic::riscv_unzip: {
8880 unsigned Opc =
8881 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8882 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
8883 }
8884 case Intrinsic::riscv_mopr: {
8885 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8886 SDValue NewOp =
8887 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8888 SDValue Res = DAG.getNode(
8889 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8890 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8891 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8892 }
8893 return DAG.getNode(Opcode: RISCVISD::MOPR, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
8894 N2: Op.getOperand(i: 2));
8895 }
8896
8897 case Intrinsic::riscv_moprr: {
8898 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8899 SDValue NewOp0 =
8900 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8901 SDValue NewOp1 =
8902 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8903 SDValue Res = DAG.getNode(
8904 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8905 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8906 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8907 }
8908 return DAG.getNode(Opcode: RISCVISD::MOPRR, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
8909 N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
8910 }
8911 case Intrinsic::riscv_clmul:
8912 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8913 SDValue NewOp0 =
8914 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8915 SDValue NewOp1 =
8916 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8917 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8918 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8919 }
8920 return DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
8921 N2: Op.getOperand(i: 2));
8922 case Intrinsic::riscv_clmulh:
8923 case Intrinsic::riscv_clmulr: {
8924 unsigned Opc =
8925 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8926 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8927 SDValue NewOp0 =
8928 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8929 SDValue NewOp1 =
8930 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8931 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8932 DAG.getConstant(32, DL, MVT::i64));
8933 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8934 DAG.getConstant(32, DL, MVT::i64));
8935 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8936 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8937 DAG.getConstant(32, DL, MVT::i64));
8938 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8939 }
8940
8941 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
8942 }
8943 case Intrinsic::experimental_get_vector_length:
8944 return lowerGetVectorLength(N: Op.getNode(), DAG, Subtarget);
8945 case Intrinsic::experimental_cttz_elts:
8946 return lowerCttzElts(N: Op.getNode(), DAG, Subtarget);
8947 case Intrinsic::riscv_vmv_x_s: {
8948 SDValue Res = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
8949 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: Res);
8950 }
8951 case Intrinsic::riscv_vfmv_f_s:
8952 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Op.getValueType(),
8953 N1: Op.getOperand(i: 1), N2: DAG.getVectorIdxConstant(Val: 0, DL));
8954 case Intrinsic::riscv_vmv_v_x:
8955 return lowerScalarSplat(Passthru: Op.getOperand(i: 1), Scalar: Op.getOperand(i: 2),
8956 VL: Op.getOperand(i: 3), VT: Op.getSimpleValueType(), DL, DAG,
8957 Subtarget);
8958 case Intrinsic::riscv_vfmv_v_f:
8959 return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: Op.getValueType(),
8960 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
8961 case Intrinsic::riscv_vmv_s_x: {
8962 SDValue Scalar = Op.getOperand(i: 2);
8963
8964 if (Scalar.getValueType().bitsLE(VT: XLenVT)) {
8965 Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Scalar);
8966 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: Op.getValueType(),
8967 N1: Op.getOperand(i: 1), N2: Scalar, N3: Op.getOperand(i: 3));
8968 }
8969
8970 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8971
8972 // This is an i64 value that lives in two scalar registers. We have to
8973 // insert this in a convoluted way. First we build vXi64 splat containing
8974 // the two values that we assemble using some bit math. Next we'll use
8975 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8976 // to merge element 0 from our splat into the source vector.
8977 // FIXME: This is probably not the best way to do this, but it is
8978 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8979 // point.
8980 // sw lo, (a0)
8981 // sw hi, 4(a0)
8982 // vlse vX, (a0)
8983 //
8984 // vid.v vVid
8985 // vmseq.vx mMask, vVid, 0
8986 // vmerge.vvm vDest, vSrc, vVal, mMask
8987 MVT VT = Op.getSimpleValueType();
8988 SDValue Vec = Op.getOperand(i: 1);
8989 SDValue VL = getVLOperand(Op);
8990
8991 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar, VL, DAG);
8992 if (Op.getOperand(i: 1).isUndef())
8993 return SplattedVal;
8994 SDValue SplattedIdx =
8995 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8996 DAG.getConstant(0, DL, MVT::i32), VL);
8997
8998 MVT MaskVT = getMaskTypeFor(VecVT: VT);
8999 SDValue Mask = getAllOnesMask(VecVT: VT, VL, DL, DAG);
9000 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL);
9001 SDValue SelectCond =
9002 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
9003 Ops: {VID, SplattedIdx, DAG.getCondCode(Cond: ISD::SETEQ),
9004 DAG.getUNDEF(VT: MaskVT), Mask, VL});
9005 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: SelectCond, N2: SplattedVal,
9006 N3: Vec, N4: DAG.getUNDEF(VT), N5: VL);
9007 }
9008 case Intrinsic::riscv_vfmv_s_f:
9009 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT: Op.getSimpleValueType(),
9010 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
9011 // EGS * EEW >= 128 bits
9012 case Intrinsic::riscv_vaesdf_vv:
9013 case Intrinsic::riscv_vaesdf_vs:
9014 case Intrinsic::riscv_vaesdm_vv:
9015 case Intrinsic::riscv_vaesdm_vs:
9016 case Intrinsic::riscv_vaesef_vv:
9017 case Intrinsic::riscv_vaesef_vs:
9018 case Intrinsic::riscv_vaesem_vv:
9019 case Intrinsic::riscv_vaesem_vs:
9020 case Intrinsic::riscv_vaeskf1:
9021 case Intrinsic::riscv_vaeskf2:
9022 case Intrinsic::riscv_vaesz_vs:
9023 case Intrinsic::riscv_vsm4k:
9024 case Intrinsic::riscv_vsm4r_vv:
9025 case Intrinsic::riscv_vsm4r_vs: {
9026 if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) ||
9027 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) ||
9028 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget))
9029 report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW.");
9030 return Op;
9031 }
9032 // EGS * EEW >= 256 bits
9033 case Intrinsic::riscv_vsm3c:
9034 case Intrinsic::riscv_vsm3me: {
9035 if (!isValidEGW(EGS: 8, VT: Op.getSimpleValueType(), Subtarget) ||
9036 !isValidEGW(EGS: 8, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget))
9037 report_fatal_error(reason: "EGW should be greater than or equal to 8 * SEW.");
9038 return Op;
9039 }
9040 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9041 case Intrinsic::riscv_vsha2ch:
9042 case Intrinsic::riscv_vsha2cl:
9043 case Intrinsic::riscv_vsha2ms: {
9044 if (Op->getSimpleValueType(ResNo: 0).getScalarSizeInBits() == 64 &&
9045 !Subtarget.hasStdExtZvknhb())
9046 report_fatal_error(reason: "SEW=64 needs Zvknhb to be enabled.");
9047 if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) ||
9048 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) ||
9049 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget))
9050 report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW.");
9051 return Op;
9052 }
9053 case Intrinsic::riscv_sf_vc_v_x:
9054 case Intrinsic::riscv_sf_vc_v_i:
9055 case Intrinsic::riscv_sf_vc_v_xv:
9056 case Intrinsic::riscv_sf_vc_v_iv:
9057 case Intrinsic::riscv_sf_vc_v_vv:
9058 case Intrinsic::riscv_sf_vc_v_fv:
9059 case Intrinsic::riscv_sf_vc_v_xvv:
9060 case Intrinsic::riscv_sf_vc_v_ivv:
9061 case Intrinsic::riscv_sf_vc_v_vvv:
9062 case Intrinsic::riscv_sf_vc_v_fvv:
9063 case Intrinsic::riscv_sf_vc_v_xvw:
9064 case Intrinsic::riscv_sf_vc_v_ivw:
9065 case Intrinsic::riscv_sf_vc_v_vvw:
9066 case Intrinsic::riscv_sf_vc_v_fvw: {
9067 MVT VT = Op.getSimpleValueType();
9068
9069 SmallVector<SDValue> Operands{Op->op_values()};
9070 processVCIXOperands(OrigOp&: Op, Operands, DAG);
9071
9072 MVT RetVT = VT;
9073 if (VT.isFixedLengthVector())
9074 RetVT = getContainerForFixedLengthVector(VT);
9075 else if (VT.isFloatingPoint())
9076 RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()),
9077 EC: VT.getVectorElementCount());
9078
9079 SDValue NewNode = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: RetVT, Ops: Operands);
9080
9081 if (VT.isFixedLengthVector())
9082 NewNode = convertFromScalableVector(VT, V: NewNode, DAG, Subtarget);
9083 else if (VT.isFloatingPoint())
9084 NewNode = DAG.getBitcast(VT, V: NewNode);
9085
9086 if (Op == NewNode)
9087 break;
9088
9089 return NewNode;
9090 }
9091 }
9092
9093 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9094}
9095
9096static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9097 unsigned Type) {
9098 SDLoc DL(Op);
9099 SmallVector<SDValue> Operands{Op->op_values()};
9100 Operands.erase(CI: Operands.begin() + 1);
9101
9102 const RISCVSubtarget &Subtarget =
9103 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9104 MVT VT = Op.getSimpleValueType();
9105 MVT RetVT = VT;
9106 MVT FloatVT = VT;
9107
9108 if (VT.isFloatingPoint()) {
9109 RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()),
9110 EC: VT.getVectorElementCount());
9111 FloatVT = RetVT;
9112 }
9113 if (VT.isFixedLengthVector())
9114 RetVT = getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT: RetVT,
9115 Subtarget);
9116
9117 processVCIXOperands(OrigOp&: Op, Operands, DAG);
9118
9119 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9120 SDValue NewNode = DAG.getNode(Opcode: Type, DL, VTList: VTs, Ops: Operands);
9121 SDValue Chain = NewNode.getValue(R: 1);
9122
9123 if (VT.isFixedLengthVector())
9124 NewNode = convertFromScalableVector(VT: FloatVT, V: NewNode, DAG, Subtarget);
9125 if (VT.isFloatingPoint())
9126 NewNode = DAG.getBitcast(VT, V: NewNode);
9127
9128 NewNode = DAG.getMergeValues(Ops: {NewNode, Chain}, dl: DL);
9129
9130 return NewNode;
9131}
9132
9133static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9134 unsigned Type) {
9135 SmallVector<SDValue> Operands{Op->op_values()};
9136 Operands.erase(CI: Operands.begin() + 1);
9137 processVCIXOperands(OrigOp&: Op, Operands, DAG);
9138
9139 return DAG.getNode(Opcode: Type, DL: SDLoc(Op), VT: Op.getValueType(), Ops: Operands);
9140}
9141
9142SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9143 SelectionDAG &DAG) const {
9144 unsigned IntNo = Op.getConstantOperandVal(i: 1);
9145 switch (IntNo) {
9146 default:
9147 break;
9148 case Intrinsic::riscv_masked_strided_load: {
9149 SDLoc DL(Op);
9150 MVT XLenVT = Subtarget.getXLenVT();
9151
9152 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9153 // the selection of the masked intrinsics doesn't do this for us.
9154 SDValue Mask = Op.getOperand(i: 5);
9155 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
9156
9157 MVT VT = Op->getSimpleValueType(ResNo: 0);
9158 MVT ContainerVT = VT;
9159 if (VT.isFixedLengthVector())
9160 ContainerVT = getContainerForFixedLengthVector(VT);
9161
9162 SDValue PassThru = Op.getOperand(i: 2);
9163 if (!IsUnmasked) {
9164 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
9165 if (VT.isFixedLengthVector()) {
9166 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
9167 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
9168 }
9169 }
9170
9171 auto *Load = cast<MemIntrinsicSDNode>(Val&: Op);
9172 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
9173 SDValue Ptr = Op.getOperand(i: 3);
9174 SDValue Stride = Op.getOperand(i: 4);
9175 SDValue Result, Chain;
9176
// TODO: We currently restrict this to unmasked loads in consideration of
// the complexity of handling all-false masks.
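// A zero-stride load reads the same address for every element, so when it is
// unmasked we can issue a single scalar load and splat the result: the first
// branch below zero-extends the scalar load and splats it for integer
// element types, while the second loads the scalar directly and uses a
// generic splat.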
9179 MVT ScalarVT = ContainerVT.getVectorElementType();
9180 if (IsUnmasked && isNullConstant(V: Stride) && ContainerVT.isInteger()) {
9181 SDValue ScalarLoad =
9182 DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT: XLenVT, Chain: Load->getChain(), Ptr,
9183 MemVT: ScalarVT, MMO: Load->getMemOperand());
9184 Chain = ScalarLoad.getValue(R: 1);
9185 Result = lowerScalarSplat(Passthru: SDValue(), Scalar: ScalarLoad, VL, VT: ContainerVT, DL, DAG,
9186 Subtarget);
9187 } else if (IsUnmasked && isNullConstant(V: Stride) && isTypeLegal(VT: ScalarVT)) {
9188 SDValue ScalarLoad = DAG.getLoad(VT: ScalarVT, dl: DL, Chain: Load->getChain(), Ptr,
9189 MMO: Load->getMemOperand());
9190 Chain = ScalarLoad.getValue(R: 1);
9191 Result = DAG.getSplat(VT: ContainerVT, DL, Op: ScalarLoad);
9192 } else {
9193 SDValue IntID = DAG.getTargetConstant(
9194 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9195 XLenVT);
9196
9197 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9198 if (IsUnmasked)
9199 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
9200 else
9201 Ops.push_back(Elt: PassThru);
9202 Ops.push_back(Elt: Ptr);
9203 Ops.push_back(Elt: Stride);
9204 if (!IsUnmasked)
9205 Ops.push_back(Elt: Mask);
9206 Ops.push_back(Elt: VL);
9207 if (!IsUnmasked) {
9208 SDValue Policy =
9209 DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT);
9210 Ops.push_back(Elt: Policy);
9211 }
9212
9213 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9214 Result =
9215 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
9216 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
9217 Chain = Result.getValue(R: 1);
9218 }
9219 if (VT.isFixedLengthVector())
9220 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
9221 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
9222 }
9223 case Intrinsic::riscv_seg2_load:
9224 case Intrinsic::riscv_seg3_load:
9225 case Intrinsic::riscv_seg4_load:
9226 case Intrinsic::riscv_seg5_load:
9227 case Intrinsic::riscv_seg6_load:
9228 case Intrinsic::riscv_seg7_load:
9229 case Intrinsic::riscv_seg8_load: {
9230 SDLoc DL(Op);
9231 static const Intrinsic::ID VlsegInts[7] = {
9232 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9233 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9234 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9235 Intrinsic::riscv_vlseg8};
9236 unsigned NF = Op->getNumValues() - 1;
9237 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9238 MVT XLenVT = Subtarget.getXLenVT();
9239 MVT VT = Op->getSimpleValueType(ResNo: 0);
9240 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9241
9242 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG,
9243 Subtarget);
9244 SDValue IntID = DAG.getTargetConstant(Val: VlsegInts[NF - 2], DL, VT: XLenVT);
9245 auto *Load = cast<MemIntrinsicSDNode>(Val&: Op);
9246 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9247 ContainerVTs.push_back(MVT::Other);
9248 SDVTList VTs = DAG.getVTList(VTs: ContainerVTs);
9249 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9250 Ops.insert(I: Ops.end(), NumToInsert: NF, Elt: DAG.getUNDEF(VT: ContainerVT));
9251 Ops.push_back(Elt: Op.getOperand(i: 2));
9252 Ops.push_back(Elt: VL);
9253 SDValue Result =
9254 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
9255 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
9256 SmallVector<SDValue, 9> Results;
9257 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9258 Results.push_back(Elt: convertFromScalableVector(VT, V: Result.getValue(R: RetIdx),
9259 DAG, Subtarget));
9260 Results.push_back(Elt: Result.getValue(R: NF));
9261 return DAG.getMergeValues(Ops: Results, dl: DL);
9262 }
9263 case Intrinsic::riscv_sf_vc_v_x_se:
9264 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_X_SE);
9265 case Intrinsic::riscv_sf_vc_v_i_se:
9266 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_I_SE);
9267 case Intrinsic::riscv_sf_vc_v_xv_se:
9268 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XV_SE);
9269 case Intrinsic::riscv_sf_vc_v_iv_se:
9270 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IV_SE);
9271 case Intrinsic::riscv_sf_vc_v_vv_se:
9272 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VV_SE);
9273 case Intrinsic::riscv_sf_vc_v_fv_se:
9274 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FV_SE);
9275 case Intrinsic::riscv_sf_vc_v_xvv_se:
9276 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVV_SE);
9277 case Intrinsic::riscv_sf_vc_v_ivv_se:
9278 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVV_SE);
9279 case Intrinsic::riscv_sf_vc_v_vvv_se:
9280 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVV_SE);
9281 case Intrinsic::riscv_sf_vc_v_fvv_se:
9282 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVV_SE);
9283 case Intrinsic::riscv_sf_vc_v_xvw_se:
9284 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVW_SE);
9285 case Intrinsic::riscv_sf_vc_v_ivw_se:
9286 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVW_SE);
9287 case Intrinsic::riscv_sf_vc_v_vvw_se:
9288 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVW_SE);
9289 case Intrinsic::riscv_sf_vc_v_fvw_se:
9290 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVW_SE);
9291 }
9292
9293 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9294}
9295
9296SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9297 SelectionDAG &DAG) const {
9298 unsigned IntNo = Op.getConstantOperandVal(i: 1);
9299 switch (IntNo) {
9300 default:
9301 break;
9302 case Intrinsic::riscv_masked_strided_store: {
9303 SDLoc DL(Op);
9304 MVT XLenVT = Subtarget.getXLenVT();
9305
9306 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9307 // the selection of the masked intrinsics doesn't do this for us.
9308 SDValue Mask = Op.getOperand(i: 5);
9309 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
9310
9311 SDValue Val = Op.getOperand(i: 2);
9312 MVT VT = Val.getSimpleValueType();
9313 MVT ContainerVT = VT;
9314 if (VT.isFixedLengthVector()) {
9315 ContainerVT = getContainerForFixedLengthVector(VT);
9316 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
9317 }
9318 if (!IsUnmasked) {
9319 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
9320 if (VT.isFixedLengthVector())
9321 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
9322 }
9323
9324 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
9325
9326 SDValue IntID = DAG.getTargetConstant(
9327 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9328 XLenVT);
9329
9330 auto *Store = cast<MemIntrinsicSDNode>(Val&: Op);
9331 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9332 Ops.push_back(Elt: Val);
9333 Ops.push_back(Elt: Op.getOperand(i: 3)); // Ptr
9334 Ops.push_back(Elt: Op.getOperand(i: 4)); // Stride
9335 if (!IsUnmasked)
9336 Ops.push_back(Elt: Mask);
9337 Ops.push_back(Elt: VL);
9338
9339 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: Store->getVTList(),
9340 Ops, MemVT: Store->getMemoryVT(),
9341 MMO: Store->getMemOperand());
9342 }
9343 case Intrinsic::riscv_seg2_store:
9344 case Intrinsic::riscv_seg3_store:
9345 case Intrinsic::riscv_seg4_store:
9346 case Intrinsic::riscv_seg5_store:
9347 case Intrinsic::riscv_seg6_store:
9348 case Intrinsic::riscv_seg7_store:
9349 case Intrinsic::riscv_seg8_store: {
9350 SDLoc DL(Op);
9351 static const Intrinsic::ID VssegInts[] = {
9352 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9353 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9354 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9355 Intrinsic::riscv_vsseg8};
9356 // Operands are (chain, int_id, vec*, ptr, vl)
9357 unsigned NF = Op->getNumOperands() - 4;
9358 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9359 MVT XLenVT = Subtarget.getXLenVT();
9360 MVT VT = Op->getOperand(Num: 2).getSimpleValueType();
9361 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9362
9363 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG,
9364 Subtarget);
9365 SDValue IntID = DAG.getTargetConstant(Val: VssegInts[NF - 2], DL, VT: XLenVT);
9366 SDValue Ptr = Op->getOperand(Num: NF + 2);
9367
9368 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Val&: Op);
9369 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9370 for (unsigned i = 0; i < NF; i++)
9371 Ops.push_back(Elt: convertToScalableVector(
9372 VT: ContainerVT, V: FixedIntrinsic->getOperand(Num: 2 + i), DAG, Subtarget));
9373 Ops.append(IL: {Ptr, VL});
9374
9375 return DAG.getMemIntrinsicNode(
9376 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9377 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9378 }
9379 case Intrinsic::riscv_sf_vc_xv_se:
9380 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XV_SE);
9381 case Intrinsic::riscv_sf_vc_iv_se:
9382 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IV_SE);
9383 case Intrinsic::riscv_sf_vc_vv_se:
9384 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VV_SE);
9385 case Intrinsic::riscv_sf_vc_fv_se:
9386 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FV_SE);
9387 case Intrinsic::riscv_sf_vc_xvv_se:
9388 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVV_SE);
9389 case Intrinsic::riscv_sf_vc_ivv_se:
9390 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVV_SE);
9391 case Intrinsic::riscv_sf_vc_vvv_se:
9392 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVV_SE);
9393 case Intrinsic::riscv_sf_vc_fvv_se:
9394 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVV_SE);
9395 case Intrinsic::riscv_sf_vc_xvw_se:
9396 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVW_SE);
9397 case Intrinsic::riscv_sf_vc_ivw_se:
9398 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVW_SE);
9399 case Intrinsic::riscv_sf_vc_vvw_se:
9400 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVW_SE);
9401 case Intrinsic::riscv_sf_vc_fvw_se:
9402 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVW_SE);
9403 }
9404
9405 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9406}
9407
9408static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9409 switch (ISDOpcode) {
9410 default:
9411 llvm_unreachable("Unhandled reduction");
9412 case ISD::VP_REDUCE_ADD:
9413 case ISD::VECREDUCE_ADD:
9414 return RISCVISD::VECREDUCE_ADD_VL;
9415 case ISD::VP_REDUCE_UMAX:
9416 case ISD::VECREDUCE_UMAX:
9417 return RISCVISD::VECREDUCE_UMAX_VL;
9418 case ISD::VP_REDUCE_SMAX:
9419 case ISD::VECREDUCE_SMAX:
9420 return RISCVISD::VECREDUCE_SMAX_VL;
9421 case ISD::VP_REDUCE_UMIN:
9422 case ISD::VECREDUCE_UMIN:
9423 return RISCVISD::VECREDUCE_UMIN_VL;
9424 case ISD::VP_REDUCE_SMIN:
9425 case ISD::VECREDUCE_SMIN:
9426 return RISCVISD::VECREDUCE_SMIN_VL;
9427 case ISD::VP_REDUCE_AND:
9428 case ISD::VECREDUCE_AND:
9429 return RISCVISD::VECREDUCE_AND_VL;
9430 case ISD::VP_REDUCE_OR:
9431 case ISD::VECREDUCE_OR:
9432 return RISCVISD::VECREDUCE_OR_VL;
9433 case ISD::VP_REDUCE_XOR:
9434 case ISD::VECREDUCE_XOR:
9435 return RISCVISD::VECREDUCE_XOR_VL;
9436 case ISD::VP_REDUCE_FADD:
9437 return RISCVISD::VECREDUCE_FADD_VL;
9438 case ISD::VP_REDUCE_SEQ_FADD:
9439 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9440 case ISD::VP_REDUCE_FMAX:
9441 return RISCVISD::VECREDUCE_FMAX_VL;
9442 case ISD::VP_REDUCE_FMIN:
9443 return RISCVISD::VECREDUCE_FMIN_VL;
9444 }
9445
9446}
9447
9448SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9449 SelectionDAG &DAG,
9450 bool IsVP) const {
9451 SDLoc DL(Op);
9452 SDValue Vec = Op.getOperand(i: IsVP ? 1 : 0);
9453 MVT VecVT = Vec.getSimpleValueType();
9454 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9455 Op.getOpcode() == ISD::VECREDUCE_OR ||
9456 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9457 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9458 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9459 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9460 "Unexpected reduction lowering");
9461
9462 MVT XLenVT = Subtarget.getXLenVT();
9463
9464 MVT ContainerVT = VecVT;
9465 if (VecVT.isFixedLengthVector()) {
9466 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9467 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9468 }
9469
9470 SDValue Mask, VL;
9471 if (IsVP) {
9472 Mask = Op.getOperand(i: 2);
9473 VL = Op.getOperand(i: 3);
9474 } else {
9475 std::tie(args&: Mask, args&: VL) =
9476 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9477 }
9478
9479 unsigned BaseOpc;
9480 ISD::CondCode CC;
9481 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
9482
9483 switch (Op.getOpcode()) {
9484 default:
9485 llvm_unreachable("Unhandled reduction");
9486 case ISD::VECREDUCE_AND:
9487 case ISD::VP_REDUCE_AND: {
9488 // vcpop ~x == 0
9489 SDValue TrueMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
9490 Vec = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Vec, N2: TrueMask, N3: VL);
9491 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
9492 CC = ISD::SETEQ;
9493 BaseOpc = ISD::AND;
9494 break;
9495 }
9496 case ISD::VECREDUCE_OR:
9497 case ISD::VP_REDUCE_OR:
9498 // vcpop x != 0
9499 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
9500 CC = ISD::SETNE;
9501 BaseOpc = ISD::OR;
9502 break;
9503 case ISD::VECREDUCE_XOR:
9504 case ISD::VP_REDUCE_XOR: {
9505 // ((vcpop x) & 1) != 0
9506 SDValue One = DAG.getConstant(Val: 1, DL, VT: XLenVT);
9507 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
9508 Vec = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Vec, N2: One);
9509 CC = ISD::SETNE;
9510 BaseOpc = ISD::XOR;
9511 break;
9512 }
9513 }
9514
9515 SDValue SetCC = DAG.getSetCC(DL, VT: XLenVT, LHS: Vec, RHS: Zero, Cond: CC);
9516 SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: SetCC);
9517
9518 if (!IsVP)
9519 return SetCC;
9520
9521 // Now include the start value in the operation.
9522 // Note that we must return the start value when no elements are operated
9523 // upon. The vcpop instructions we've emitted in each case above will return
9524 // 0 for an inactive vector, and so we've already received the neutral value:
9525 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9526 // can simply include the start value.
9527 return DAG.getNode(Opcode: BaseOpc, DL, VT: Op.getValueType(), N1: SetCC, N2: Op.getOperand(i: 0));
9528}
9529
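// Return true if the AVL is provably non-zero: either the X0 sentinel
// (meaning VLMAX) or an immediate of at least one element.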
9530static bool isNonZeroAVL(SDValue AVL) {
9531 auto *RegisterAVL = dyn_cast<RegisterSDNode>(Val&: AVL);
9532 auto *ImmAVL = dyn_cast<ConstantSDNode>(Val&: AVL);
9533 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9534 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9535}
9536
9537/// Helper to lower a reduction sequence of the form:
9538/// scalar = reduce_op vec, scalar_start
9539static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9540 SDValue StartValue, SDValue Vec, SDValue Mask,
9541 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9542 const RISCVSubtarget &Subtarget) {
9543 const MVT VecVT = Vec.getSimpleValueType();
9544 const MVT M1VT = getLMUL1VT(VT: VecVT);
9545 const MVT XLenVT = Subtarget.getXLenVT();
9546 const bool NonZeroAVL = isNonZeroAVL(AVL: VL);
9547
9548 // The reduction needs an LMUL1 input; do the splat at LMUL1, or at the
9549 // original VT if it is no larger than LMUL1 (e.g. a fractional LMUL type).
9550 auto InnerVT = VecVT.bitsLE(VT: M1VT) ? VecVT : M1VT;
9551 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9552 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9553 // be the result of the reduction operation.
9554 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(Val: 1, DL, VT: XLenVT);
9555 SDValue InitialValue = lowerScalarInsert(Scalar: StartValue, VL: InnerVL, VT: InnerVT, DL,
9556 DAG, Subtarget);
9557 if (M1VT != InnerVT)
9558 InitialValue =
9559 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: M1VT, N1: DAG.getUNDEF(VT: M1VT),
9560 N2: InitialValue, N3: DAG.getVectorIdxConstant(Val: 0, DL));
9561 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(VT: M1VT) : InitialValue;
9562 SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT);
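  // Operand layout of the RISCVISD reduction nodes: passthru, vector source,
  // start value inserted into element 0, mask, VL, and tail policy.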
9563 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9564 SDValue Reduction = DAG.getNode(Opcode: RVVOpcode, DL, VT: M1VT, Ops);
9565 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Reduction,
9566 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9567}
9568
9569SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9570 SelectionDAG &DAG) const {
9571 SDLoc DL(Op);
9572 SDValue Vec = Op.getOperand(i: 0);
9573 EVT VecEVT = Vec.getValueType();
9574
9575 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
9576
9577 // Due to the ordering in type legalization we may have a vector type that
9578 // needs to be split. Do that manually so we can get down to a legal type.
9579 while (getTypeAction(Context&: *DAG.getContext(), VT: VecEVT) ==
9580 TargetLowering::TypeSplitVector) {
9581 auto [Lo, Hi] = DAG.SplitVector(N: Vec, DL);
9582 VecEVT = Lo.getValueType();
9583 Vec = DAG.getNode(Opcode: BaseOpc, DL, VT: VecEVT, N1: Lo, N2: Hi);
9584 }
9585
9586 // TODO: The type may need to be widened rather than split. Or widened before
9587 // it can be split.
9588 if (!isTypeLegal(VT: VecEVT))
9589 return SDValue();
9590
9591 MVT VecVT = VecEVT.getSimpleVT();
9592 MVT VecEltVT = VecVT.getVectorElementType();
9593 unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode());
9594
9595 MVT ContainerVT = VecVT;
9596 if (VecVT.isFixedLengthVector()) {
9597 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9598 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9599 }
9600
9601 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9602
9603 SDValue StartV = DAG.getNeutralElement(Opcode: BaseOpc, DL, VT: VecEltVT, Flags: SDNodeFlags());
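  // Integer min/max and the bitwise ops are idempotent, so the first vector
  // element can serve as the start value; this avoids materializing
  // neutral-element constants such as INT_MIN or all-ones.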
9604 switch (BaseOpc) {
9605 case ISD::AND:
9606 case ISD::OR:
9607 case ISD::UMAX:
9608 case ISD::UMIN:
9609 case ISD::SMAX:
9610 case ISD::SMIN:
9611 StartV = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: VecEltVT, N1: Vec,
9612 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9613 }
9614 return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: StartV, Vec,
9615 Mask, VL, DL, DAG, Subtarget);
9616}
9617
9618// Given a reduction op, this function returns the matching reduction opcode,
9619// the vector SDValue and the scalar SDValue required to lower this to a
9620// RISCVISD node.
9621static std::tuple<unsigned, SDValue, SDValue>
9622getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9623 const RISCVSubtarget &Subtarget) {
9624 SDLoc DL(Op);
9625 auto Flags = Op->getFlags();
9626 unsigned Opcode = Op.getOpcode();
9627 switch (Opcode) {
9628 default:
9629 llvm_unreachable("Unhandled reduction");
9630 case ISD::VECREDUCE_FADD: {
9631 // Use positive zero if we can. It is cheaper to materialize.
9632 SDValue Zero =
9633 DAG.getConstantFP(Val: Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT: EltVT);
9634 return std::make_tuple(args: RISCVISD::VECREDUCE_FADD_VL, args: Op.getOperand(i: 0), args&: Zero);
9635 }
9636 case ISD::VECREDUCE_SEQ_FADD:
9637 return std::make_tuple(args: RISCVISD::VECREDUCE_SEQ_FADD_VL, args: Op.getOperand(i: 1),
9638 args: Op.getOperand(i: 0));
9639 case ISD::VECREDUCE_FMINIMUM:
9640 case ISD::VECREDUCE_FMAXIMUM:
9641 case ISD::VECREDUCE_FMIN:
9642 case ISD::VECREDUCE_FMAX: {
9643 SDValue Front =
9644 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Op.getOperand(i: 0),
9645 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9646 unsigned RVVOpc =
9647 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9648 ? RISCVISD::VECREDUCE_FMIN_VL
9649 : RISCVISD::VECREDUCE_FMAX_VL;
9650 return std::make_tuple(args&: RVVOpc, args: Op.getOperand(i: 0), args&: Front);
9651 }
9652 }
9653}
9654
9655SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9656 SelectionDAG &DAG) const {
9657 SDLoc DL(Op);
9658 MVT VecEltVT = Op.getSimpleValueType();
9659
9660 unsigned RVVOpcode;
9661 SDValue VectorVal, ScalarVal;
9662 std::tie(args&: RVVOpcode, args&: VectorVal, args&: ScalarVal) =
9663 getRVVFPReductionOpAndOperands(Op, DAG, EltVT: VecEltVT, Subtarget);
9664 MVT VecVT = VectorVal.getSimpleValueType();
9665
9666 MVT ContainerVT = VecVT;
9667 if (VecVT.isFixedLengthVector()) {
9668 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9669 VectorVal = convertToScalableVector(VT: ContainerVT, V: VectorVal, DAG, Subtarget);
9670 }
9671
9672 MVT ResVT = Op.getSimpleValueType();
9673 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9674 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, StartValue: ScalarVal, Vec: VectorVal, Mask,
9675 VL, DL, DAG, Subtarget);
9676 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9677 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9678 return Res;
9679
9680 if (Op->getFlags().hasNoNaNs())
9681 return Res;
9682
9683 // Force the output to NaN if any element is NaN.
9684 SDValue IsNan =
9685 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
9686 Ops: {VectorVal, VectorVal, DAG.getCondCode(Cond: ISD::SETNE),
9687 DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL});
9688 MVT XLenVT = Subtarget.getXLenVT();
9689 SDValue CPop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: IsNan, N2: Mask, N3: VL);
9690 SDValue NoNaNs = DAG.getSetCC(DL, VT: XLenVT, LHS: CPop,
9691 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ);
9692 return DAG.getSelect(
9693 DL, VT: ResVT, Cond: NoNaNs, LHS: Res,
9694 RHS: DAG.getConstantFP(Val: APFloat::getNaN(Sem: DAG.EVTToAPFloatSemantics(VT: ResVT)), DL,
9695 VT: ResVT));
9696}
9697
9698SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9699 SelectionDAG &DAG) const {
9700 SDLoc DL(Op);
9701 SDValue Vec = Op.getOperand(i: 1);
9702 EVT VecEVT = Vec.getValueType();
9703
9704 // TODO: The type may need to be widened rather than split. Or widened before
9705 // it can be split.
9706 if (!isTypeLegal(VT: VecEVT))
9707 return SDValue();
9708
9709 MVT VecVT = VecEVT.getSimpleVT();
9710 unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode());
9711
9712 if (VecVT.isFixedLengthVector()) {
9713 auto ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9714 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9715 }
9716
9717 SDValue VL = Op.getOperand(i: 3);
9718 SDValue Mask = Op.getOperand(i: 2);
9719 return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: Op.getOperand(i: 0),
9720 Vec, Mask, VL, DL, DAG, Subtarget);
9721}
9722
9723SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9724 SelectionDAG &DAG) const {
9725 SDValue Vec = Op.getOperand(i: 0);
9726 SDValue SubVec = Op.getOperand(i: 1);
9727 MVT VecVT = Vec.getSimpleValueType();
9728 MVT SubVecVT = SubVec.getSimpleValueType();
9729
9730 SDLoc DL(Op);
9731 MVT XLenVT = Subtarget.getXLenVT();
9732 unsigned OrigIdx = Op.getConstantOperandVal(i: 2);
9733 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9734
9735 // We don't have the ability to slide mask vectors up indexed by their i1
9736 // elements; the smallest we can do is i8. Often we are able to bitcast to
9737 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9738 // into a scalable one, we might not necessarily have enough scalable
9739 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9740 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9741 (OrigIdx != 0 || !Vec.isUndef())) {
9742 if (VecVT.getVectorMinNumElements() >= 8 &&
9743 SubVecVT.getVectorMinNumElements() >= 8) {
9744 assert(OrigIdx % 8 == 0 && "Invalid index");
9745 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9746 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9747 "Unexpected mask vector lowering");
9748 OrigIdx /= 8;
9749 SubVecVT =
9750 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9751 SubVecVT.isScalableVector());
9752 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9753 VecVT.isScalableVector());
9754 Vec = DAG.getBitcast(VT: VecVT, V: Vec);
9755 SubVec = DAG.getBitcast(VT: SubVecVT, V: SubVec);
9756 } else {
9757 // We can't slide this mask vector up indexed by its i1 elements.
9758 // This poses a problem when we wish to insert a scalable vector which
9759 // can't be re-expressed as a larger type. Just choose the slow path and
9760 // extend to a larger type, then truncate back down.
9761 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9762 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9763 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec);
9764 SubVec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtSubVecVT, Operand: SubVec);
9765 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ExtVecVT, N1: Vec, N2: SubVec,
9766 N3: Op.getOperand(i: 2));
9767 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtVecVT);
9768 return DAG.getSetCC(DL, VT: VecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE);
9769 }
9770 }
9771
9772 // If the subvector is a fixed-length type, we cannot use subregister
9773 // manipulation to simplify the codegen; we don't know which register of a
9774 // LMUL group contains the specific subvector as we only know the minimum
9775 // register size. Therefore we must slide the vector group up the full
9776 // amount.
9777 if (SubVecVT.isFixedLengthVector()) {
9778 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9779 return Op;
9780 MVT ContainerVT = VecVT;
9781 if (VecVT.isFixedLengthVector()) {
9782 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9783 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9784 }
9785
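    // Inserting at index 0 into an otherwise-undef vector just widens the
    // subvector into the container type; no slide is required.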
9786 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9787 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT,
9788 N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec,
9789 N3: DAG.getVectorIdxConstant(Val: 0, DL));
9790 SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget);
9791 return DAG.getBitcast(VT: Op.getValueType(), V: SubVec);
9792 }
9793
9794 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT,
9795 N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec,
9796 N3: DAG.getVectorIdxConstant(Val: 0, DL));
9797 SDValue Mask =
9798 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9799 // Set the vector length to only the number of elements we care about. Note
9800 // that for slideup this includes the offset.
9801 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9802 SDValue VL = getVLOp(NumElts: EndIndex, ContainerVT, DL, DAG, Subtarget);
9803
9804 // Use tail agnostic policy if we're inserting over Vec's tail.
9805 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9806 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9807 Policy = RISCVII::TAIL_AGNOSTIC;
9808
9809 // If we're inserting into the lowest elements, use a tail undisturbed
9810 // vmv.v.v.
9811 if (OrigIdx == 0) {
9812 SubVec =
9813 DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: Vec, N2: SubVec, N3: VL);
9814 } else {
9815 SDValue SlideupAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT);
9816 SubVec = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: SubVec,
9817 Offset: SlideupAmt, Mask, VL, Policy);
9818 }
9819
9820 if (VecVT.isFixedLengthVector())
9821 SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget);
9822 return DAG.getBitcast(VT: Op.getValueType(), V: SubVec);
9823 }
9824
9825 unsigned SubRegIdx, RemIdx;
9826 std::tie(args&: SubRegIdx, args&: RemIdx) =
9827 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9828 VecVT, SubVecVT, InsertExtractIdx: OrigIdx, TRI);
9829
9830 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(VT: SubVecVT);
9831 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9832 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9833 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9834
9835 // 1. If the Idx has been completely eliminated and this subvector's size is
9836 // a vector register or a multiple thereof, or the surrounding elements are
9837 // undef, then this is a subvector insert which naturally aligns to a vector
9838 // register. These can easily be handled using subregister manipulation.
9839 // 2. If the subvector is smaller than a vector register, then the insertion
9840 // must preserve the undisturbed elements of the register. We do this by
9841 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9842 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9843 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9844 // LMUL=1 type back into the larger vector (resolving to another subregister
9845 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9846 // to avoid allocating a large register group to hold our subvector.
9847 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9848 return Op;
9849
9850 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
9851 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
9852 // (in our case undisturbed). This means we can set up a subvector insertion
9853 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9854 // size of the subvector.
9855 MVT InterSubVT = VecVT;
9856 SDValue AlignedExtract = Vec;
9857 unsigned AlignedIdx = OrigIdx - RemIdx;
9858 if (VecVT.bitsGT(VT: getLMUL1VT(VT: VecVT))) {
9859 InterSubVT = getLMUL1VT(VT: VecVT);
9860 // Extract a subvector equal to the nearest full vector register type. This
9861 // should resolve to a EXTRACT_SUBREG instruction.
9862 AlignedExtract = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: InterSubVT, N1: Vec,
9863 N2: DAG.getVectorIdxConstant(Val: AlignedIdx, DL));
9864 }
9865
9866 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: InterSubVT,
9867 N1: DAG.getUNDEF(VT: InterSubVT), N2: SubVec,
9868 N3: DAG.getVectorIdxConstant(Val: 0, DL));
9869
9870 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9871
9872 ElementCount EndIndex =
9873 ElementCount::getScalable(MinVal: RemIdx) + SubVecVT.getVectorElementCount();
9874 VL = computeVLMax(VecVT: SubVecVT, DL, DAG);
9875
9876 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9877 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9878 if (EndIndex == InterSubVT.getVectorElementCount())
9879 Policy = RISCVII::TAIL_AGNOSTIC;
9880
9881 // If we're inserting into the lowest elements, use a tail undisturbed
9882 // vmv.v.v.
9883 if (RemIdx == 0) {
9884 SubVec = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: InterSubVT, N1: AlignedExtract,
9885 N2: SubVec, N3: VL);
9886 } else {
9887 SDValue SlideupAmt =
9888 DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), RemIdx));
9889
9890 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9891 VL = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: SlideupAmt, N2: VL);
9892
9893 SubVec = getVSlideup(DAG, Subtarget, DL, VT: InterSubVT, Merge: AlignedExtract, Op: SubVec,
9894 Offset: SlideupAmt, Mask, VL, Policy);
9895 }
9896
9897 // If required, insert this subvector back into the correct vector register.
9898 // This should resolve to an INSERT_SUBREG instruction.
9899 if (VecVT.bitsGT(VT: InterSubVT))
9900 SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: Vec, N2: SubVec,
9901 N3: DAG.getVectorIdxConstant(Val: AlignedIdx, DL));
9902
9903 // We might have bitcast from a mask type: cast back to the original type if
9904 // required.
9905 return DAG.getBitcast(VT: Op.getSimpleValueType(), V: SubVec);
9906}
9907
9908SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9909 SelectionDAG &DAG) const {
9910 SDValue Vec = Op.getOperand(i: 0);
9911 MVT SubVecVT = Op.getSimpleValueType();
9912 MVT VecVT = Vec.getSimpleValueType();
9913
9914 SDLoc DL(Op);
9915 MVT XLenVT = Subtarget.getXLenVT();
9916 unsigned OrigIdx = Op.getConstantOperandVal(i: 1);
9917 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9918
9919 // We don't have the ability to slide mask vectors down indexed by their i1
9920 // elements; the smallest we can do is i8. Often we are able to bitcast to
9921 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9922 // from a scalable one, we might not necessarily have enough scalable
9923 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9924 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9925 if (VecVT.getVectorMinNumElements() >= 8 &&
9926 SubVecVT.getVectorMinNumElements() >= 8) {
9927 assert(OrigIdx % 8 == 0 && "Invalid index");
9928 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9929 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9930 "Unexpected mask vector lowering");
9931 OrigIdx /= 8;
9932 SubVecVT =
9933 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9934 SubVecVT.isScalableVector());
9935 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9936 VecVT.isScalableVector());
9937 Vec = DAG.getBitcast(VT: VecVT, V: Vec);
9938 } else {
9939 // We can't slide this mask vector down indexed by its i1 elements.
9940 // This poses a problem when we wish to extract a scalable vector which
9941 // can't be re-expressed as a larger type. Just choose the slow path and
9942 // extend to a larger type, then truncate back down.
9943 // TODO: We could probably improve this when extracting certain fixed-length
9944 // subvectors from fixed-length vectors, where we could extract as i8 and
9945 // shift the correct element right to reach the desired subvector.
9946 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9947 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9948 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec);
9949 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ExtSubVecVT, N1: Vec,
9950 N2: Op.getOperand(i: 1));
9951 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtSubVecVT);
9952 return DAG.getSetCC(DL, VT: SubVecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE);
9953 }
9954 }
9955
9956 // With an index of 0 this is a cast-like subvector extract, which can be
9957 // performed with subregister operations.
9958 if (OrigIdx == 0)
9959 return Op;
9960
9961 const auto VLen = Subtarget.getRealVLen();
9962
9963 // If the subvector is a fixed-length type and we don't know VLEN
9964 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9965 // don't know which register of a LMUL group contains the specific subvector
9966 // as we only know the minimum register size. Therefore we must slide the
9967 // vector group down the full amount.
9968 if (SubVecVT.isFixedLengthVector() && !VLen) {
9969 MVT ContainerVT = VecVT;
9970 if (VecVT.isFixedLengthVector()) {
9971 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9972 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9973 }
9974
9975 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9976 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9977 if (auto ShrunkVT =
9978 getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: LastIdx, DL, DAG, Subtarget)) {
9979 ContainerVT = *ShrunkVT;
9980 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec,
9981 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9982 }
9983
9984 SDValue Mask =
9985 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9986 // Set the vector length to only the number of elements we care about. This
9987 // avoids sliding down elements we're going to discard straight away.
9988 SDValue VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9989 Subtarget);
9990 SDValue SlidedownAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT);
9991 SDValue Slidedown =
9992 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
9993 Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: SlidedownAmt, Mask, VL);
9994 // Now we can use a cast-like subvector extract to get the result.
9995 Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown,
9996 N2: DAG.getVectorIdxConstant(Val: 0, DL));
9997 return DAG.getBitcast(VT: Op.getValueType(), V: Slidedown);
9998 }
9999
10000 if (VecVT.isFixedLengthVector()) {
10001 VecVT = getContainerForFixedLengthVector(VT: VecVT);
10002 Vec = convertToScalableVector(VT: VecVT, V: Vec, DAG, Subtarget);
10003 }
10004
10005 MVT ContainerSubVecVT = SubVecVT;
10006 if (SubVecVT.isFixedLengthVector())
10007 ContainerSubVecVT = getContainerForFixedLengthVector(VT: SubVecVT);
10008
10009 unsigned SubRegIdx;
10010 ElementCount RemIdx;
10011 // extract_subvector scales the index by vscale if the subvector is scalable,
10012 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10013 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10014 if (SubVecVT.isFixedLengthVector()) {
10015 assert(VLen);
10016 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10017 auto Decompose =
10018 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10019 VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx / Vscale, TRI);
10020 SubRegIdx = Decompose.first;
10021 RemIdx = ElementCount::getFixed(MinVal: (Decompose.second * Vscale) +
10022 (OrigIdx % Vscale));
10023 } else {
10024 auto Decompose =
10025 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10026 VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx, TRI);
10027 SubRegIdx = Decompose.first;
10028 RemIdx = ElementCount::getScalable(MinVal: Decompose.second);
10029 }
10030
10031 // If the Idx has been completely eliminated then this is a subvector extract
10032 // which naturally aligns to a vector register. These can easily be handled
10033 // using subregister manipulation.
10034 if (RemIdx.isZero()) {
10035 if (SubVecVT.isFixedLengthVector()) {
10036 Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: ContainerSubVecVT, Operand: Vec);
10037 return convertFromScalableVector(VT: SubVecVT, V: Vec, DAG, Subtarget);
10038 }
10039 return Op;
10040 }
10041
10042 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10043 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10044 // divide exactly.
10045 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10046 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10047
10048 // If the vector type is an LMUL-group type, extract a subvector equal to the
10049 // nearest full vector register type.
10050 MVT InterSubVT = VecVT;
10051 if (VecVT.bitsGT(VT: getLMUL1VT(VT: VecVT))) {
10052 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10053 // we should have successfully decomposed the extract into a subregister.
10054 assert(SubRegIdx != RISCV::NoSubRegister);
10055 InterSubVT = getLMUL1VT(VT: VecVT);
10056 Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: InterSubVT, Operand: Vec);
10057 }
10058
10059 // Slide this vector register down by the desired number of elements in order
10060 // to place the desired subvector starting at element 0.
10061 SDValue SlidedownAmt = DAG.getElementCount(DL, VT: XLenVT, EC: RemIdx);
10062 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: InterSubVT, DL, DAG, Subtarget);
10063 if (SubVecVT.isFixedLengthVector())
10064 VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT: InterSubVT, DL, DAG,
10065 Subtarget);
10066 SDValue Slidedown =
10067 getVSlidedown(DAG, Subtarget, DL, VT: InterSubVT, Merge: DAG.getUNDEF(VT: InterSubVT),
10068 Op: Vec, Offset: SlidedownAmt, Mask, VL);
10069
10070 // Now the vector is in the right position, extract our final subvector. This
10071 // should resolve to a COPY.
10072 Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown,
10073 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10074
10075 // We might have bitcast from a mask type: cast back to the original type if
10076 // required.
10077 return DAG.getBitcast(VT: Op.getSimpleValueType(), V: Slidedown);
10078}
10079
10080 // Widen a vector operation's operands to i8, then truncate its results back
10081 // to the original type, typically i1. All operand and result types must be the same.
10082static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10083 SelectionDAG &DAG) {
10084 MVT VT = N.getSimpleValueType();
10085 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10086 SmallVector<SDValue, 4> WideOps;
10087 for (SDValue Op : N->ops()) {
10088 assert(Op.getSimpleValueType() == VT &&
10089 "Operands and result must be same type");
10090 WideOps.push_back(Elt: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Op));
10091 }
10092
10093 unsigned NumVals = N->getNumValues();
10094
10095 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10096 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10097 SDValue WideN = DAG.getNode(Opcode: N.getOpcode(), DL, VTList: VTs, Ops: WideOps);
10098 SmallVector<SDValue, 4> TruncVals;
10099 for (unsigned I = 0; I < NumVals; I++) {
10100 TruncVals.push_back(
10101 Elt: DAG.getSetCC(DL, VT: N->getSimpleValueType(ResNo: I), LHS: WideN.getValue(R: I),
10102 RHS: DAG.getConstant(Val: 0, DL, VT: WideVT), Cond: ISD::SETNE));
10103 }
10104
10105 if (TruncVals.size() > 1)
10106 return DAG.getMergeValues(Ops: TruncVals, dl: DL);
10107 return TruncVals.front();
10108}
10109
10110SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10111 SelectionDAG &DAG) const {
10112 SDLoc DL(Op);
10113 MVT VecVT = Op.getSimpleValueType();
10114
10115 assert(VecVT.isScalableVector() &&
10116 "vector_interleave on non-scalable vector!");
10117
10118 // i1 element vectors need to be widened to i8
10119 if (VecVT.getVectorElementType() == MVT::i1)
10120 return widenVectorOpsToi8(N: Op, DL, DAG);
10121
10122 // If the VT is LMUL=8, we need to split and reassemble.
10123 if (VecVT.getSizeInBits().getKnownMinValue() ==
10124 (8 * RISCV::RVVBitsPerBlock)) {
10125 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0);
10126 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1);
10127 EVT SplitVT = Op0Lo.getValueType();
10128
10129 SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL,
10130 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op0Hi);
10131 SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL,
10132 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op1Lo, N2: Op1Hi);
10133
10134 SDValue Even = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT,
10135 N1: ResLo.getValue(R: 0), N2: ResHi.getValue(R: 0));
10136 SDValue Odd = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, N1: ResLo.getValue(R: 1),
10137 N2: ResHi.getValue(R: 1));
10138 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
10139 }
10140
10141 // Concatenate the two vectors as one vector to deinterleave
10142 MVT ConcatVT =
10143 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
10144 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
10145 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT,
10146 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
10147
10148 // We want to operate on all lanes, so get the mask and VL for it
10149 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: ConcatVT, DL, DAG, Subtarget);
10150 SDValue Passthru = DAG.getUNDEF(VT: ConcatVT);
10151
10152 // We can deinterleave through vnsrl.wi if the element type is smaller than
10153 // ELEN
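  // vnsrl.wi treats each adjacent pair as one element of twice the width: a
  // shift of 0 keeps the even elements and a shift of SEW keeps the odd ones.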
10154 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10155 SDValue Even =
10156 getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: true, Subtarget, DAG);
10157 SDValue Odd =
10158 getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: false, Subtarget, DAG);
10159 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
10160 }
10161
10162 // For the indices, use the same SEW to avoid an extra vsetvli
10163 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10164 // Create a vector of even indices {0, 2, 4, ...}
10165 SDValue EvenIdx =
10166 DAG.getStepVector(DL, ResVT: IdxVT, StepVal: APInt(IdxVT.getScalarSizeInBits(), 2));
10167 // Create a vector of odd indices {1, 3, 5, ... }
10168 SDValue OddIdx =
10169 DAG.getNode(Opcode: ISD::ADD, DL, VT: IdxVT, N1: EvenIdx, N2: DAG.getConstant(Val: 1, DL, VT: IdxVT));
10170
10171 // Gather the even and odd elements into two separate vectors
10172 SDValue EvenWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT,
10173 N1: Concat, N2: EvenIdx, N3: Passthru, N4: Mask, N5: VL);
10174 SDValue OddWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT,
10175 N1: Concat, N2: OddIdx, N3: Passthru, N4: Mask, N5: VL);
10176
10177 // Extract the result half of the gather for even and odd
10178 SDValue Even = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: EvenWide,
10179 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10180 SDValue Odd = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: OddWide,
10181 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10182
10183 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
10184}
10185
10186SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10187 SelectionDAG &DAG) const {
10188 SDLoc DL(Op);
10189 MVT VecVT = Op.getSimpleValueType();
10190
10191 assert(VecVT.isScalableVector() &&
10192 "vector_interleave on non-scalable vector!");
10193
10194 // i1 vectors need to be widened to i8
10195 if (VecVT.getVectorElementType() == MVT::i1)
10196 return widenVectorOpsToi8(N: Op, DL, DAG);
10197
10198 MVT XLenVT = Subtarget.getXLenVT();
10199 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10200
10201 // If the VT is LMUL=8, we need to split and reassemble.
10202 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10203 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0);
10204 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1);
10205 EVT SplitVT = Op0Lo.getValueType();
10206
10207 SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL,
10208 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op1Lo);
10209 SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL,
10210 VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Hi, N2: Op1Hi);
10211
10212 SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT,
10213 N1: ResLo.getValue(R: 0), N2: ResLo.getValue(R: 1));
10214 SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT,
10215 N1: ResHi.getValue(R: 0), N2: ResHi.getValue(R: 1));
10216 return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL);
10217 }
10218
10219 SDValue Interleaved;
10220
10221 // If the element type is smaller than ELEN, then we can interleave with
10222 // vwaddu.vv and vwmaccu.vx
10223 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10224 Interleaved = getWideningInterleave(EvenV: Op.getOperand(i: 0), OddV: Op.getOperand(i: 1), DL,
10225 DAG, Subtarget);
10226 } else {
10227 // Otherwise, fall back to using vrgatherei16.vv
10228 MVT ConcatVT =
10229 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
10230 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
10231 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT,
10232 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
10233
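    // vrgatherei16 uses 16-bit indices regardless of SEW, so the index vector
    // stays at a low LMUL even for 32- and 64-bit element types.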
10234 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10235
10236 // 0 1 2 3 4 5 6 7 ...
10237 SDValue StepVec = DAG.getStepVector(DL, ResVT: IdxVT);
10238
10239 // 1 1 1 1 1 1 1 1 ...
10240 SDValue Ones = DAG.getSplatVector(VT: IdxVT, DL, Op: DAG.getConstant(Val: 1, DL, VT: XLenVT));
10241
10242 // 1 0 1 0 1 0 1 0 ...
10243 SDValue OddMask = DAG.getNode(Opcode: ISD::AND, DL, VT: IdxVT, N1: StepVec, N2: Ones);
10244 OddMask = DAG.getSetCC(
10245 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10246 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10247 ISD::CondCode::SETNE);
10248
10249 SDValue VLMax = DAG.getSplatVector(VT: IdxVT, DL, Op: computeVLMax(VecVT, DL, DAG));
10250
10251 // Build up the index vector for interleaving the concatenated vector
10252 // 0 0 1 1 2 2 3 3 ...
10253 SDValue Idx = DAG.getNode(Opcode: ISD::SRL, DL, VT: IdxVT, N1: StepVec, N2: Ones);
10254 // 0 n 1 n+1 2 n+2 3 n+3 ...
10255 Idx =
10256 DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: IdxVT, N1: Idx, N2: VLMax, N3: Idx, N4: OddMask, N5: VL);
10257
10258 // Then perform the interleave
10259 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10260 SDValue TrueMask = getAllOnesMask(VecVT: IdxVT, VL, DL, DAG);
10261 Interleaved = DAG.getNode(Opcode: RISCVISD::VRGATHEREI16_VV_VL, DL, VT: ConcatVT,
10262 N1: Concat, N2: Idx, N3: DAG.getUNDEF(VT: ConcatVT), N4: TrueMask, N5: VL);
10263 }
10264
10265 // Extract the two halves from the interleaved result
10266 SDValue Lo = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved,
10267 N2: DAG.getVectorIdxConstant(Val: 0, DL));
10268 SDValue Hi = DAG.getNode(
10269 Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved,
10270 N2: DAG.getVectorIdxConstant(Val: VecVT.getVectorMinNumElements(), DL));
10271
10272 return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL);
10273}
10274
10275 // Lower step_vector to the vid instruction. Any non-identity step value must
10276 // be accounted for by manual expansion.
10277SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10278 SelectionDAG &DAG) const {
10279 SDLoc DL(Op);
10280 MVT VT = Op.getSimpleValueType();
10281 assert(VT.isScalableVector() && "Expected scalable vector");
10282 MVT XLenVT = Subtarget.getXLenVT();
10283 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget);
10284 SDValue StepVec = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL);
10285 uint64_t StepValImm = Op.getConstantOperandVal(i: 0);
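  // Scale the vid sequence by the step: a power-of-two step becomes a shift,
  // any other step is multiplied in via a splat of the step value.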
10286 if (StepValImm != 1) {
10287 if (isPowerOf2_64(Value: StepValImm)) {
10288 SDValue StepVal =
10289 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
10290 N2: DAG.getConstant(Val: Log2_64(Value: StepValImm), DL, VT: XLenVT), N3: VL);
10291 StepVec = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: StepVec, N2: StepVal);
10292 } else {
10293 SDValue StepVal = lowerScalarSplat(
10294 Passthru: SDValue(), Scalar: DAG.getConstant(Val: StepValImm, DL, VT: VT.getVectorElementType()),
10295 VL, VT, DL, DAG, Subtarget);
10296 StepVec = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: StepVec, N2: StepVal);
10297 }
10298 }
10299 return StepVec;
10300}
10301
10302// Implement vector_reverse using vrgather.vv with indices determined by
10303// subtracting the id of each element from (VLMAX-1). This will convert
10304// the indices like so:
10305// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10306// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10307SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10308 SelectionDAG &DAG) const {
10309 SDLoc DL(Op);
10310 MVT VecVT = Op.getSimpleValueType();
10311 if (VecVT.getVectorElementType() == MVT::i1) {
10312 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10313 SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: Op.getOperand(i: 0));
10314 SDValue Op2 = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: WidenVT, Operand: Op1);
10315 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Op2);
10316 }
10317 unsigned EltSize = VecVT.getScalarSizeInBits();
10318 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10319 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10320 unsigned MaxVLMAX =
10321 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
10322
10323 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10324 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10325
10326 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10327 // to use vrgatherei16.vv.
10328 // TODO: It's also possible to use vrgatherei16.vv for other types to
10329 // decrease register width for the index calculation.
10330 if (MaxVLMAX > 256 && EltSize == 8) {
10331 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10332 // Reverse each half, then reassemble them in reverse order.
10333 // NOTE: It's also possible that after splitting that VLMAX no longer
10334 // requires vrgatherei16.vv.
10335 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10336 auto [Lo, Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0);
10337 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: VecVT);
10338 Lo = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo);
10339 Hi = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi);
10340 // Reassemble the low and high pieces reversed.
10341 // FIXME: This is a CONCAT_VECTORS.
10342 SDValue Res =
10343 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT), N2: Hi,
10344 N3: DAG.getVectorIdxConstant(Val: 0, DL));
10345 return DAG.getNode(
10346 Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: Res, N2: Lo,
10347 N3: DAG.getVectorIdxConstant(Val: LoVT.getVectorMinNumElements(), DL));
10348 }
10349
10350 // Just promote the int type to i16 which will double the LMUL.
10351 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10352 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10353 }
10354
10355 MVT XLenVT = Subtarget.getXLenVT();
10356 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10357
10358 // Calculate VLMAX-1 for the desired SEW.
10359 SDValue VLMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT,
10360 N1: computeVLMax(VecVT, DL, DAG),
10361 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
10362
10363 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10364 bool IsRV32E64 =
10365 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10366 SDValue SplatVL;
10367 if (!IsRV32E64)
10368 SplatVL = DAG.getSplatVector(VT: IntVT, DL, Op: VLMinus1);
10369 else
10370 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10371 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10372
10373 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IntVT, N1: Mask, N2: VL);
10374 SDValue Indices = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IntVT, N1: SplatVL, N2: VID,
10375 N3: DAG.getUNDEF(VT: IntVT), N4: Mask, N5: VL);
10376
10377 return DAG.getNode(Opcode: GatherOpc, DL, VT: VecVT, N1: Op.getOperand(i: 0), N2: Indices,
10378 N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL);
10379}
10380
10381SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10382 SelectionDAG &DAG) const {
10383 SDLoc DL(Op);
10384 SDValue V1 = Op.getOperand(i: 0);
10385 SDValue V2 = Op.getOperand(i: 1);
10386 MVT XLenVT = Subtarget.getXLenVT();
10387 MVT VecVT = Op.getSimpleValueType();
10388
10389 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10390
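  // vector_splice selects VLMAX contiguous elements from the concatenation of
  // V1 and V2 starting at the (possibly negative) offset: slide V1 down by
  // DownOffset with VL=UpOffset, then slide V2 up into the vacated tail.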
10391 int64_t ImmValue = cast<ConstantSDNode>(Val: Op.getOperand(i: 2))->getSExtValue();
10392 SDValue DownOffset, UpOffset;
10393 if (ImmValue >= 0) {
10394 // The operand is a TargetConstant; we need to rebuild it as a regular
10395 // constant.
10396 DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT);
10397 UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: DownOffset);
10398 } else {
10399 // The operand is a TargetConstant; we need to rebuild it as a regular
10400 // constant rather than negating the original operand.
10401 UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT);
10402 DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: UpOffset);
10403 }
10404
10405 SDValue TrueMask = getAllOnesMask(VecVT, VL: VLMax, DL, DAG);
10406
10407 SDValue SlideDown =
10408 getVSlidedown(DAG, Subtarget, DL, VT: VecVT, Merge: DAG.getUNDEF(VT: VecVT), Op: V1,
10409 Offset: DownOffset, Mask: TrueMask, VL: UpOffset);
10410 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10411 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10412 RISCVII::TAIL_AGNOSTIC);
10413}
10414
10415SDValue
10416RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10417 SelectionDAG &DAG) const {
10418 SDLoc DL(Op);
10419 auto *Load = cast<LoadSDNode>(Val&: Op);
10420
10421 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10422 Load->getMemoryVT(),
10423 *Load->getMemOperand()) &&
10424 "Expecting a correctly-aligned load");
10425
10426 MVT VT = Op.getSimpleValueType();
10427 MVT XLenVT = Subtarget.getXLenVT();
10428 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10429
10430 // If we know the exact VLEN and our fixed length vector completely fills
10431 // the container, use a whole register load instead.
10432 const auto [MinVLMAX, MaxVLMAX] =
10433 RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget);
10434 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10435 getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) {
10436 MachineMemOperand *MMO = Load->getMemOperand();
10437 SDValue NewLoad =
10438 DAG.getLoad(VT: ContainerVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(),
10439 PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(), MMOFlags: MMO->getFlags(),
10440 AAInfo: MMO->getAAInfo(), Ranges: MMO->getRanges());
10441 SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
10442 return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL);
10443 }
10444
10445 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10446
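  // Mask vectors are loaded with vlm, which takes no passthru operand; other
  // types go through the unit-stride vle intrinsic.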
10447 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10448 SDValue IntID = DAG.getTargetConstant(
10449 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10450 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10451 if (!IsMaskOp)
10452 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10453 Ops.push_back(Elt: Load->getBasePtr());
10454 Ops.push_back(Elt: VL);
10455 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10456 SDValue NewLoad =
10457 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
10458 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
10459
10460 SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
10461 return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL);
10462}
10463
10464SDValue
10465RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10466 SelectionDAG &DAG) const {
10467 SDLoc DL(Op);
10468 auto *Store = cast<StoreSDNode>(Val&: Op);
10469
10470 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10471 Store->getMemoryVT(),
10472 *Store->getMemOperand()) &&
10473 "Expecting a correctly-aligned store");
10474
10475 SDValue StoreVal = Store->getValue();
10476 MVT VT = StoreVal.getSimpleValueType();
10477 MVT XLenVT = Subtarget.getXLenVT();
10478
10479 // If the size is less than a byte, we need to pad with zeros to make a byte.
10480 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10481 VT = MVT::v8i1;
10482 StoreVal =
10483 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
10484 N2: StoreVal, N3: DAG.getVectorIdxConstant(Val: 0, DL));
10485 }
10486
10487 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10488
10489 SDValue NewValue =
10490 convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget);
10491
10493 // If we know the exact VLEN and our fixed length vector completely fills
10494 // the container, use a whole register store instead.
10495 const auto [MinVLMAX, MaxVLMAX] =
10496 RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget);
10497 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10498 getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) {
10499 MachineMemOperand *MMO = Store->getMemOperand();
10500 return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: NewValue, Ptr: Store->getBasePtr(),
10501 PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(),
10502 MMOFlags: MMO->getFlags(), AAInfo: MMO->getAAInfo());
10503 }
10504
10505 SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG,
10506 Subtarget);
10507
10508 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10509 SDValue IntID = DAG.getTargetConstant(
10510 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10511 return DAG.getMemIntrinsicNode(
10512 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10513 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10514 Store->getMemoryVT(), Store->getMemOperand());
10515}
10516
10517SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10518 SelectionDAG &DAG) const {
10519 SDLoc DL(Op);
10520 MVT VT = Op.getSimpleValueType();
10521
10522 const auto *MemSD = cast<MemSDNode>(Val&: Op);
10523 EVT MemVT = MemSD->getMemoryVT();
10524 MachineMemOperand *MMO = MemSD->getMemOperand();
10525 SDValue Chain = MemSD->getChain();
10526 SDValue BasePtr = MemSD->getBasePtr();
10527
10528 SDValue Mask, PassThru, VL;
10529 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Val&: Op)) {
10530 Mask = VPLoad->getMask();
10531 PassThru = DAG.getUNDEF(VT);
10532 VL = VPLoad->getVectorLength();
10533 } else {
10534 const auto *MLoad = cast<MaskedLoadSDNode>(Val&: Op);
10535 Mask = MLoad->getMask();
10536 PassThru = MLoad->getPassThru();
10537 }
10538
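  // An all-ones mask lets us use the unmasked vle form and drop the mask and
  // policy operands.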
10539 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
10540
10541 MVT XLenVT = Subtarget.getXLenVT();
10542
10543 MVT ContainerVT = VT;
10544 if (VT.isFixedLengthVector()) {
10545 ContainerVT = getContainerForFixedLengthVector(VT);
10546 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
10547 if (!IsUnmasked) {
10548 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
10549 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
10550 }
10551 }
10552
10553 if (!VL)
10554 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
10555
10556 unsigned IntID =
10557 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10558 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
10559 if (IsUnmasked)
10560 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10561 else
10562 Ops.push_back(Elt: PassThru);
10563 Ops.push_back(Elt: BasePtr);
10564 if (!IsUnmasked)
10565 Ops.push_back(Elt: Mask);
10566 Ops.push_back(Elt: VL);
10567 if (!IsUnmasked)
10568 Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT));
10569
10570 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10571
10572 SDValue Result =
10573 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO);
10574 Chain = Result.getValue(R: 1);
10575
10576 if (VT.isFixedLengthVector())
10577 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
10578
10579 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
10580}
10581
10582SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10583 SelectionDAG &DAG) const {
10584 SDLoc DL(Op);
10585
10586 const auto *MemSD = cast<MemSDNode>(Val&: Op);
10587 EVT MemVT = MemSD->getMemoryVT();
10588 MachineMemOperand *MMO = MemSD->getMemOperand();
10589 SDValue Chain = MemSD->getChain();
10590 SDValue BasePtr = MemSD->getBasePtr();
10591 SDValue Val, Mask, VL;
10592
10593 bool IsCompressingStore = false;
10594 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Val&: Op)) {
10595 Val = VPStore->getValue();
10596 Mask = VPStore->getMask();
10597 VL = VPStore->getVectorLength();
10598 } else {
10599 const auto *MStore = cast<MaskedStoreSDNode>(Val&: Op);
10600 Val = MStore->getValue();
10601 Mask = MStore->getMask();
10602 IsCompressingStore = MStore->isCompressingStore();
10603 }
10604
10605 bool IsUnmasked =
10606 ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()) || IsCompressingStore;
10607
10608 MVT VT = Val.getSimpleValueType();
10609 MVT XLenVT = Subtarget.getXLenVT();
10610
10611 MVT ContainerVT = VT;
10612 if (VT.isFixedLengthVector()) {
10613 ContainerVT = getContainerForFixedLengthVector(VT);
10614
10615 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
10616 if (!IsUnmasked || IsCompressingStore) {
10617 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
10618 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
10619 }
10620 }
10621
10622 if (!VL)
10623 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
10624
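  // For a compressing store, pack the active elements to the front with
  // vcompress and store only that many: the effective VL becomes the
  // population count of the mask, and the store itself is then unmasked.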
10625 if (IsCompressingStore) {
10626 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10627 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10628 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10629 VL =
10630 DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Mask,
10631 N2: getAllOnesMask(VecVT: Mask.getSimpleValueType(), VL, DL, DAG), N3: VL);
10632 }
10633
10634 unsigned IntID =
10635 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10636 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
10637 Ops.push_back(Elt: Val);
10638 Ops.push_back(Elt: BasePtr);
10639 if (!IsUnmasked)
10640 Ops.push_back(Elt: Mask);
10641 Ops.push_back(Elt: VL);
10642
10643 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10644 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10645}
10646
10647SDValue
10648RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10649 SelectionDAG &DAG) const {
10650 MVT InVT = Op.getOperand(i: 0).getSimpleValueType();
10651 MVT ContainerVT = getContainerForFixedLengthVector(VT: InVT);
10652
10653 MVT VT = Op.getSimpleValueType();
10654
10655 SDValue Op1 =
10656 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
10657 SDValue Op2 =
10658 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget);
10659
10660 SDLoc DL(Op);
10661 auto [Mask, VL] = getDefaultVLOps(NumElts: VT.getVectorNumElements(), ContainerVT, DL,
10662 DAG, Subtarget);
10663 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
10664
10665 SDValue Cmp =
10666 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
10667 Ops: {Op1, Op2, Op.getOperand(i: 2), DAG.getUNDEF(VT: MaskVT), Mask, VL});
10668
10669 return convertFromScalableVector(VT, V: Cmp, DAG, Subtarget);
10670}
10671
10672SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10673 SelectionDAG &DAG) const {
10674 unsigned Opc = Op.getOpcode();
10675 SDLoc DL(Op);
10676 SDValue Chain = Op.getOperand(i: 0);
10677 SDValue Op1 = Op.getOperand(i: 1);
10678 SDValue Op2 = Op.getOperand(i: 2);
10679 SDValue CC = Op.getOperand(i: 3);
10680 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
10681 MVT VT = Op.getSimpleValueType();
10682 MVT InVT = Op1.getSimpleValueType();
10683
10684 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10685 // condition codes.
10686 if (Opc == ISD::STRICT_FSETCCS) {
10687 // Expand strict_fsetccs(x, y, oeq) to
10688 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10689 SDVTList VTList = Op->getVTList();
10690 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10691 SDValue OLECCVal = DAG.getCondCode(Cond: ISD::SETOLE);
10692 SDValue Tmp1 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1,
10693 N3: Op2, N4: OLECCVal);
10694 SDValue Tmp2 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op2,
10695 N3: Op1, N4: OLECCVal);
10696 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10697 Tmp1.getValue(1), Tmp2.getValue(1));
10698 // Tmp1 and Tmp2 might be the same node.
10699 if (Tmp1 != Tmp2)
10700 Tmp1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp1, N2: Tmp2);
10701 return DAG.getMergeValues(Ops: {Tmp1, OutChain}, dl: DL);
10702 }
10703
10704 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10705 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10706 SDValue OEQCCVal = DAG.getCondCode(Cond: ISD::SETOEQ);
10707 SDValue OEQ = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1,
10708 N3: Op2, N4: OEQCCVal);
10709 SDValue Res = DAG.getNOT(DL, Val: OEQ, VT);
10710 return DAG.getMergeValues(Ops: {Res, OEQ.getValue(R: 1)}, dl: DL);
10711 }
10712 }
10713
10714 MVT ContainerInVT = InVT;
10715 if (InVT.isFixedLengthVector()) {
10716 ContainerInVT = getContainerForFixedLengthVector(VT: InVT);
10717 Op1 = convertToScalableVector(VT: ContainerInVT, V: Op1, DAG, Subtarget);
10718 Op2 = convertToScalableVector(VT: ContainerInVT, V: Op2, DAG, Subtarget);
10719 }
10720 MVT MaskVT = getMaskTypeFor(VecVT: ContainerInVT);
10721
10722 auto [Mask, VL] = getDefaultVLOps(VecVT: InVT, ContainerVT: ContainerInVT, DL, DAG, Subtarget);
10723
10724 SDValue Res;
10725 if (Opc == ISD::STRICT_FSETCC &&
10726 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10727 CCVal == ISD::SETOLE)) {
10728 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10729 // is only active when both input elements are ordered.
10730 SDValue True = getAllOnesMask(VecVT: ContainerInVT, VL, DL, DAG);
10731 SDValue OrderMask1 = DAG.getNode(
10732 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10733 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10734 True, VL});
10735 SDValue OrderMask2 = DAG.getNode(
10736 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10737 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10738 True, VL});
10739 Mask =
10740 DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: MaskVT, N1: OrderMask1, N2: OrderMask2, N3: VL);
10741 // Use Mask as the merge operand to let the result be 0 if either of the
10742 // inputs is unordered.
10743 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10744 DAG.getVTList(MaskVT, MVT::Other),
10745 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10746 } else {
10747 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10748 : RISCVISD::STRICT_FSETCCS_VL;
10749 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10750 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10751 }
10752
10753 if (VT.isFixedLengthVector()) {
10754 SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
10755 return DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL);
10756 }
10757 return Res;
10758}
10759
10760// Lower vector ABS to smax(X, sub(0, X)).
10761SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10762 SDLoc DL(Op);
10763 MVT VT = Op.getSimpleValueType();
10764 SDValue X = Op.getOperand(i: 0);
10765
10766 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10767 "Unexpected type for ISD::ABS");
10768
10769 MVT ContainerVT = VT;
10770 if (VT.isFixedLengthVector()) {
10771 ContainerVT = getContainerForFixedLengthVector(VT);
10772 X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget);
10773 }
10774
10775 SDValue Mask, VL;
10776 if (Op->getOpcode() == ISD::VP_ABS) {
10777 Mask = Op->getOperand(Num: 1);
10778 if (VT.isFixedLengthVector())
10779 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
10780 Subtarget);
10781 VL = Op->getOperand(Num: 2);
10782 } else
10783 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
10784
10785 SDValue SplatZero = DAG.getNode(
10786 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT),
10787 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL);
10788 SDValue NegX = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: ContainerVT, N1: SplatZero, N2: X,
10789 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
10790 SDValue Max = DAG.getNode(Opcode: RISCVISD::SMAX_VL, DL, VT: ContainerVT, N1: X, N2: NegX,
10791 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
10792
10793 if (VT.isFixedLengthVector())
10794 Max = convertFromScalableVector(VT, V: Max, DAG, Subtarget);
10795 return Max;
10796}
10797
10798SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10799 SDValue Op, SelectionDAG &DAG) const {
10800 SDLoc DL(Op);
10801 MVT VT = Op.getSimpleValueType();
10802 SDValue Mag = Op.getOperand(i: 0);
10803 SDValue Sign = Op.getOperand(i: 1);
10804 assert(Mag.getValueType() == Sign.getValueType() &&
10805 "Can only handle COPYSIGN with matching types.");
10806
10807 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10808 Mag = convertToScalableVector(VT: ContainerVT, V: Mag, DAG, Subtarget);
10809 Sign = convertToScalableVector(VT: ContainerVT, V: Sign, DAG, Subtarget);
10810
10811 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
10812
10813 SDValue CopySign = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Mag,
10814 N2: Sign, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
10815
10816 return convertFromScalableVector(VT, V: CopySign, DAG, Subtarget);
10817}
10818
10819SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10820 SDValue Op, SelectionDAG &DAG) const {
10821 MVT VT = Op.getSimpleValueType();
10822 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10823
10824 MVT I1ContainerVT =
10825 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10826
10827 SDValue CC =
10828 convertToScalableVector(VT: I1ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
10829 SDValue Op1 =
10830 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget);
10831 SDValue Op2 =
10832 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 2), DAG, Subtarget);
10833
10834 SDLoc DL(Op);
10835 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
10836
10837 SDValue Select = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: Op1,
10838 N3: Op2, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
10839
10840 return convertFromScalableVector(VT, V: Select, DAG, Subtarget);
10841}
10842
10843SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10844 SelectionDAG &DAG) const {
10845 unsigned NewOpc = getRISCVVLOp(Op);
10846 bool HasMergeOp = hasMergeOp(Opcode: NewOpc);
10847 bool HasMask = hasMaskOp(Opcode: NewOpc);
10848
10849 MVT VT = Op.getSimpleValueType();
10850 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10851
10852 // Create list of operands by converting existing ones to scalable types.
10853 SmallVector<SDValue, 6> Ops;
10854 for (const SDValue &V : Op->op_values()) {
10855 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10856
10857 // Pass through non-vector operands.
10858 if (!V.getValueType().isVector()) {
10859 Ops.push_back(Elt: V);
10860 continue;
10861 }
10862
10863 // "cast" fixed length vector to a scalable vector.
10864 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10865 "Only fixed length vectors are supported!");
10866 Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget));
10867 }
10868
10869 SDLoc DL(Op);
10870 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
10871 if (HasMergeOp)
10872 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10873 if (HasMask)
10874 Ops.push_back(Elt: Mask);
10875 Ops.push_back(Elt: VL);
10876
10877 // StrictFP operations have two result values. Their lowered result should
10878 // have the same number of results.
10879 if (Op->isStrictFPOpcode()) {
10880 SDValue ScalableRes =
10881 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10882 Op->getFlags());
10883 SDValue SubVec = convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget);
10884 return DAG.getMergeValues(Ops: {SubVec, ScalableRes.getValue(R: 1)}, dl: DL);
10885 }
10886
10887 SDValue ScalableRes =
10888 DAG.getNode(Opcode: NewOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags());
10889 return convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget);
10890}
10891
10892// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10893// * Operands of each node are assumed to be in the same order.
10894// * The EVL operand is promoted from i32 to i64 on RV64.
10895// * Fixed-length vectors are converted to their scalable-vector container
10896// types.
10897SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10898 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10899 bool HasMergeOp = hasMergeOp(Opcode: RISCVISDOpc);
10900
10901 SDLoc DL(Op);
10902 MVT VT = Op.getSimpleValueType();
10903 SmallVector<SDValue, 4> Ops;
10904
10905 MVT ContainerVT = VT;
10906 if (VT.isFixedLengthVector())
10907 ContainerVT = getContainerForFixedLengthVector(VT);
10908
10909 for (const auto &OpIdx : enumerate(First: Op->ops())) {
10910 SDValue V = OpIdx.value();
10911 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10912 // Add a dummy merge value before the mask operand, or, if there is no
10913 // mask, before the EVL operand.
10914 if (HasMergeOp) {
10915 auto MaskIdx = ISD::getVPMaskIdx(Opcode: Op.getOpcode());
10916 if (MaskIdx) {
10917 if (*MaskIdx == OpIdx.index())
10918 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10919 } else if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) ==
10920 OpIdx.index()) {
10921 if (Op.getOpcode() == ISD::VP_MERGE) {
10922 // For VP_MERGE, copy the false operand instead of an undef value.
10923 Ops.push_back(Elt: Ops.back());
10924 } else {
10925 assert(Op.getOpcode() == ISD::VP_SELECT);
10926 // For VP_SELECT, add an undef value.
10927 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
10928 }
10929 }
10930 }
10931 // Pass through operands which aren't fixed-length vectors.
10932 if (!V.getValueType().isFixedLengthVector()) {
10933 Ops.push_back(Elt: V);
10934 continue;
10935 }
10936 // "cast" fixed length vector to a scalable vector.
10937 MVT OpVT = V.getSimpleValueType();
10938 MVT ContainerVT = getContainerForFixedLengthVector(VT: OpVT);
10939 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10940 "Only fixed length vectors are supported!");
10941 Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget));
10942 }
10943
10944 if (!VT.isFixedLengthVector())
10945 return DAG.getNode(Opcode: RISCVISDOpc, DL, VT, Ops, Flags: Op->getFlags());
10946
10947 SDValue VPOp = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags());
10948
10949 return convertFromScalableVector(VT, V: VPOp, DAG, Subtarget);
10950}
10951
10952SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10953 SelectionDAG &DAG) const {
10954 SDLoc DL(Op);
10955 MVT VT = Op.getSimpleValueType();
10956
10957 SDValue Src = Op.getOperand(i: 0);
10958 // NOTE: Mask is dropped.
10959 SDValue VL = Op.getOperand(i: 2);
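// In outline (an informal sketch, not actual SDNode syntax), the lowering
// built below is:
//   vp.zext(src, mask, evl)  ->  vmerge(src, splat(1),  splat(0), evl)
//   vp.sext(src, mask, evl)  ->  vmerge(src, splat(-1), splat(0), evl)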
10960
10961 MVT ContainerVT = VT;
10962 if (VT.isFixedLengthVector()) {
10963 ContainerVT = getContainerForFixedLengthVector(VT);
10964 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10965 Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget);
10966 }
10967
10968 MVT XLenVT = Subtarget.getXLenVT();
10969 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
10970 SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
10971 N1: DAG.getUNDEF(VT: ContainerVT), N2: Zero, N3: VL);
10972
10973 SDValue SplatValue = DAG.getConstant(
10974 Val: Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, VT: XLenVT);
10975 SDValue Splat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
10976 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatValue, N3: VL);
10977
10978 SDValue Result = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Src, N2: Splat,
10979 N3: ZeroSplat, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
10980 if (!VT.isFixedLengthVector())
10981 return Result;
10982 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
10983}
10984
10985SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10986 SelectionDAG &DAG) const {
10987 SDLoc DL(Op);
10988 MVT VT = Op.getSimpleValueType();
10989
10990 SDValue Op1 = Op.getOperand(i: 0);
10991 SDValue Op2 = Op.getOperand(i: 1);
10992 ISD::CondCode Condition = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
10993 // NOTE: Mask is dropped.
10994 SDValue VL = Op.getOperand(i: 4);
10995
10996 MVT ContainerVT = VT;
10997 if (VT.isFixedLengthVector()) {
10998 ContainerVT = getContainerForFixedLengthVector(VT);
10999 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11000 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
11001 }
11002
11003 SDValue Result;
11004 SDValue AllOneMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
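// Note on the identities used below: the i1 elements are read as 0/1 for
// unsigned compares and as 0/-1 for signed compares, so, for example,
// X >s Y can only hold when X == 0 and Y == 1 (i.e. -1).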
11005
11006 switch (Condition) {
11007 default:
11008 break;
11009 // X != Y --> (X^Y)
11010 case ISD::SETNE:
11011 Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
11012 break;
11013 // X == Y --> ~(X^Y)
11014 case ISD::SETEQ: {
11015 SDValue Temp =
11016 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
11017 Result =
11018 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: AllOneMask, N3: VL);
11019 break;
11020 }
11021 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11022 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11023 case ISD::SETGT:
11024 case ISD::SETULT: {
11025 SDValue Temp =
11026 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL);
11027 Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL);
11028 break;
11029 }
11030 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11031 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11032 case ISD::SETLT:
11033 case ISD::SETUGT: {
11034 SDValue Temp =
11035 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL);
11036 Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Op1, N2: Temp, N3: VL);
11037 break;
11038 }
11039 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11040 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11041 case ISD::SETGE:
11042 case ISD::SETULE: {
11043 SDValue Temp =
11044 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL);
11045 Result = DAG.getNode(Opcode: RISCVISD::VMOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL);
11046 break;
11047 }
11048 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11049 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11050 case ISD::SETLE:
11051 case ISD::SETUGE: {
11052 SDValue Temp =
11053 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL);
11054 Result = DAG.getNode(Opcode: RISCVISD::VMOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op1, N3: VL);
11055 break;
11056 }
11057 }
11058
11059 if (!VT.isFixedLengthVector())
11060 return Result;
11061 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11062}
11063
11064// Lower Floating-Point/Integer Type-Convert VP SDNodes
11065SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11066 SelectionDAG &DAG) const {
11067 SDLoc DL(Op);
11068
11069 SDValue Src = Op.getOperand(i: 0);
11070 SDValue Mask = Op.getOperand(i: 1);
11071 SDValue VL = Op.getOperand(i: 2);
11072 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11073
11074 MVT DstVT = Op.getSimpleValueType();
11075 MVT SrcVT = Src.getSimpleValueType();
11076 if (DstVT.isFixedLengthVector()) {
11077 DstVT = getContainerForFixedLengthVector(VT: DstVT);
11078 SrcVT = getContainerForFixedLengthVector(VT: SrcVT);
11079 Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget);
11080 MVT MaskVT = getMaskTypeFor(VecVT: DstVT);
11081 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11082 }
11083
11084 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11085 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11086
11087 SDValue Result;
11088 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11089 if (SrcVT.isInteger()) {
11090 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11091
11092 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11093 ? RISCVISD::VSEXT_VL
11094 : RISCVISD::VZEXT_VL;
11095
11096 // Do we need to do any pre-widening before converting?
11097 if (SrcEltSize == 1) {
11098 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11099 MVT XLenVT = Subtarget.getXLenVT();
11100 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
11101 SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT,
11102 N1: DAG.getUNDEF(VT: IntVT), N2: Zero, N3: VL);
11103 SDValue One = DAG.getConstant(
11104 Val: RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, VT: XLenVT);
11105 SDValue OneSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT,
11106 N1: DAG.getUNDEF(VT: IntVT), N2: One, N3: VL);
11107 Src = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IntVT, N1: Src, N2: OneSplat,
11108 N3: ZeroSplat, N4: DAG.getUNDEF(VT: IntVT), N5: VL);
11109 } else if (DstEltSize > (2 * SrcEltSize)) {
11110 // Widen before converting.
11111 MVT IntVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: DstEltSize / 2),
11112 EC: DstVT.getVectorElementCount());
11113 Src = DAG.getNode(Opcode: RISCVISDExtOpc, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL);
11114 }
11115
11116 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
11117 } else {
11118 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11119 "Wrong input/output vector types");
11120
11121 // Convert f16 to f32 then convert f32 to i64.
11122 if (DstEltSize > (2 * SrcEltSize)) {
11123 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11124 MVT InterimFVT =
11125 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11126 Src =
11127 DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL);
11128 }
11129
11130 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
11131 }
11132 } else { // Narrowing + Conversion
11133 if (SrcVT.isInteger()) {
11134 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11135 // First do a narrowing conversion to an FP type half the size, then
11136 // round the result down to the final FP type if needed.
11137
11138 MVT InterimFVT = DstVT;
11139 if (SrcEltSize > (2 * DstEltSize)) {
11140 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11141 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11142 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11143 }
11144
11145 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL);
11146
11147 if (InterimFVT != DstVT) {
11148 Src = Result;
11149 Result = DAG.getNode(Opcode: RISCVISD::FP_ROUND_VL, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
11150 }
11151 } else {
11152 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11153 "Wrong input/output vector types");
11154 // First do a narrowing conversion to an integer half the size, then
11155 // truncate if needed.
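// As an illustration of the path below, f64 -> i8 is lowered as a conversion
// to i32 followed by VL-predicated truncates i32 -> i16 -> i8.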
11156
11157 if (DstEltSize == 1) {
11158 // First convert to the same size integer, then convert to mask using
11159 // setcc.
11160 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11161 MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize),
11162 EC: DstVT.getVectorElementCount());
11163 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL);
11164
11165 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11166 // otherwise the conversion was undefined.
11167 MVT XLenVT = Subtarget.getXLenVT();
11168 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
11169 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterimIVT,
11170 N1: DAG.getUNDEF(VT: InterimIVT), N2: SplatZero, N3: VL);
11171 Result = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: DstVT,
11172 Ops: {Result, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
11173 DAG.getUNDEF(VT: DstVT), Mask, VL});
11174 } else {
11175 MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
11176 EC: DstVT.getVectorElementCount());
11177
11178 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL);
11179
11180 while (InterimIVT != DstVT) {
11181 SrcEltSize /= 2;
11182 Src = Result;
11183 InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
11184 EC: DstVT.getVectorElementCount());
11185 Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: InterimIVT,
11186 N1: Src, N2: Mask, N3: VL);
11187 }
11188 }
11189 }
11190 }
11191
11192 MVT VT = Op.getSimpleValueType();
11193 if (!VT.isFixedLengthVector())
11194 return Result;
11195 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11196}
11197
11198SDValue
11199RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11200 SelectionDAG &DAG) const {
11201 SDLoc DL(Op);
11202
11203 SDValue Op1 = Op.getOperand(i: 0);
11204 SDValue Op2 = Op.getOperand(i: 1);
11205 SDValue Offset = Op.getOperand(i: 2);
11206 SDValue Mask = Op.getOperand(i: 3);
11207 SDValue EVL1 = Op.getOperand(i: 4);
11208 SDValue EVL2 = Op.getOperand(i: 5);
11209
11210 const MVT XLenVT = Subtarget.getXLenVT();
11211 MVT VT = Op.getSimpleValueType();
11212 MVT ContainerVT = VT;
11213 if (VT.isFixedLengthVector()) {
11214 ContainerVT = getContainerForFixedLengthVector(VT);
11215 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11216 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
11217 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11218 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11219 }
11220
11221 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11222 EVL1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: EVL1);
11223
11224 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11225 if (IsMaskVector) {
11226 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11227
11228 // Expand input operands
11229 SDValue SplatOneOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11230 N1: DAG.getUNDEF(VT: ContainerVT),
11231 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL1);
11232 SDValue SplatZeroOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11233 N1: DAG.getUNDEF(VT: ContainerVT),
11234 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL1);
11235 Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op1, N2: SplatOneOp1,
11236 N3: SplatZeroOp1, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL1);
11237
11238 SDValue SplatOneOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11239 N1: DAG.getUNDEF(VT: ContainerVT),
11240 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL2);
11241 SDValue SplatZeroOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
11242 N1: DAG.getUNDEF(VT: ContainerVT),
11243 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL2);
11244 Op2 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op2, N2: SplatOneOp2,
11245 N3: SplatZeroOp2, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL2);
11246 }
11247
11248 int64_t ImmValue = cast<ConstantSDNode>(Val&: Offset)->getSExtValue();
11249 SDValue DownOffset, UpOffset;
11250 if (ImmValue >= 0) {
11251 // The operand is a TargetConstant; we need to rebuild it as a regular
11252 // constant.
11253 DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT);
11254 UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: DownOffset);
11255 } else {
11256 // The operand is a TargetConstant; we need to rebuild it as a regular
11257 // constant rather than negating the original operand.
11258 UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT);
11259 DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: UpOffset);
11260 }
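// Illustration: for a non-negative Offset, the slidedown below takes
// EVL1 - Offset elements of Op1 starting at element Offset, and the slideup
// then places Op2's elements after them, starting at position
// UpOffset = EVL1 - Offset.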
11261
11262 SDValue SlideDown =
11263 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT),
11264 Op: Op1, Offset: DownOffset, Mask, VL: UpOffset);
11265 SDValue Result = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: SlideDown, Op: Op2,
11266 Offset: UpOffset, Mask, VL: EVL2, Policy: RISCVII::TAIL_AGNOSTIC);
11267
11268 if (IsMaskVector) {
11269 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11270 Result = DAG.getNode(
11271 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11272 {Result, DAG.getConstant(0, DL, ContainerVT),
11273 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11274 Mask, EVL2});
11275 }
11276
11277 if (!VT.isFixedLengthVector())
11278 return Result;
11279 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11280}
11281
11282SDValue
11283RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11284 SelectionDAG &DAG) const {
11285 SDLoc DL(Op);
11286 MVT VT = Op.getSimpleValueType();
11287 MVT XLenVT = Subtarget.getXLenVT();
11288
11289 SDValue Op1 = Op.getOperand(i: 0);
11290 SDValue Mask = Op.getOperand(i: 1);
11291 SDValue EVL = Op.getOperand(i: 2);
11292
11293 MVT ContainerVT = VT;
11294 if (VT.isFixedLengthVector()) {
11295 ContainerVT = getContainerForFixedLengthVector(VT);
11296 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11297 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11298 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11299 }
11300
11301 MVT GatherVT = ContainerVT;
11302 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11303 // Check if we are working with mask vectors
11304 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11305 if (IsMaskVector) {
11306 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11307
11308 // Expand input operand
11309 SDValue SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
11310 N1: DAG.getUNDEF(VT: IndicesVT),
11311 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL);
11312 SDValue SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
11313 N1: DAG.getUNDEF(VT: IndicesVT),
11314 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL);
11315 Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IndicesVT, N1: Op1, N2: SplatOne,
11316 N3: SplatZero, N4: DAG.getUNDEF(VT: IndicesVT), N5: EVL);
11317 }
11318
11319 unsigned EltSize = GatherVT.getScalarSizeInBits();
11320 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11321 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11322 unsigned MaxVLMAX =
11323 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
11324
11325 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11326 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11327 // to use vrgatherei16.vv.
11328 // TODO: It's also possible to use vrgatherei16.vv for other types to
11329 // decrease register width for the index calculation.
11330 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11331 if (MaxVLMAX > 256 && EltSize == 8) {
11332 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11333 // Split the vector in half and reverse each half using a full register
11334 // reverse.
11335 // Swap the halves and concatenate them.
11336 // Slide the concatenated result by (VLMax - VL).
11337 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11338 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: GatherVT);
11339 auto [Lo, Hi] = DAG.SplitVector(N: Op1, DL);
11340
11341 SDValue LoRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo);
11342 SDValue HiRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi);
11343
11344 // Reassemble the low and high pieces reversed.
11345 // NOTE: this Result is unmasked (because we do not need masks for
11346 // shuffles). If in the future this has to change, we can use a SELECT_VL
11347 // between Result and UNDEF using the mask originally passed to VP_REVERSE.
11348 SDValue Result =
11349 DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: GatherVT, N1: HiRev, N2: LoRev);
11350
11351 // Slide off any elements from past EVL that were reversed into the low
11352 // elements.
11353 unsigned MinElts = GatherVT.getVectorMinNumElements();
11354 SDValue VLMax =
11355 DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), MinElts));
11356 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: EVL);
11357
11358 Result = getVSlidedown(DAG, Subtarget, DL, VT: GatherVT,
11359 Merge: DAG.getUNDEF(VT: GatherVT), Op: Result, Offset: Diff, Mask, VL: EVL);
11360
11361 if (IsMaskVector) {
11362 // Truncate Result back to a mask vector
11363 Result =
11364 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
11365 Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT),
11366 DAG.getCondCode(Cond: ISD::SETNE),
11367 DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL});
11368 }
11369
11370 if (!VT.isFixedLengthVector())
11371 return Result;
11372 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11373 }
11374
11375 // Just promote the int type to i16 which will double the LMUL.
11376 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11377 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11378 }
11379
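// The general case reverses the vector with a gather whose indices are
// (EVL - 1) - vid; e.g. for EVL = 4 the index vector is (3, 2, 1, 0).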
11380 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IndicesVT, N1: Mask, N2: EVL);
11381 SDValue VecLen =
11382 DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
11383 SDValue VecLenSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
11384 N1: DAG.getUNDEF(VT: IndicesVT), N2: VecLen, N3: EVL);
11385 SDValue VRSUB = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IndicesVT, N1: VecLenSplat, N2: VID,
11386 N3: DAG.getUNDEF(VT: IndicesVT), N4: Mask, N5: EVL);
11387 SDValue Result = DAG.getNode(Opcode: GatherOpc, DL, VT: GatherVT, N1: Op1, N2: VRSUB,
11388 N3: DAG.getUNDEF(VT: GatherVT), N4: Mask, N5: EVL);
11389
11390 if (IsMaskVector) {
11391 // Truncate Result back to a mask vector
11392 Result = DAG.getNode(
11393 Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
11394 Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT), DAG.getCondCode(Cond: ISD::SETNE),
11395 DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL});
11396 }
11397
11398 if (!VT.isFixedLengthVector())
11399 return Result;
11400 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11401}
11402
11403SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 MVT VT = Op.getSimpleValueType();
11406 if (VT.getVectorElementType() != MVT::i1)
11407 return lowerVPOp(Op, DAG);
11408
11409 // It is safe to drop the mask parameter, as masked-off elements are undef.
11410 SDValue Op1 = Op->getOperand(Num: 0);
11411 SDValue Op2 = Op->getOperand(Num: 1);
11412 SDValue VL = Op->getOperand(Num: 3);
11413
11414 MVT ContainerVT = VT;
11415 const bool IsFixed = VT.isFixedLengthVector();
11416 if (IsFixed) {
11417 ContainerVT = getContainerForFixedLengthVector(VT);
11418 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
11419 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
11420 }
11421
11422 SDLoc DL(Op);
11423 SDValue Val = DAG.getNode(Opcode: getRISCVVLOp(Op), DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
11424 if (!IsFixed)
11425 return Val;
11426 return convertFromScalableVector(VT, V: Val, DAG, Subtarget);
11427}
11428
11429SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc DL(Op);
11432 MVT XLenVT = Subtarget.getXLenVT();
11433 MVT VT = Op.getSimpleValueType();
11434 MVT ContainerVT = VT;
11435 if (VT.isFixedLengthVector())
11436 ContainerVT = getContainerForFixedLengthVector(VT);
11437
11438 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11439
11440 auto *VPNode = cast<VPStridedLoadSDNode>(Val&: Op);
11441 // Check if the mask is known to be all ones
11442 SDValue Mask = VPNode->getMask();
11443 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11444
11445 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11446 : Intrinsic::riscv_vlse_mask,
11447 DL, XLenVT);
11448 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11449 DAG.getUNDEF(VT: ContainerVT), VPNode->getBasePtr(),
11450 VPNode->getStride()};
11451 if (!IsUnmasked) {
11452 if (VT.isFixedLengthVector()) {
11453 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11454 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11455 }
11456 Ops.push_back(Elt: Mask);
11457 }
11458 Ops.push_back(Elt: VPNode->getVectorLength());
11459 if (!IsUnmasked) {
11460 SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT);
11461 Ops.push_back(Elt: Policy);
11462 }
11463
11464 SDValue Result =
11465 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
11466 MemVT: VPNode->getMemoryVT(), MMO: VPNode->getMemOperand());
11467 SDValue Chain = Result.getValue(R: 1);
11468
11469 if (VT.isFixedLengthVector())
11470 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11471
11472 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
11473}
11474
11475SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 SDLoc DL(Op);
11478 MVT XLenVT = Subtarget.getXLenVT();
11479
11480 auto *VPNode = cast<VPStridedStoreSDNode>(Val&: Op);
11481 SDValue StoreVal = VPNode->getValue();
11482 MVT VT = StoreVal.getSimpleValueType();
11483 MVT ContainerVT = VT;
11484 if (VT.isFixedLengthVector()) {
11485 ContainerVT = getContainerForFixedLengthVector(VT);
11486 StoreVal = convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget);
11487 }
11488
11489 // Check if the mask is known to be all ones
11490 SDValue Mask = VPNode->getMask();
11491 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11492
11493 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11494 : Intrinsic::riscv_vsse_mask,
11495 DL, XLenVT);
11496 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11497 VPNode->getBasePtr(), VPNode->getStride()};
11498 if (!IsUnmasked) {
11499 if (VT.isFixedLengthVector()) {
11500 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11501 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11502 }
11503 Ops.push_back(Elt: Mask);
11504 }
11505 Ops.push_back(Elt: VPNode->getVectorLength());
11506
11507 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: VPNode->getVTList(),
11508 Ops, MemVT: VPNode->getMemoryVT(),
11509 MMO: VPNode->getMemOperand());
11510}
11511
11512// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11513// matched to a RVV indexed load. The RVV indexed load instructions only
11514// support the "unsigned unscaled" addressing mode; indices are implicitly
11515// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11516// signed or scaled indexing is extended to the XLEN value type and scaled
11517// accordingly.
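// For example, a gather of i32 elements with an index vector of (0, 8, 16)
// loads from BasePtr+0, BasePtr+8 and BasePtr+16, since the indices are raw
// byte offsets rather than element numbers.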
11518SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11519 SelectionDAG &DAG) const {
11520 SDLoc DL(Op);
11521 MVT VT = Op.getSimpleValueType();
11522
11523 const auto *MemSD = cast<MemSDNode>(Val: Op.getNode());
11524 EVT MemVT = MemSD->getMemoryVT();
11525 MachineMemOperand *MMO = MemSD->getMemOperand();
11526 SDValue Chain = MemSD->getChain();
11527 SDValue BasePtr = MemSD->getBasePtr();
11528
11529 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11530 SDValue Index, Mask, PassThru, VL;
11531
11532 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Val: Op.getNode())) {
11533 Index = VPGN->getIndex();
11534 Mask = VPGN->getMask();
11535 PassThru = DAG.getUNDEF(VT);
11536 VL = VPGN->getVectorLength();
11537 // VP doesn't support extending loads.
11538 LoadExtType = ISD::NON_EXTLOAD;
11539 } else {
11540 // Else it must be a MGATHER.
11541 auto *MGN = cast<MaskedGatherSDNode>(Val: Op.getNode());
11542 Index = MGN->getIndex();
11543 Mask = MGN->getMask();
11544 PassThru = MGN->getPassThru();
11545 LoadExtType = MGN->getExtensionType();
11546 }
11547
11548 MVT IndexVT = Index.getSimpleValueType();
11549 MVT XLenVT = Subtarget.getXLenVT();
11550
11551 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11552 "Unexpected VTs!");
11553 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11554 // Targets have to explicitly opt in to extending vector loads.
11555 assert(LoadExtType == ISD::NON_EXTLOAD &&
11556 "Unexpected extending MGATHER/VP_GATHER");
11557
11558 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11559 // the selection of the masked intrinsics doesn't do this for us.
11560 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11561
11562 MVT ContainerVT = VT;
11563 if (VT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VT);
11565 IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(),
11566 EC: ContainerVT.getVectorElementCount());
11567
11568 Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget);
11569
11570 if (!IsUnmasked) {
11571 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11572 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11573 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
11574 }
11575 }
11576
11577 if (!VL)
11578 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
11579
11580 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11581 IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT);
11582 Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index);
11583 }
11584
11585 unsigned IntID =
11586 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11587 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
11588 if (IsUnmasked)
11589 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
11590 else
11591 Ops.push_back(Elt: PassThru);
11592 Ops.push_back(Elt: BasePtr);
11593 Ops.push_back(Elt: Index);
11594 if (!IsUnmasked)
11595 Ops.push_back(Elt: Mask);
11596 Ops.push_back(Elt: VL);
11597 if (!IsUnmasked)
11598 Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT));
11599
11600 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11601 SDValue Result =
11602 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO);
11603 Chain = Result.getValue(R: 1);
11604
11605 if (VT.isFixedLengthVector())
11606 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
11607
11608 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
11609}
11610
11611// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11612// matched to a RVV indexed store. The RVV indexed store instructions only
11613// support the "unsigned unscaled" addressing mode; indices are implicitly
11614// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11615// signed or scaled indexing is extended to the XLEN value type and scaled
11616// accordingly.
11617SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 const auto *MemSD = cast<MemSDNode>(Val: Op.getNode());
11621 EVT MemVT = MemSD->getMemoryVT();
11622 MachineMemOperand *MMO = MemSD->getMemOperand();
11623 SDValue Chain = MemSD->getChain();
11624 SDValue BasePtr = MemSD->getBasePtr();
11625
11626 [[maybe_unused]] bool IsTruncatingStore = false;
11627 SDValue Index, Mask, Val, VL;
11628
11629 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Val: Op.getNode())) {
11630 Index = VPSN->getIndex();
11631 Mask = VPSN->getMask();
11632 Val = VPSN->getValue();
11633 VL = VPSN->getVectorLength();
11634 // VP doesn't support truncating stores.
11635 IsTruncatingStore = false;
11636 } else {
11637 // Else it must be a MSCATTER.
11638 auto *MSN = cast<MaskedScatterSDNode>(Val: Op.getNode());
11639 Index = MSN->getIndex();
11640 Mask = MSN->getMask();
11641 Val = MSN->getValue();
11642 IsTruncatingStore = MSN->isTruncatingStore();
11643 }
11644
11645 MVT VT = Val.getSimpleValueType();
11646 MVT IndexVT = Index.getSimpleValueType();
11647 MVT XLenVT = Subtarget.getXLenVT();
11648
11649 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11650 "Unexpected VTs!");
11651 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11652 // Targets have to explicitly opt in to extending vector loads and
11653 // truncating vector stores.
11654 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11655
11656 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11657 // the selection of the masked intrinsics doesn't do this for us.
11658 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
11659
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector()) {
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663 IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(),
11664 EC: ContainerVT.getVectorElementCount());
11665
11666 Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget);
11667 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
11668
11669 if (!IsUnmasked) {
11670 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
11671 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
11672 }
11673 }
11674
11675 if (!VL)
11676 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
11677
11678 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11679 IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT);
11680 Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index);
11681 }
11682
11683 unsigned IntID =
11684 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11685 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
11686 Ops.push_back(Elt: Val);
11687 Ops.push_back(Elt: BasePtr);
11688 Ops.push_back(Elt: Index);
11689 if (!IsUnmasked)
11690 Ops.push_back(Elt: Mask);
11691 Ops.push_back(Elt: VL);
11692
11693 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11694 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11695}
11696
11697SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11698 SelectionDAG &DAG) const {
11699 const MVT XLenVT = Subtarget.getXLenVT();
11700 SDLoc DL(Op);
11701 SDValue Chain = Op->getOperand(Num: 0);
11702 SDValue SysRegNo = DAG.getTargetConstant(
11703 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11704 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11705 SDValue RM = DAG.getNode(Opcode: RISCVISD::READ_CSR, DL, VTList: VTs, N1: Chain, N2: SysRegNo);
11706
11707 // The encoding used for the rounding mode in RISC-V differs from that used by
11708 // FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as an
11709 // index into a table, which consists of a sequence of 4-bit fields, each
11710 // representing the corresponding FLT_ROUNDS mode.
11711 static const int Table =
11712 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11713 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11714 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11715 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11716 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
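// Worked example (assuming the usual encodings RNE=0/RTZ=1/RDN=2/RUP=3/RMM=4
// and TowardZero=0/NearestTiesToEven=1/TowardPositive=2/TowardNegative=3/
// NearestTiesToAway=4): Table evaluates to 0x42301, so reading FRM == RDN (2)
// shifts the table right by 2 << 2 = 8 bits and masks with 7, yielding
// 3 == RoundingMode::TowardNegative.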
11717
11718 SDValue Shift =
11719 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RM, N2: DAG.getConstant(Val: 2, DL, VT: XLenVT));
11720 SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT,
11721 N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift);
11722 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted,
11723 N2: DAG.getConstant(Val: 7, DL, VT: XLenVT));
11724
11725 return DAG.getMergeValues(Ops: {Masked, Chain}, dl: DL);
11726}
11727
11728SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11729 SelectionDAG &DAG) const {
11730 const MVT XLenVT = Subtarget.getXLenVT();
11731 SDLoc DL(Op);
11732 SDValue Chain = Op->getOperand(Num: 0);
11733 SDValue RMValue = Op->getOperand(Num: 1);
11734 SDValue SysRegNo = DAG.getTargetConstant(
11735 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11736
11737 // The encoding used for the rounding mode in RISC-V differs from that used by
11738 // FLT_ROUNDS. To convert between them, the C rounding mode is used as an index
11739 // into a table, which consists of a sequence of 4-bit fields, each
11740 // representing the corresponding RISC-V mode.
11741 static const unsigned Table =
11742 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11743 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11744 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11745 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11746 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
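// Under the same assumed encodings as in lowerGET_ROUNDING, this inverse
// table also evaluates to 0x42301 (the FRM <-> FLT_ROUNDS mapping happens to
// be its own inverse); e.g. setting mode 3 (TowardNegative) shifts by 12 and
// masks with 7, yielding RDN (2).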
11747
11748 RMValue = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: RMValue);
11749
11750 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RMValue,
11751 N2: DAG.getConstant(Val: 2, DL, VT: XLenVT));
11752 SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT,
11753 N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift);
11754 RMValue = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted,
11755 N2: DAG.getConstant(Val: 0x7, DL, VT: XLenVT));
11756 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11757 RMValue);
11758}
11759
11760SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11761 SelectionDAG &DAG) const {
11762 MachineFunction &MF = DAG.getMachineFunction();
11763
11764 bool isRISCV64 = Subtarget.is64Bit();
11765 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11766
11767 int FI = MF.getFrameInfo().CreateFixedObject(Size: isRISCV64 ? 8 : 4, SPOffset: 0, IsImmutable: false);
11768 return DAG.getFrameIndex(FI, VT: PtrVT);
11769}
11770
11771// Returns the opcode of the target-specific SDNode that implements the 32-bit
11772// form of the given Opcode.
11773static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11774 switch (Opcode) {
11775 default:
11776 llvm_unreachable("Unexpected opcode");
11777 case ISD::SHL:
11778 return RISCVISD::SLLW;
11779 case ISD::SRA:
11780 return RISCVISD::SRAW;
11781 case ISD::SRL:
11782 return RISCVISD::SRLW;
11783 case ISD::SDIV:
11784 return RISCVISD::DIVW;
11785 case ISD::UDIV:
11786 return RISCVISD::DIVUW;
11787 case ISD::UREM:
11788 return RISCVISD::REMUW;
11789 case ISD::ROTL:
11790 return RISCVISD::ROLW;
11791 case ISD::ROTR:
11792 return RISCVISD::RORW;
11793 }
11794}
11795
11796 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11797 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11798 // otherwise be promoted to i64, making it difficult to select the
11799 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
11800 // originally of type i8/i16/i32 is lost.
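// For example (a sketch of the default i32 case on RV64), (i32 (srl x, y)) is
// rebuilt as (i32 (trunc (SRLW (any_extend x), (any_extend y)))), which keeps
// the original 32-bit operation visible to instruction selection.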
11801static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11802 unsigned ExtOpc = ISD::ANY_EXTEND) {
11803 SDLoc DL(N);
11804 RISCVISD::NodeType WOpcode = getRISCVWOpcode(Opcode: N->getOpcode());
11805 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11806 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11807 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11808 // ReplaceNodeResults requires we maintain the same type for the return value.
11809 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
11810}
11811
11812 // Converts the given 32-bit operation to an i64 operation with sign-extension
11813 // semantics in order to reduce the number of sign-extension instructions.
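// For example, (i32 (add x, y)) becomes
// (i32 (trunc (sext_inreg (add (any_extend x), (any_extend y)), i32))),
// so later nodes can rely on the upper bits of the i64 value being sign bits.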
11814static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11815 SDLoc DL(N);
11816 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11817 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11818 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11819 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11820 DAG.getValueType(MVT::i32));
11821 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11822}
11823
11824void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11825 SmallVectorImpl<SDValue> &Results,
11826 SelectionDAG &DAG) const {
11827 SDLoc DL(N);
11828 switch (N->getOpcode()) {
11829 default:
11830 llvm_unreachable("Don't know how to custom type legalize this operation!");
11831 case ISD::STRICT_FP_TO_SINT:
11832 case ISD::STRICT_FP_TO_UINT:
11833 case ISD::FP_TO_SINT:
11834 case ISD::FP_TO_UINT: {
11835 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11836 "Unexpected custom legalisation");
11837 bool IsStrict = N->isStrictFPOpcode();
11838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11839 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11840 SDValue Op0 = IsStrict ? N->getOperand(Num: 1) : N->getOperand(Num: 0);
11841 if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) !=
11842 TargetLowering::TypeSoftenFloat) {
11843 if (!isTypeLegal(VT: Op0.getValueType()))
11844 return;
11845 if (IsStrict) {
11846 SDValue Chain = N->getOperand(Num: 0);
11847 // In the absence of Zfh, promote f16 to f32, then convert.
11848 if (Op0.getValueType() == MVT::f16 &&
11849 !Subtarget.hasStdExtZfhOrZhinx()) {
11850 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11851 {Chain, Op0});
11852 Chain = Op0.getValue(R: 1);
11853 }
11854 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11855 : RISCVISD::STRICT_FCVT_WU_RV64;
11856 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11857 SDValue Res = DAG.getNode(
11858 Opc, DL, VTs, Chain, Op0,
11859 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11860 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11861 Results.push_back(Elt: Res.getValue(R: 1));
11862 return;
11863 }
11864 // For bf16, or for f16 in the absence of Zfh, promote [b]f16 to f32 and
11865 // then convert.
11866 if ((Op0.getValueType() == MVT::f16 &&
11867 !Subtarget.hasStdExtZfhOrZhinx()) ||
11868 Op0.getValueType() == MVT::bf16)
11869 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11870
11871 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11872 SDValue Res =
11873 DAG.getNode(Opc, DL, MVT::i64, Op0,
11874 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11875 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11876 return;
11877 }
11878 // If the FP type needs to be softened, emit a library call using the 'si'
11879 // version. If we left it to default legalization we'd end up with 'di'. If
11880 // the FP type doesn't need to be softened just let generic type
11881 // legalization promote the result type.
11882 RTLIB::Libcall LC;
11883 if (IsSigned)
11884 LC = RTLIB::getFPTOSINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0));
11885 else
11886 LC = RTLIB::getFPTOUINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0));
11887 MakeLibCallOptions CallOptions;
11888 EVT OpVT = Op0.getValueType();
11889 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: N->getValueType(ResNo: 0), Value: true);
11890 SDValue Chain = IsStrict ? N->getOperand(Num: 0) : SDValue();
11891 SDValue Result;
11892 std::tie(args&: Result, args&: Chain) =
11893 makeLibCall(DAG, LC, RetVT: N->getValueType(ResNo: 0), Ops: Op0, CallOptions, dl: DL, Chain);
11894 Results.push_back(Elt: Result);
11895 if (IsStrict)
11896 Results.push_back(Elt: Chain);
11897 break;
11898 }
11899 case ISD::LROUND: {
11900 SDValue Op0 = N->getOperand(Num: 0);
11901 EVT Op0VT = Op0.getValueType();
11902 if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) !=
11903 TargetLowering::TypeSoftenFloat) {
11904 if (!isTypeLegal(VT: Op0VT))
11905 return;
11906
11907 // In the absence of Zfh, promote f16 to f32, then convert.
11908 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11909 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11910
11911 SDValue Res =
11912 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11913 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11915 return;
11916 }
11917 // If the FP type needs to be softened, emit a library call to lround. We'll
11918 // need to truncate the result. We assume any value that doesn't fit in i32
11919 // is allowed to return an unspecified value.
11920 RTLIB::Libcall LC =
11921 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11922 MakeLibCallOptions CallOptions;
11923 EVT OpVT = Op0.getValueType();
11924 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11925 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11926 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11927 Results.push_back(Elt: Result);
11928 break;
11929 }
11930 case ISD::READCYCLECOUNTER:
11931 case ISD::READSTEADYCOUNTER: {
11932 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11933 "has custom type legalization on riscv32");
11934
11935 SDValue LoCounter, HiCounter;
11936 MVT XLenVT = Subtarget.getXLenVT();
11937 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11938 LoCounter = DAG.getTargetConstant(
11939 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11940 HiCounter = DAG.getTargetConstant(
11941 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11942 } else {
11943 LoCounter = DAG.getTargetConstant(
11944 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11945 HiCounter = DAG.getTargetConstant(
11946 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11947 }
11948 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11949 SDValue RCW = DAG.getNode(Opcode: RISCVISD::READ_COUNTER_WIDE, DL, VTList: VTs,
11950 N1: N->getOperand(Num: 0), N2: LoCounter, N3: HiCounter);
11951
11952 Results.push_back(
11953 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11954 Results.push_back(Elt: RCW.getValue(R: 2));
11955 break;
11956 }
11957 case ISD::LOAD: {
11958 if (!ISD::isNON_EXTLoad(N))
11959 return;
11960
11961 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11962 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11963 LoadSDNode *Ld = cast<LoadSDNode>(Val: N);
11964
11965 SDLoc dl(N);
11966 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11967 Ld->getBasePtr(), Ld->getMemoryVT(),
11968 Ld->getMemOperand());
11969 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11970 Results.push_back(Elt: Res.getValue(R: 1));
11971 return;
11972 }
11973 case ISD::MUL: {
11974 unsigned Size = N->getSimpleValueType(ResNo: 0).getSizeInBits();
11975 unsigned XLen = Subtarget.getXLen();
11976 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11977 if (Size > XLen) {
11978 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11979 SDValue LHS = N->getOperand(Num: 0);
11980 SDValue RHS = N->getOperand(Num: 1);
11981 APInt HighMask = APInt::getHighBitsSet(numBits: Size, hiBitsSet: XLen);
11982
11983 bool LHSIsU = DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask);
11984 bool RHSIsU = DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask);
11985 // We need exactly one side to be unsigned.
11986 if (LHSIsU == RHSIsU)
11987 return;
11988
11989 auto MakeMULPair = [&](SDValue S, SDValue U) {
11990 MVT XLenVT = Subtarget.getXLenVT();
11991 S = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: S);
11992 U = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: U);
11993 SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: S, N2: U);
11994 SDValue Hi = DAG.getNode(Opcode: RISCVISD::MULHSU, DL, VT: XLenVT, N1: S, N2: U);
11995 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: N->getValueType(ResNo: 0), N1: Lo, N2: Hi);
11996 };
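// Informal rationale: when one operand has its high XLen bits known zero and
// the other is a sign-extension of its low XLen bits, the full 2*XLen product
// is exactly MUL (low half) and MULHSU (high half) of the two low halves.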
11997
11998 bool LHSIsS = DAG.ComputeNumSignBits(Op: LHS) > XLen;
11999 bool RHSIsS = DAG.ComputeNumSignBits(Op: RHS) > XLen;
12000
12001 // The other operand should be signed, but still prefer MULH when
12002 // possible.
12003 if (RHSIsU && LHSIsS && !RHSIsS)
12004 Results.push_back(Elt: MakeMULPair(LHS, RHS));
12005 else if (LHSIsU && RHSIsS && !LHSIsS)
12006 Results.push_back(Elt: MakeMULPair(RHS, LHS));
12007
12008 return;
12009 }
12010 [[fallthrough]];
12011 }
12012 case ISD::ADD:
12013 case ISD::SUB:
12014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12015 "Unexpected custom legalisation");
12016 Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
12017 break;
12018 case ISD::SHL:
12019 case ISD::SRA:
12020 case ISD::SRL:
12021 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12022 "Unexpected custom legalisation");
12023 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
12024 // If we can use a BSET instruction, allow default promotion to apply.
12025 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12026 isOneConstant(V: N->getOperand(Num: 0)))
12027 break;
12028 Results.push_back(Elt: customLegalizeToWOp(N, DAG));
12029 break;
12030 }
12031
12032 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12033 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12034 // shift amount.
12035 if (N->getOpcode() == ISD::SHL) {
12036 SDLoc DL(N);
12037 SDValue NewOp0 =
12038 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12039 SDValue NewOp1 =
12040 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12041 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12042 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12043 DAG.getValueType(MVT::i32));
12044 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12045 }
12046
12047 break;
12048 case ISD::ROTL:
12049 case ISD::ROTR:
12050 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12051 "Unexpected custom legalisation");
12052 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12053 Subtarget.hasVendorXTHeadBb()) &&
12054 "Unexpected custom legalization");
12055 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) &&
12056 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12057 return;
12058 Results.push_back(Elt: customLegalizeToWOp(N, DAG));
12059 break;
12060 case ISD::CTTZ:
12061 case ISD::CTTZ_ZERO_UNDEF:
12062 case ISD::CTLZ:
12063 case ISD::CTLZ_ZERO_UNDEF: {
12064 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12065 "Unexpected custom legalisation");
12066
12067 SDValue NewOp0 =
12068 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12069 bool IsCTZ =
12070 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12071 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12072 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12073 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12074 return;
12075 }
12076 case ISD::SDIV:
12077 case ISD::UDIV:
12078 case ISD::UREM: {
12079 MVT VT = N->getSimpleValueType(ResNo: 0);
12080 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12081 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12082 "Unexpected custom legalisation");
    // Don't promote division/remainder by a constant since we should expand
    // those to a multiply by a magic constant.
12085 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12086 if (N->getOperand(Num: 1).getOpcode() == ISD::Constant &&
12087 !isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
12088 return;
12089
12090 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12091 // the upper 32 bits. For other types we need to sign or zero extend
12092 // based on the opcode.
12093 unsigned ExtOpc = ISD::ANY_EXTEND;
12094 if (VT != MVT::i32)
12095 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12096 : ISD::ZERO_EXTEND;
12097
12098 Results.push_back(Elt: customLegalizeToWOp(N, DAG, ExtOpc));
12099 break;
12100 }
12101 case ISD::SADDO: {
12102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12103 "Unexpected custom legalisation");
12104
12105 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12106 // use the default legalization.
12107 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
12108 return;
12109
12110 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12111 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12112 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12113 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12114 DAG.getValueType(MVT::i32));
12115
12116 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12117
    // For an addition, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is negative; otherwise
    // there will be overflow.
    // For a subtraction, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is (non-zero) positive;
    // otherwise there will be overflow.
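    // Illustrative example with RHS == 1: the add overflows exactly when LHS
    // is INT32_MAX; Res then wraps to INT32_MIN < LHS while ConditionRHS
    // (1 < 0) is false, so the XOR below yields true. In all other cases
    // Res == LHS + 1 > LHS and the XOR yields false.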
12124 EVT OType = N->getValueType(ResNo: 1);
12125 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, VT: OType, LHS: Res, RHS: LHS, Cond: ISD::SETLT);
12126 SDValue ConditionRHS = DAG.getSetCC(DL, VT: OType, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
12127
12128 SDValue Overflow =
12129 DAG.getNode(Opcode: ISD::XOR, DL, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS);
12130 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12131 Results.push_back(Elt: Overflow);
12132 return;
12133 }
12134 case ISD::UADDO:
12135 case ISD::USUBO: {
12136 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12137 "Unexpected custom legalisation");
12138 bool IsAdd = N->getOpcode() == ISD::UADDO;
12139 // Create an ADDW or SUBW.
12140 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12141 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12142 SDValue Res =
12143 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12144 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12145 DAG.getValueType(MVT::i32));
12146
12147 SDValue Overflow;
12148 if (IsAdd && isOneConstant(V: RHS)) {
12149 // Special case uaddo X, 1 overflowed if the addition result is 0.
      // The general case (X + C) < C is not necessarily beneficial. Although
      // we reduce the live range of X, we may introduce the materialization of
      // constant C, especially when the setcc result is used by a branch;
      // RISC-V has no compare-with-constant-and-branch instructions.
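      // As a rough sketch of the expected lowering (exact instruction choice
      // is up to isel): uaddo X, 1 becomes an ADDIW of X plus a SEQZ of the
      // result, avoiding both a LI of the constant and an extra compare.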
12154 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12155 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12156 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
12157 // Special case uaddo X, -1 overflowed if X != 0.
12158 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12159 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12160 } else {
12161 // Sign extend the LHS and perform an unsigned compare with the ADDW
12162 // result. Since the inputs are sign extended from i32, this is equivalent
12163 // to comparing the lower 32 bits.
12164 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12165 Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: Res, RHS: LHS,
12166 Cond: IsAdd ? ISD::SETULT : ISD::SETUGT);
12167 }
12168
12169 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12170 Results.push_back(Elt: Overflow);
12171 return;
12172 }
12173 case ISD::UADDSAT:
12174 case ISD::USUBSAT: {
12175 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12176 "Unexpected custom legalisation");
12177 if (Subtarget.hasStdExtZbb()) {
12178 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12179 // sign extend allows overflow of the lower 32 bits to be detected on
12180 // the promoted size.
12181 SDValue LHS =
12182 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12183 SDValue RHS =
12184 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12185 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12186 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12187 return;
12188 }
12189
12190 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12191 // promotion for UADDO/USUBO.
12192 Results.push_back(Elt: expandAddSubSat(Node: N, DAG));
12193 return;
12194 }
12195 case ISD::SADDSAT:
12196 case ISD::SSUBSAT: {
12197 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12198 "Unexpected custom legalisation");
12199 Results.push_back(Elt: expandAddSubSat(Node: N, DAG));
12200 return;
12201 }
12202 case ISD::ABS: {
12203 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12204 "Unexpected custom legalisation");
12205
12206 if (Subtarget.hasStdExtZbb()) {
12207 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12208 // This allows us to remember that the result is sign extended. Expanding
12209 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12210 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12211 N->getOperand(0));
12212 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12213 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12214 return;
12215 }
12216
12217 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
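    // For example, X = -5: Y = -1, xor(X, Y) = 4, and subw(4, -1) = 5.
    // For X >= 0, Y = 0 and both the xor and the subw leave X unchanged.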
12218 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12219
    // Freeze the source so we can increase its use count.
12221 Src = DAG.getFreeze(V: Src);
12222
12223 // Copy sign bit to all bits using the sraiw pattern.
12224 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12225 DAG.getValueType(MVT::i32));
12226 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12227 DAG.getConstant(31, DL, MVT::i64));
12228
12229 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12230 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12231
12232 // NOTE: The result is only required to be anyextended, but sext is
12233 // consistent with type legalization of sub.
12234 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12235 DAG.getValueType(MVT::i32));
12236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12237 return;
12238 }
12239 case ISD::BITCAST: {
12240 EVT VT = N->getValueType(ResNo: 0);
12241 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12242 SDValue Op0 = N->getOperand(Num: 0);
12243 EVT Op0VT = Op0.getValueType();
12244 MVT XLenVT = Subtarget.getXLenVT();
12245 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12246 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12247 SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0);
12248 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12249 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12250 Subtarget.hasStdExtZfbfmin()) {
12251 SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0);
12252 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12253 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12254 Subtarget.hasStdExtFOrZfinx()) {
12255 SDValue FPConv =
12256 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12257 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12258 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12259 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12260 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12261 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12262 NewReg.getValue(0), NewReg.getValue(1));
12263 Results.push_back(Elt: RetReg);
12264 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12265 isTypeLegal(VT: Op0VT)) {
12266 // Custom-legalize bitcasts from fixed-length vector types to illegal
12267 // scalar types in order to improve codegen. Bitcast the vector to a
12268 // one-element vector type whose element type is the same as the result
12269 // type, and extract the first element.
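      // For example (a sketch, assuming RV32 with 64-bit vector elements
      // available), (i64 (bitcast (v2i32 V))) becomes
      // (extract_vector_elt (v1i64 (bitcast V)), 0), since v1i64 is legal
      // while scalar i64 is not.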
12270 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1);
12271 if (isTypeLegal(VT: BVT)) {
12272 SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0);
12273 Results.push_back(Elt: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec,
12274 N2: DAG.getVectorIdxConstant(Val: 0, DL)));
12275 }
12276 }
12277 break;
12278 }
12279 case RISCVISD::BREV8: {
12280 MVT VT = N->getSimpleValueType(ResNo: 0);
12281 MVT XLenVT = Subtarget.getXLenVT();
12282 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12283 "Unexpected custom legalisation");
12284 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12285 SDValue NewOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 0));
12286 SDValue NewRes = DAG.getNode(Opcode: N->getOpcode(), DL, VT: XLenVT, Operand: NewOp);
12287 // ReplaceNodeResults requires we maintain the same type for the return
12288 // value.
12289 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: NewRes));
12290 break;
12291 }
12292 case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN < SEW, as the SEW
    // element type is illegal (currently only vXi64 on RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper and lower halves of the SEW-bit vector element, slid down to the
    // first element.
12299 SDValue Vec = N->getOperand(Num: 0);
12300 SDValue Idx = N->getOperand(Num: 1);
12301
12302 // The vector type hasn't been legalized yet so we can't issue target
12303 // specific nodes if it needs legalization.
12304 // FIXME: We would manually legalize if it's important.
12305 if (!isTypeLegal(VT: Vec.getValueType()))
12306 return;
12307
12308 MVT VecVT = Vec.getSimpleValueType();
12309
12310 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12311 VecVT.getVectorElementType() == MVT::i64 &&
12312 "Unexpected EXTRACT_VECTOR_ELT legalization");
12313
12314 // If this is a fixed vector, we need to convert it to a scalable vector.
12315 MVT ContainerVT = VecVT;
12316 if (VecVT.isFixedLengthVector()) {
12317 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
12318 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
12319 }
12320
12321 MVT XLenVT = Subtarget.getXLenVT();
12322
12323 // Use a VL of 1 to avoid processing more elements than we need.
12324 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget);
12325
12326 // Unless the index is known to be 0, we must slide the vector down to get
12327 // the desired element into index 0.
12328 if (!isNullConstant(V: Idx)) {
12329 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
12330 Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL);
12331 }
12332
12333 // Extract the lower XLEN bits of the correct vector element.
12334 SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
12335
12336 // To extract the upper XLEN bits of the vector element, shift the first
12337 // element right by 32 bits and re-extract the lower XLEN bits.
12338 SDValue ThirtyTwoV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
12339 N1: DAG.getUNDEF(VT: ContainerVT),
12340 N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL);
12341 SDValue LShr32 =
12342 DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: ContainerVT, N1: Vec, N2: ThirtyTwoV,
12343 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
12344
12345 SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32);
12346
12347 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12348 break;
12349 }
12350 case ISD::INTRINSIC_WO_CHAIN: {
12351 unsigned IntNo = N->getConstantOperandVal(Num: 0);
12352 switch (IntNo) {
12353 default:
12354 llvm_unreachable(
12355 "Don't know how to custom type legalize this intrinsic!");
12356 case Intrinsic::experimental_get_vector_length: {
12357 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12358 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12359 return;
12360 }
12361 case Intrinsic::experimental_cttz_elts: {
12362 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12363 Results.push_back(
12364 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res));
12365 return;
12366 }
12367 case Intrinsic::riscv_orc_b:
12368 case Intrinsic::riscv_brev8:
12369 case Intrinsic::riscv_sha256sig0:
12370 case Intrinsic::riscv_sha256sig1:
12371 case Intrinsic::riscv_sha256sum0:
12372 case Intrinsic::riscv_sha256sum1:
12373 case Intrinsic::riscv_sm3p0:
12374 case Intrinsic::riscv_sm3p1: {
12375 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12376 return;
12377 unsigned Opc;
12378 switch (IntNo) {
12379 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12380 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12381 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12382 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12383 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12384 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12385 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12386 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12387 }
12388
12389 SDValue NewOp =
12390 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12391 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12392 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12393 return;
12394 }
12395 case Intrinsic::riscv_sm4ks:
12396 case Intrinsic::riscv_sm4ed: {
12397 unsigned Opc =
12398 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12399 SDValue NewOp0 =
12400 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12401 SDValue NewOp1 =
12402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12403 SDValue Res =
12404 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12405 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12406 return;
12407 }
12408 case Intrinsic::riscv_mopr: {
12409 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12410 return;
12411 SDValue NewOp =
12412 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12413 SDValue Res = DAG.getNode(
12414 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12415 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12416 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12417 return;
12418 }
12419 case Intrinsic::riscv_moprr: {
12420 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12421 return;
12422 SDValue NewOp0 =
12423 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12424 SDValue NewOp1 =
12425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12426 SDValue Res = DAG.getNode(
12427 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12428 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12429 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12430 return;
12431 }
12432 case Intrinsic::riscv_clmul: {
12433 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12434 return;
12435
12436 SDValue NewOp0 =
12437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12438 SDValue NewOp1 =
12439 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12440 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12441 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12442 return;
12443 }
12444 case Intrinsic::riscv_clmulh:
12445 case Intrinsic::riscv_clmulr: {
12446 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12447 return;
12448
12449 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12450 // to the full 128-bit clmul result of multiplying two xlen values.
12451 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12452 // upper 32 bits.
12453 //
12454 // The alternative is to mask the inputs to 32 bits and use clmul, but
12455 // that requires two shifts to mask each input without zext.w.
12456 // FIXME: If the inputs are known zero extended or could be freely
12457 // zero extended, the mask form would be better.
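      // For example, for clmulh: clmul(a << 32, b << 32) is
      // clmul(a, b) << 64, so CLMULH of the shifted operands returns
      // clmul(a, b)[63:0], and the SRL by 32 below leaves bits [63:32],
      // i.e. the 32-bit clmulh result.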
12458 SDValue NewOp0 =
12459 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12460 SDValue NewOp1 =
12461 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12462 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12463 DAG.getConstant(32, DL, MVT::i64));
12464 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12465 DAG.getConstant(32, DL, MVT::i64));
12466 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12467 : RISCVISD::CLMULR;
12468 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12469 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12470 DAG.getConstant(32, DL, MVT::i64));
12471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12472 return;
12473 }
12474 case Intrinsic::riscv_vmv_x_s: {
12475 EVT VT = N->getValueType(ResNo: 0);
12476 MVT XLenVT = Subtarget.getXLenVT();
12477 if (VT.bitsLT(VT: XLenVT)) {
        // Simple case: just extract using vmv.x.s and truncate.
12479 SDValue Extract = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL,
12480 VT: Subtarget.getXLenVT(), Operand: N->getOperand(Num: 1));
12481 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Extract));
12482 return;
12483 }
12484
12485 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12486 "Unexpected custom legalization");
12487
12488 // We need to do the move in two steps.
12489 SDValue Vec = N->getOperand(Num: 1);
12490 MVT VecVT = Vec.getSimpleValueType();
12491
12492 // First extract the lower XLEN bits of the element.
12493 SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
12494
12495 // To extract the upper XLEN bits of the vector element, shift the first
12496 // element right by 32 bits and re-extract the lower XLEN bits.
12497 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT: VecVT, DL, DAG, Subtarget);
12498
12499 SDValue ThirtyTwoV =
12500 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT),
12501 N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL);
12502 SDValue LShr32 = DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: VecVT, N1: Vec, N2: ThirtyTwoV,
12503 N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL);
12504 SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32);
12505
12506 Results.push_back(
12507 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12508 break;
12509 }
12510 }
12511 break;
12512 }
12513 case ISD::VECREDUCE_ADD:
12514 case ISD::VECREDUCE_AND:
12515 case ISD::VECREDUCE_OR:
12516 case ISD::VECREDUCE_XOR:
12517 case ISD::VECREDUCE_SMAX:
12518 case ISD::VECREDUCE_UMAX:
12519 case ISD::VECREDUCE_SMIN:
12520 case ISD::VECREDUCE_UMIN:
12521 if (SDValue V = lowerVECREDUCE(Op: SDValue(N, 0), DAG))
12522 Results.push_back(Elt: V);
12523 break;
12524 case ISD::VP_REDUCE_ADD:
12525 case ISD::VP_REDUCE_AND:
12526 case ISD::VP_REDUCE_OR:
12527 case ISD::VP_REDUCE_XOR:
12528 case ISD::VP_REDUCE_SMAX:
12529 case ISD::VP_REDUCE_UMAX:
12530 case ISD::VP_REDUCE_SMIN:
12531 case ISD::VP_REDUCE_UMIN:
12532 if (SDValue V = lowerVPREDUCE(Op: SDValue(N, 0), DAG))
12533 Results.push_back(Elt: V);
12534 break;
12535 case ISD::GET_ROUNDING: {
12536 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12537 SDValue Res = DAG.getNode(Opcode: ISD::GET_ROUNDING, DL, VTList: VTs, N: N->getOperand(Num: 0));
12538 Results.push_back(Elt: Res.getValue(R: 0));
12539 Results.push_back(Elt: Res.getValue(R: 1));
12540 break;
12541 }
12542 }
12543}
12544
12545/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12546/// which corresponds to it.
12547static unsigned getVecReduceOpcode(unsigned Opc) {
12548 switch (Opc) {
12549 default:
    llvm_unreachable("Unhandled binary op to transform to a reduction");
12551 case ISD::ADD:
12552 return ISD::VECREDUCE_ADD;
12553 case ISD::UMAX:
12554 return ISD::VECREDUCE_UMAX;
12555 case ISD::SMAX:
12556 return ISD::VECREDUCE_SMAX;
12557 case ISD::UMIN:
12558 return ISD::VECREDUCE_UMIN;
12559 case ISD::SMIN:
12560 return ISD::VECREDUCE_SMIN;
12561 case ISD::AND:
12562 return ISD::VECREDUCE_AND;
12563 case ISD::OR:
12564 return ISD::VECREDUCE_OR;
12565 case ISD::XOR:
12566 return ISD::VECREDUCE_XOR;
12567 case ISD::FADD:
12568 // Note: This is the associative form of the generic reduction opcode.
12569 return ISD::VECREDUCE_FADD;
12570 }
12571}
12572
12573/// Perform two related transforms whose purpose is to incrementally recognize
12574/// an explode_vector followed by scalar reduction as a vector reduction node.
12575/// This exists to recover from a deficiency in SLP which can't handle
12576/// forests with multiple roots sharing common nodes. In some cases, one
12577/// of the trees will be vectorized, and the other will remain (unprofitably)
12578/// scalarized.
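/// For example, (add (add (extract_elt V, 0), (extract_elt V, 1)),
/// (extract_elt V, 2)) is first partially rewritten as
/// (add (vecreduce_add (extract_subvector [2 x VT] from V)),
/// (extract_elt V, 2)), and on a later visit is grown into
/// (vecreduce_add (extract_subvector [3 x VT] from V)).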
12579static SDValue
12580combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12581 const RISCVSubtarget &Subtarget) {
12582
  // These transforms need to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd-sized vector types.
12586 if (DAG.NewNodesMustHaveLegalTypes)
12587 return SDValue();
12588
12589 // Without V, this transform isn't useful. We could form the (illegal)
12590 // operations and let them be scalarized again, but there's really no point.
12591 if (!Subtarget.hasVInstructions())
12592 return SDValue();
12593
12594 const SDLoc DL(N);
12595 const EVT VT = N->getValueType(ResNo: 0);
12596 const unsigned Opc = N->getOpcode();
12597
12598 // For FADD, we only handle the case with reassociation allowed. We
12599 // could handle strict reduction order, but at the moment, there's no
12600 // known reason to, and the complexity isn't worth it.
12601 // TODO: Handle fminnum and fmaxnum here
12602 if (!VT.isInteger() &&
12603 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12604 return SDValue();
12605
12606 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12607 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12608 "Inconsistent mappings");
12609 SDValue LHS = N->getOperand(Num: 0);
12610 SDValue RHS = N->getOperand(Num: 1);
12611
12612 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12613 return SDValue();
12614
12615 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12616 std::swap(a&: LHS, b&: RHS);
12617
12618 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12619 !isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)))
12620 return SDValue();
12621
12622 uint64_t RHSIdx = cast<ConstantSDNode>(Val: RHS.getOperand(i: 1))->getLimitedValue();
12623 SDValue SrcVec = RHS.getOperand(i: 0);
12624 EVT SrcVecVT = SrcVec.getValueType();
12625 assert(SrcVecVT.getVectorElementType() == VT);
12626 if (SrcVecVT.isScalableVector())
12627 return SDValue();
12628
12629 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12630 return SDValue();
12631
12632 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12633 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12634 // root of our reduction tree. TODO: We could extend this to any two
12635 // adjacent aligned constant indices if desired.
12636 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12637 LHS.getOperand(i: 0) == SrcVec && isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
12638 uint64_t LHSIdx =
12639 cast<ConstantSDNode>(Val: LHS.getOperand(i: 1))->getLimitedValue();
12640 if (0 == std::min(a: LHSIdx, b: RHSIdx) && 1 == std::max(a: LHSIdx, b: RHSIdx)) {
12641 EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 2);
12642 SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec,
12643 N2: DAG.getVectorIdxConstant(Val: 0, DL));
12644 return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags: N->getFlags());
12645 }
12646 }
12647
  // Match (binop (reduce (extract_subvector V, 0)),
  //              (extract_vector_elt V, sizeof(SubVec)))
  // into a reduction of one more element from the original vector V.
12651 if (LHS.getOpcode() != ReduceOpc)
12652 return SDValue();
12653
12654 SDValue ReduceVec = LHS.getOperand(i: 0);
12655 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12656 ReduceVec.hasOneUse() && ReduceVec.getOperand(i: 0) == RHS.getOperand(i: 0) &&
12657 isNullConstant(V: ReduceVec.getOperand(i: 1)) &&
12658 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12659 // For illegal types (e.g. 3xi32), most will be combined again into a
12660 // wider (hopefully legal) type. If this is a terminal state, we are
12661 // relying on type legalization here to produce something reasonable
12662 // and this lowering quality could probably be improved. (TODO)
12663 EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: RHSIdx + 1);
12664 SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec,
12665 N2: DAG.getVectorIdxConstant(Val: 0, DL));
12666 auto Flags = ReduceVec->getFlags();
12667 Flags.intersectWith(Flags: N->getFlags());
12668 return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags);
12669 }
12670
12671 return SDValue();
12672}
12673
// Try to fold (<bop> x, (reduction.<bop> vec, start)), where start is the
// neutral element of <bop>, into (reduction.<bop> vec, x).
12676static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12677 const RISCVSubtarget &Subtarget) {
12678 auto BinOpToRVVReduce = [](unsigned Opc) {
12679 switch (Opc) {
12680 default:
      llvm_unreachable("Unhandled binary op to transform to a reduction");
12682 case ISD::ADD:
12683 return RISCVISD::VECREDUCE_ADD_VL;
12684 case ISD::UMAX:
12685 return RISCVISD::VECREDUCE_UMAX_VL;
12686 case ISD::SMAX:
12687 return RISCVISD::VECREDUCE_SMAX_VL;
12688 case ISD::UMIN:
12689 return RISCVISD::VECREDUCE_UMIN_VL;
12690 case ISD::SMIN:
12691 return RISCVISD::VECREDUCE_SMIN_VL;
12692 case ISD::AND:
12693 return RISCVISD::VECREDUCE_AND_VL;
12694 case ISD::OR:
12695 return RISCVISD::VECREDUCE_OR_VL;
12696 case ISD::XOR:
12697 return RISCVISD::VECREDUCE_XOR_VL;
12698 case ISD::FADD:
12699 return RISCVISD::VECREDUCE_FADD_VL;
12700 case ISD::FMAXNUM:
12701 return RISCVISD::VECREDUCE_FMAX_VL;
12702 case ISD::FMINNUM:
12703 return RISCVISD::VECREDUCE_FMIN_VL;
12704 }
12705 };
12706
12707 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12708 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12709 isNullConstant(V: V.getOperand(i: 1)) &&
12710 V.getOperand(i: 0).getOpcode() == BinOpToRVVReduce(Opc);
12711 };
12712
12713 unsigned Opc = N->getOpcode();
12714 unsigned ReduceIdx;
12715 if (IsReduction(N->getOperand(Num: 0), Opc))
12716 ReduceIdx = 0;
12717 else if (IsReduction(N->getOperand(Num: 1), Opc))
12718 ReduceIdx = 1;
12719 else
12720 return SDValue();
12721
  // Skip if FADD disallows reassociation but the combine requires it.
12723 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12724 return SDValue();
12725
12726 SDValue Extract = N->getOperand(Num: ReduceIdx);
12727 SDValue Reduce = Extract.getOperand(i: 0);
12728 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12729 return SDValue();
12730
12731 SDValue ScalarV = Reduce.getOperand(i: 2);
12732 EVT ScalarVT = ScalarV.getValueType();
12733 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12734 ScalarV.getOperand(i: 0)->isUndef() &&
12735 isNullConstant(V: ScalarV.getOperand(i: 2)))
12736 ScalarV = ScalarV.getOperand(i: 1);
12737
12738 // Make sure that ScalarV is a splat with VL=1.
12739 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12740 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12741 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12742 return SDValue();
12743
12744 if (!isNonZeroAVL(AVL: ScalarV.getOperand(i: 2)))
12745 return SDValue();
12746
  // Check that the scalar operand of ScalarV is the neutral element.
  // TODO: Deal with values other than the neutral element.
12749 if (!isNeutralConstant(Opc: N->getOpcode(), Flags: N->getFlags(), V: ScalarV.getOperand(i: 1),
12750 OperandNo: 0))
12751 return SDValue();
12752
12753 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12754 // FIXME: We might be able to improve this if operand 0 is undef.
12755 if (!isNonZeroAVL(AVL: Reduce.getOperand(i: 5)))
12756 return SDValue();
12757
12758 SDValue NewStart = N->getOperand(Num: 1 - ReduceIdx);
12759
12760 SDLoc DL(N);
12761 SDValue NewScalarV =
12762 lowerScalarInsert(Scalar: NewStart, VL: ScalarV.getOperand(i: 2),
12763 VT: ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12764
12765 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12766 if (ScalarVT != ScalarV.getValueType())
12767 NewScalarV =
12768 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ScalarVT, N1: DAG.getUNDEF(VT: ScalarVT),
12769 N2: NewScalarV, N3: DAG.getVectorIdxConstant(Val: 0, DL));
12770
12771 SDValue Ops[] = {Reduce.getOperand(i: 0), Reduce.getOperand(i: 1),
12772 NewScalarV, Reduce.getOperand(i: 3),
12773 Reduce.getOperand(i: 4), Reduce.getOperand(i: 5)};
12774 SDValue NewReduce =
12775 DAG.getNode(Opcode: Reduce.getOpcode(), DL, VT: Reduce.getValueType(), Ops);
12776 return DAG.getNode(Opcode: Extract.getOpcode(), DL, VT: Extract.getValueType(), N1: NewReduce,
12777 N2: Extract.getOperand(i: 1));
12778}
12779
// Optimize (add (shl x, c0), (shl y, c1)) ->
//   (SLLI (SH*ADD x, y), c0), if c1 - c0 is 1, 2, or 3.
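// For example, (add (shl x, 5), (shl y, 7)) is rebuilt as
// (shl (add (shl y, 2), x), 5), where the inner (add (shl y, 2), x) maps to a
// single SH2ADD, giving SLLI+SH2ADD instead of two SLLIs and an ADD.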
12782static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12783 const RISCVSubtarget &Subtarget) {
12784 // Perform this optimization only in the zba extension.
12785 if (!Subtarget.hasStdExtZba())
12786 return SDValue();
12787
12788 // Skip for vector types and larger types.
12789 EVT VT = N->getValueType(ResNo: 0);
12790 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12791 return SDValue();
12792
12793 // The two operand nodes must be SHL and have no other use.
12794 SDValue N0 = N->getOperand(Num: 0);
12795 SDValue N1 = N->getOperand(Num: 1);
12796 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12797 !N0->hasOneUse() || !N1->hasOneUse())
12798 return SDValue();
12799
12800 // Check c0 and c1.
12801 auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
12802 auto *N1C = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1));
12803 if (!N0C || !N1C)
12804 return SDValue();
12805 int64_t C0 = N0C->getSExtValue();
12806 int64_t C1 = N1C->getSExtValue();
12807 if (C0 <= 0 || C1 <= 0)
12808 return SDValue();
12809
12810 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12811 int64_t Bits = std::min(a: C0, b: C1);
12812 int64_t Diff = std::abs(i: C0 - C1);
12813 if (Diff != 1 && Diff != 2 && Diff != 3)
12814 return SDValue();
12815
12816 // Build nodes.
12817 SDLoc DL(N);
12818 SDValue NS = (C0 < C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0);
12819 SDValue NL = (C0 > C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0);
12820 SDValue NA0 =
12821 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NL, N2: DAG.getConstant(Val: Diff, DL, VT));
12822 SDValue NA1 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NA0, N2: NS);
12823 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NA1, N2: DAG.getConstant(Val: Bits, DL, VT));
12824}
12825
12826// Combine a constant select operand into its use:
12827//
12828// (and (select cond, -1, c), x)
12829// -> (select cond, x, (and x, c)) [AllOnes=1]
12830// (or (select cond, 0, c), x)
12831// -> (select cond, x, (or x, c)) [AllOnes=0]
12832// (xor (select cond, 0, c), x)
12833// -> (select cond, x, (xor x, c)) [AllOnes=0]
12834// (add (select cond, 0, c), x)
12835// -> (select cond, x, (add x, c)) [AllOnes=0]
12836// (sub x, (select cond, 0, c))
12837// -> (select cond, x, (sub x, c)) [AllOnes=0]
12838static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12839 SelectionDAG &DAG, bool AllOnes,
12840 const RISCVSubtarget &Subtarget) {
12841 EVT VT = N->getValueType(ResNo: 0);
12842
12843 // Skip vectors.
12844 if (VT.isVector())
12845 return SDValue();
12846
12847 if (!Subtarget.hasConditionalMoveFusion()) {
12848 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12849 if ((!Subtarget.hasStdExtZicond() &&
12850 !Subtarget.hasVendorXVentanaCondOps()) ||
12851 N->getOpcode() != ISD::AND)
12852 return SDValue();
12853
    // Maybe harmful when the condition code has multiple uses.
12855 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(i: 0).hasOneUse())
12856 return SDValue();
12857
12858 // Maybe harmful when VT is wider than XLen.
12859 if (VT.getSizeInBits() > Subtarget.getXLen())
12860 return SDValue();
12861 }
12862
12863 if ((Slct.getOpcode() != ISD::SELECT &&
12864 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12865 !Slct.hasOneUse())
12866 return SDValue();
12867
12868 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12869 return AllOnes ? isAllOnesConstant(V: N) : isNullConstant(V: N);
12870 };
12871
12872 bool SwapSelectOps;
12873 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12874 SDValue TrueVal = Slct.getOperand(i: 1 + OpOffset);
12875 SDValue FalseVal = Slct.getOperand(i: 2 + OpOffset);
12876 SDValue NonConstantVal;
12877 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12878 SwapSelectOps = false;
12879 NonConstantVal = FalseVal;
12880 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12881 SwapSelectOps = true;
12882 NonConstantVal = TrueVal;
12883 } else
12884 return SDValue();
12885
  // Slct is now known to be the desired identity constant when CC is true.
12887 TrueVal = OtherOp;
12888 FalseVal = DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT, N1: OtherOp, N2: NonConstantVal);
12889 // Unless SwapSelectOps says the condition should be false.
12890 if (SwapSelectOps)
12891 std::swap(a&: TrueVal, b&: FalseVal);
12892
12893 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12894 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL: SDLoc(N), VT,
12895 Ops: {Slct.getOperand(i: 0), Slct.getOperand(i: 1),
12896 Slct.getOperand(i: 2), TrueVal, FalseVal});
12897
12898 return DAG.getNode(Opcode: ISD::SELECT, DL: SDLoc(N), VT,
12899 Ops: {Slct.getOperand(i: 0), TrueVal, FalseVal});
12900}
12901
12902// Attempt combineSelectAndUse on each operand of a commutative operator N.
12903static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12904 bool AllOnes,
12905 const RISCVSubtarget &Subtarget) {
12906 SDValue N0 = N->getOperand(Num: 0);
12907 SDValue N1 = N->getOperand(Num: 1);
12908 if (SDValue Result = combineSelectAndUse(N, Slct: N0, OtherOp: N1, DAG, AllOnes, Subtarget))
12909 return Result;
12910 if (SDValue Result = combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, AllOnes, Subtarget))
12911 return Result;
12912 return SDValue();
12913}
12914
12915// Transform (add (mul x, c0), c1) ->
12916// (add (mul (add x, c1/c0), c0), c1%c0).
12917// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12918// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12919// to an infinite loop in DAGCombine if transformed.
12920// Or transform (add (mul x, c0), c1) ->
12921// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12922// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12923// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12924// lead to an infinite loop in DAGCombine if transformed.
12925// Or transform (add (mul x, c0), c1) ->
12926// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12927// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12928// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12929// lead to an infinite loop in DAGCombine if transformed.
12930// Or transform (add (mul x, c0), c1) ->
12931// (mul (add x, c1/c0), c0).
12932// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
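// Worked example for the first form: (add (mul x, 100), 12345) becomes
// (add (mul (add x, 123), 100), 45). Here 12345 is not simm12, 123 and 45
// are, and 100 * 123 == 12300 is not simm12, so the excluded corner case is
// avoided.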
12933static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12934 const RISCVSubtarget &Subtarget) {
12935 // Skip for vector types and larger types.
12936 EVT VT = N->getValueType(ResNo: 0);
12937 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12938 return SDValue();
  // The first operand node must be a MUL and have no other use.
12940 SDValue N0 = N->getOperand(Num: 0);
12941 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12942 return SDValue();
  // Check if c0 and c1 match the above conditions.
12944 auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
12945 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
12946 if (!N0C || !N1C)
12947 return SDValue();
12948 // If N0C has multiple uses it's possible one of the cases in
12949 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12950 // in an infinite loop.
12951 if (!N0C->hasOneUse())
12952 return SDValue();
12953 int64_t C0 = N0C->getSExtValue();
12954 int64_t C1 = N1C->getSExtValue();
12955 int64_t CA, CB;
12956 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(x: C1))
12957 return SDValue();
  // Search for a proper CA (non-zero) and CB such that both are simm12.
12959 if ((C1 / C0) != 0 && isInt<12>(x: C1 / C0) && isInt<12>(x: C1 % C0) &&
12960 !isInt<12>(x: C0 * (C1 / C0))) {
12961 CA = C1 / C0;
12962 CB = C1 % C0;
12963 } else if ((C1 / C0 + 1) != 0 && isInt<12>(x: C1 / C0 + 1) &&
12964 isInt<12>(x: C1 % C0 - C0) && !isInt<12>(x: C0 * (C1 / C0 + 1))) {
12965 CA = C1 / C0 + 1;
12966 CB = C1 % C0 - C0;
12967 } else if ((C1 / C0 - 1) != 0 && isInt<12>(x: C1 / C0 - 1) &&
12968 isInt<12>(x: C1 % C0 + C0) && !isInt<12>(x: C0 * (C1 / C0 - 1))) {
12969 CA = C1 / C0 - 1;
12970 CB = C1 % C0 + C0;
12971 } else
12972 return SDValue();
12973 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12974 SDLoc DL(N);
12975 SDValue New0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0->getOperand(Num: 0),
12976 N2: DAG.getConstant(Val: CA, DL, VT));
12977 SDValue New1 =
12978 DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: New0, N2: DAG.getConstant(Val: C0, DL, VT));
12979 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: New1, N2: DAG.getConstant(Val: CB, DL, VT));
12980}
12981
12982// add (zext, zext) -> zext (add (zext, zext))
12983// sub (zext, zext) -> sext (sub (zext, zext))
12984// mul (zext, zext) -> zext (mul (zext, zext))
12985// sdiv (zext, zext) -> zext (sdiv (zext, zext))
12986// udiv (zext, zext) -> zext (udiv (zext, zext))
12987// srem (zext, zext) -> zext (srem (zext, zext))
12988// urem (zext, zext) -> zext (urem (zext, zext))
12989//
// where the inner and outer extends together cover the original extend width,
// and the range of the bin op fits inside the width of the narrower bin op.
// (For profitability on RVV, we use a power of two for both the inner and
// outer extends.)
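// For example, assuming <vscale x 4 x i32> is the (legal) result type:
//   (add (zext <vscale x 4 x i8> a), (zext <vscale x 4 x i8> b))
// becomes
//   (zext (add (zext a to <vscale x 4 x i16>),
//              (zext b to <vscale x 4 x i16>))),
// so the add runs at half the original element width and may later be matched
// as a widening add (e.g. vwaddu.vv).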
12993static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12994
12995 EVT VT = N->getValueType(ResNo: 0);
12996 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12997 return SDValue();
12998
12999 SDValue N0 = N->getOperand(Num: 0);
13000 SDValue N1 = N->getOperand(Num: 1);
13001 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13002 return SDValue();
13003 if (!N0.hasOneUse() || !N1.hasOneUse())
13004 return SDValue();
13005
13006 SDValue Src0 = N0.getOperand(i: 0);
13007 SDValue Src1 = N1.getOperand(i: 0);
13008 EVT SrcVT = Src0.getValueType();
13009 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT: SrcVT) ||
13010 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13011 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13012 return SDValue();
13013
13014 LLVMContext &C = *DAG.getContext();
13015 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(Context&: C);
13016 EVT NarrowVT = EVT::getVectorVT(Context&: C, VT: ElemVT, EC: VT.getVectorElementCount());
13017
13018 Src0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src0), VT: NarrowVT, Operand: Src0);
13019 Src1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src1), VT: NarrowVT, Operand: Src1);
13020
13021 // Src0 and Src1 are zero extended, so they're always positive if signed.
13022 //
13023 // sub can produce a negative from two positive operands, so it needs sign
13024 // extended. Other nodes produce a positive from two positive operands, so
13025 // zero extend instead.
13026 unsigned OuterExtend =
13027 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13028
13029 return DAG.getNode(
13030 Opcode: OuterExtend, DL: SDLoc(N), VT,
13031 Operand: DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT: NarrowVT, N1: Src0, N2: Src1));
13032}
13033
13034// Try to turn (add (xor bool, 1) -1) into (neg bool).
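// Since bool is known to be 0 or 1, (xor bool, 1) == 1 - bool, so adding -1
// yields -bool, i.e. (neg bool).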
13035static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13036 SDValue N0 = N->getOperand(Num: 0);
13037 SDValue N1 = N->getOperand(Num: 1);
13038 EVT VT = N->getValueType(ResNo: 0);
13039 SDLoc DL(N);
13040
13041 // RHS should be -1.
13042 if (!isAllOnesConstant(V: N1))
13043 return SDValue();
13044
13045 // Look for (xor X, 1).
13046 if (N0.getOpcode() != ISD::XOR || !isOneConstant(V: N0.getOperand(i: 1)))
13047 return SDValue();
13048
13049 // First xor input should be 0 or 1.
13050 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
13051 if (!DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask))
13052 return SDValue();
13053
13054 // Emit a negate of the setcc.
13055 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
13056 N2: N0.getOperand(i: 0));
13057}
13058
13059static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13060 const RISCVSubtarget &Subtarget) {
13061 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13062 return V;
13063 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13064 return V;
13065 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13066 return V;
13067 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13068 return V;
13069 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13070 return V;
13071 if (SDValue V = combineBinOpOfZExt(N, DAG))
13072 return V;
13073
13074 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13075 // (select lhs, rhs, cc, x, (add x, y))
13076 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13077}
13078
// Try to turn a sub with a boolean RHS and a constant LHS into an addi.
13080static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13081 SDValue N0 = N->getOperand(Num: 0);
13082 SDValue N1 = N->getOperand(Num: 1);
13083 EVT VT = N->getValueType(ResNo: 0);
13084 SDLoc DL(N);
13085
13086 // Require a constant LHS.
13087 auto *N0C = dyn_cast<ConstantSDNode>(Val&: N0);
13088 if (!N0C)
13089 return SDValue();
13090
13091 // All our optimizations involve subtracting 1 from the immediate and forming
13092 // an ADDI. Make sure the new immediate is valid for an ADDI.
13093 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13094 if (!ImmValMinus1.isSignedIntN(N: 12))
13095 return SDValue();
13096
13097 SDValue NewLHS;
13098 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13099 // (sub constant, (setcc x, y, eq/neq)) ->
13100 // (add (setcc x, y, neq/eq), constant - 1)
13101 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get();
13102 EVT SetCCOpVT = N1.getOperand(i: 0).getValueType();
13103 if (!isIntEqualitySetCC(Code: CCVal) || !SetCCOpVT.isInteger())
13104 return SDValue();
13105 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT);
13106 NewLHS =
13107 DAG.getSetCC(DL: SDLoc(N1), VT, LHS: N1.getOperand(i: 0), RHS: N1.getOperand(i: 1), Cond: CCVal);
13108 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(V: N1.getOperand(i: 1)) &&
13109 N1.getOperand(i: 0).getOpcode() == ISD::SETCC) {
13110 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13111 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13112 NewLHS = N1.getOperand(i: 0);
13113 } else
13114 return SDValue();
13115
13116 SDValue NewRHS = DAG.getConstant(Val: ImmValMinus1, DL, VT);
13117 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NewLHS, N2: NewRHS);
13118}
13119
13120static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13121 const RISCVSubtarget &Subtarget) {
13122 if (SDValue V = combineSubOfBoolean(N, DAG))
13123 return V;
13124
13125 EVT VT = N->getValueType(ResNo: 0);
13126 SDValue N0 = N->getOperand(Num: 0);
13127 SDValue N1 = N->getOperand(Num: 1);
13128 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13129 if (isNullConstant(V: N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13130 isNullConstant(V: N1.getOperand(i: 1))) {
13131 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get();
13132 if (CCVal == ISD::SETLT) {
13133 SDLoc DL(N);
13134 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13135 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N1.getOperand(i: 0),
13136 N2: DAG.getConstant(Val: ShAmt, DL, VT));
13137 }
13138 }
13139
13140 if (SDValue V = combineBinOpOfZExt(N, DAG))
13141 return V;
13142
13143 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13144 // (select lhs, rhs, cc, x, (sub x, y))
13145 return combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, /*AllOnes*/ false, Subtarget);
13146}
13147
13148// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13149// Legalizing setcc can introduce xors like this. Doing this transform reduces
13150// the number of xors and may allow the xor to fold into a branch condition.
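// For example, (and (xor X, 1), (xor Y, 1)) becomes (xor (or X, Y), 1), and
// (or (xor X, 1), (xor Y, 1)) becomes (xor (and X, Y), 1).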
13151static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13152 SDValue N0 = N->getOperand(Num: 0);
13153 SDValue N1 = N->getOperand(Num: 1);
13154 bool IsAnd = N->getOpcode() == ISD::AND;
13155
13156 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13157 return SDValue();
13158
13159 if (!N0.hasOneUse() || !N1.hasOneUse())
13160 return SDValue();
13161
13162 SDValue N01 = N0.getOperand(i: 1);
13163 SDValue N11 = N1.getOperand(i: 1);
13164
13165 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13166 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13167 // operation is And, allow one of the Xors to use -1.
13168 if (isOneConstant(V: N01)) {
13169 if (!isOneConstant(V: N11) && !(IsAnd && isAllOnesConstant(V: N11)))
13170 return SDValue();
13171 } else if (isOneConstant(V: N11)) {
    // The case of both N01 and N11 being 1 was already handled.
    // Handle N11 == 1 and N01 == -1.
13173 if (!(IsAnd && isAllOnesConstant(V: N01)))
13174 return SDValue();
13175 } else
13176 return SDValue();
13177
13178 EVT VT = N->getValueType(ResNo: 0);
13179
13180 SDValue N00 = N0.getOperand(i: 0);
13181 SDValue N10 = N1.getOperand(i: 0);
13182
13183 // The LHS of the xors needs to be 0/1.
13184 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
13185 if (!DAG.MaskedValueIsZero(Op: N00, Mask) || !DAG.MaskedValueIsZero(Op: N10, Mask))
13186 return SDValue();
13187
13188 // Invert the opcode and insert a new xor.
13189 SDLoc DL(N);
13190 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13191 SDValue Logic = DAG.getNode(Opcode: Opc, DL, VT, N1: N00, N2: N10);
13192 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Logic, N2: DAG.getConstant(Val: 1, DL, VT));
13193}
13194
13195static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13196 const RISCVSubtarget &Subtarget) {
13197 SDValue N0 = N->getOperand(Num: 0);
13198 EVT VT = N->getValueType(ResNo: 0);
13199
13200 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13201 // extending X. This is safe since we only need the LSB after the shift and
13202 // shift amounts larger than 31 would produce poison. If we wait until
13203 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13204 // to use a BEXT instruction.
  if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      VT == MVT::i1 && N0.getValueType() == MVT::i32 &&
      N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.hasOneUse()) {
13208 SDLoc DL(N0);
13209 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13210 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13211 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13212 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT, Operand: Srl);
13213 }
13214
13215 return SDValue();
13216}
13217
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and a logic operation. Returns the newly constructed
// node if the conditions for the optimization are satisfied.
13221static SDValue performANDCombine(SDNode *N,
13222 TargetLowering::DAGCombinerInfo &DCI,
13223 const RISCVSubtarget &Subtarget) {
13224 SelectionDAG &DAG = DCI.DAG;
13225
13226 SDValue N0 = N->getOperand(Num: 0);
13227 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13228 // extending X. This is safe since we only need the LSB after the shift and
13229 // shift amounts larger than 31 would produce poison. If we wait until
13230 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13231 // to use a BEXT instruction.
13232 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13233 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13234 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13235 N0.hasOneUse()) {
13236 SDLoc DL(N);
13237 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13238 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13239 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13240 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13241 DAG.getConstant(1, DL, MVT::i64));
13242 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13243 }
13244
13245 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13246 return V;
13247 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13248 return V;
13249
13250 if (DCI.isAfterLegalizeDAG())
13251 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13252 return V;
13253
13254 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13255 // (select lhs, rhs, cc, x, (and x, y))
13256 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13257}
13258
13259// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13260// FIXME: Generalize to other binary operators with same operand.
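// For example, (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
// becomes (xor (or (czero_eqz a, c), (czero_nez b, c)), 1), leaving a single
// xor after the reformed select idiom.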
13261static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13262 SelectionDAG &DAG) {
13263 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13264
13265 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13266 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13267 !N0.hasOneUse() || !N1.hasOneUse())
13268 return SDValue();
13269
13270 // Should have the same condition.
13271 SDValue Cond = N0.getOperand(i: 1);
13272 if (Cond != N1.getOperand(i: 1))
13273 return SDValue();
13274
13275 SDValue TrueV = N0.getOperand(i: 0);
13276 SDValue FalseV = N1.getOperand(i: 0);
13277
13278 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13279 TrueV.getOperand(i: 1) != FalseV.getOperand(i: 1) ||
13280 !isOneConstant(V: TrueV.getOperand(i: 1)) ||
13281 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13282 return SDValue();
13283
13284 EVT VT = N->getValueType(ResNo: 0);
13285 SDLoc DL(N);
13286
13287 SDValue NewN0 = DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV.getOperand(i: 0),
13288 N2: Cond);
13289 SDValue NewN1 = DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV.getOperand(i: 0),
13290 N2: Cond);
13291 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: NewN0, N2: NewN1);
13292 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewOr, N2: TrueV.getOperand(i: 1));
13293}
13294
13295static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13296 const RISCVSubtarget &Subtarget) {
13297 SelectionDAG &DAG = DCI.DAG;
13298
13299 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13300 return V;
13301 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13302 return V;
13303
13304 if (DCI.isAfterLegalizeDAG())
13305 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13306 return V;
13307
  // Look for an OR of CZERO_EQZ/NEZ with the same condition; this is the
  // select idiom. We may be able to pull a common operation out of the true
  // and false values.
13310 SDValue N0 = N->getOperand(Num: 0);
13311 SDValue N1 = N->getOperand(Num: 1);
13312 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13313 return V;
13314 if (SDValue V = combineOrOfCZERO(N, N0: N1, N1: N0, DAG))
13315 return V;
13316
13317 // fold (or (select cond, 0, y), x) ->
13318 // (select cond, x, (or x, y))
13319 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13320}
13321
13322static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13323 const RISCVSubtarget &Subtarget) {
13324 SDValue N0 = N->getOperand(Num: 0);
13325 SDValue N1 = N->getOperand(Num: 1);
13326
13327 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13330 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13331 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13332 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13333 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13334 SDLoc DL(N);
13335 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13336 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13337 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13338 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13339 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13340 }
13341
13342 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13343 // NOTE: Assumes ROL being legal means ROLW is legal.
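// Illustrative: for x in [0, 31], (xor (sllw 1, x), -1) computes ~(1 << x),
// which equals rotating the 32-bit value 0xfffffffe left by x, since the
// rotate just moves the single zero bit to position x.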
13344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13345 if (N0.getOpcode() == RISCVISD::SLLW &&
13346 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13347 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13348 SDLoc DL(N);
13349 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13350 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13351 }
13352
13353 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
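// e.g. (xor (setlt 7, y), 1) == !(7 < y) == (y < 8) == (setlt y, 8)
// (illustrative), provided the incremented constant still fits in 12 bits.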
13354 if (N0.getOpcode() == ISD::SETCC && isOneConstant(V: N1) && N0.hasOneUse()) {
13355 auto *ConstN00 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0));
13356 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
13357 if (ConstN00 && CC == ISD::SETLT) {
13358 EVT VT = N0.getValueType();
13359 SDLoc DL(N0);
13360 const APInt &Imm = ConstN00->getAPIntValue();
13361 if ((Imm + 1).isSignedIntN(N: 12))
13362 return DAG.getSetCC(DL, VT, LHS: N0.getOperand(i: 1),
13363 RHS: DAG.getConstant(Val: Imm + 1, DL, VT), Cond: CC);
13364 }
13365 }
13366
13367 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13368 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13369 // would have been promoted to i32, but the setcc would have an i64 result.
13370 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13371 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13372 SDValue N00 = N0.getOperand(i: 0);
13373 SDLoc DL(N);
13374 SDValue LHS = N00.getOperand(i: 0);
13375 SDValue RHS = N00.getOperand(i: 1);
13376 SDValue CC = N00.getOperand(i: 2);
13377 ISD::CondCode NotCC = ISD::getSetCCInverse(Operation: cast<CondCodeSDNode>(Val&: CC)->get(),
13378 Type: LHS.getValueType());
13379 SDValue Setcc = DAG.getSetCC(DL: SDLoc(N00), VT: N0.getOperand(i: 0).getValueType(),
13380 LHS, RHS, Cond: NotCC);
13381 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N0), VT: N->getValueType(ResNo: 0), Operand: Setcc);
13382 }
13383
13384 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13385 return V;
13386 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13387 return V;
13388
13389 // fold (xor (select cond, 0, y), x) ->
13390 // (select cond, x, (xor x, y))
13391 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13392}
13393
13394// Try to expand a scalar multiply to a faster sequence.
13395static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13396 TargetLowering::DAGCombinerInfo &DCI,
13397 const RISCVSubtarget &Subtarget) {
13398
13399 EVT VT = N->getValueType(ResNo: 0);
13400
13401 // LI + MUL is usually smaller than the alternative sequence.
13402 if (DAG.getMachineFunction().getFunction().hasMinSize())
13403 return SDValue();
13404
13405 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13406 return SDValue();
13407
13408 if (VT != Subtarget.getXLenVT())
13409 return SDValue();
13410
13411 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13412 return SDValue();
13413
13414 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
13415 if (!CNode)
13416 return SDValue();
13417 uint64_t MulAmt = CNode->getZExtValue();
13418
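// Worked example (illustrative, using Zba's shNadd mnemonics): MulAmt == 45
// == 5 * 9 can be lowered as
//   sh2add t, x, x   // t = (x << 2) + x = x * 5
//   sh3add r, t, t   // r = (t << 3) + t = t * 9 = x * 45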
13419 for (uint64_t Divisor : {3, 5, 9}) {
13420 if (MulAmt % Divisor != 0)
13421 continue;
13422 uint64_t MulAmt2 = MulAmt / Divisor;
13423 // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13424 // Matched in tablegen, avoid perturbing patterns.
13425 if (isPowerOf2_64(Value: MulAmt2))
13426 return SDValue();
13427
13428 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13429 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13430 SDLoc DL(N);
13431 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13432 SDValue Mul359 =
13433 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13434 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
13435 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359,
13436 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2 - 1), DL, VT),
13437 N3: Mul359);
13438 }
13439 }
13440
13441 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13442 // shXadd. First check if this is a sum of two powers of 2 because that's
13443 // easy. Then count the trailing zeros to find the low set bit.
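// Worked example (illustrative): MulAmt == 68 == 64 + 4 can be lowered as
//   slli   t, x, 6   // t = x * 64
//   sh2add r, x, t   // r = (x << 2) + t = x * 68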
13444 if (isPowerOf2_64(Value: MulAmt & (MulAmt - 1))) {
13445 unsigned ScaleShift = llvm::countr_zero(Val: MulAmt);
13446 if (ScaleShift >= 1 && ScaleShift < 4) {
13447 unsigned ShiftAmt = Log2_64(Value: (MulAmt & (MulAmt - 1)));
13448 SDLoc DL(N);
13449 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13450 SDValue Shift1 =
13451 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
13452 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13453 N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: Shift1);
13454 }
13455 }
13456
13457 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13458 // This is the two-instruction form; there are also three-instruction
13459 // variants we could implement, e.g.
13460 // (2^(1,2,3) * 3,5,9 + 1) << C2
13461 // 2^(C1>3) * 3,5,9 +/- 1
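// Worked example (illustrative): MulAmt == 19 == (9 << 1) + 1 can be lowered as
//   sh3add t, x, x   // t = x * 9
//   sh1add r, t, x   // r = (t << 1) + x = x * 19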
13462 for (uint64_t Divisor : {3, 5, 9}) {
13463 uint64_t C = MulAmt - 1;
13464 if (C <= Divisor)
13465 continue;
13466 unsigned TZ = llvm::countr_zero(Val: C);
13467 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13468 SDLoc DL(N);
13469 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13470 SDValue Mul359 =
13471 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13472 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
13473 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359,
13474 N2: DAG.getConstant(Val: TZ, DL, VT), N3: X);
13475 }
13476 }
13477
13478 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
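// Worked example (illustrative): MulAmt == 35 == 32 + 2 + 1 can be lowered as
//   slli   t0, x, 5    // t0 = x * 32
//   sh1add t1, x, x    // t1 = x * 3
//   add    r, t0, t1   // r = x * 35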
13479 if (MulAmt > 2 && isPowerOf2_64(Value: (MulAmt - 1) & (MulAmt - 2))) {
13480 unsigned ScaleShift = llvm::countr_zero(Val: MulAmt - 1);
13481 if (ScaleShift >= 1 && ScaleShift < 4) {
13482 unsigned ShiftAmt = Log2_64(Value: ((MulAmt - 1) & (MulAmt - 2)));
13483 SDLoc DL(N);
13484 SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0));
13485 SDValue Shift1 =
13486 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
13487 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shift1,
13488 N2: DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
13489 N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: X));
13490 }
13491 }
13492
13493 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
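// Worked example (illustrative): MulAmt == 59 == 64 - 5 can be lowered as
//   slli   t0, x, 6    // t0 = x * 64
//   sh2add t1, x, x    // t1 = x * 5
//   sub    r, t0, t1   // r = x * 59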
13494 for (uint64_t Offset : {3, 5, 9}) {
13495 if (isPowerOf2_64(Value: MulAmt + Offset)) {
13496 SDLoc DL(N);
13497 SDValue Shift1 =
13498 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
13499 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt + Offset), DL, VT));
13500 SDValue Mul359 = DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: N->getOperand(Num: 0),
13501 N2: DAG.getConstant(Val: Log2_64(Value: Offset - 1), DL, VT),
13502 N3: N->getOperand(Num: 0));
13503 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Shift1, N2: Mul359);
13504 }
13505 }
13506
13507 return SDValue();
13508}
13509
13510
13511static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13512 TargetLowering::DAGCombinerInfo &DCI,
13513 const RISCVSubtarget &Subtarget) {
13514 EVT VT = N->getValueType(ResNo: 0);
13515 if (!VT.isVector())
13516 return expandMul(N, DAG, DCI, Subtarget);
13517
13518 SDLoc DL(N);
13519 SDValue N0 = N->getOperand(Num: 0);
13520 SDValue N1 = N->getOperand(Num: 1);
13521 SDValue MulOper;
13522 unsigned AddSubOpc;
13523
13524 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13525 //        (mul x, (add y, 1)) -> (add x, (mul x, y))
13526 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13527 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13528 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13529 AddSubOpc = V->getOpcode();
13530 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13531 SDValue Opnd = V->getOperand(Num: 1);
13532 MulOper = V->getOperand(Num: 0);
13533 if (AddSubOpc == ISD::SUB)
13534 std::swap(a&: Opnd, b&: MulOper);
13535 if (isOneOrOneSplat(V: Opnd))
13536 return true;
13537 }
13538 return false;
13539 };
13540
13541 if (IsAddSubWith1(N0)) {
13542 SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1, N2: MulOper);
13543 return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1, N2: MulVal);
13544 }
13545
13546 if (IsAddSubWith1(N1)) {
13547 SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0, N2: MulOper);
13548 return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1: N0, N2: MulVal);
13549 }
13550
13551 if (SDValue V = combineBinOpOfZExt(N, DAG))
13552 return V;
13553
13554 return SDValue();
13555}
13556
13557/// According to the property that indexed load/store instructions zero-extend
13558/// their indices, try to narrow the type of index operand.
13559static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13560 if (isIndexTypeSigned(IndexType))
13561 return false;
13562
13563 if (!N->hasOneUse())
13564 return false;
13565
13566 EVT VT = N.getValueType();
13567 SDLoc DL(N);
13568
13569 // In general, what we're doing here is seeing if we can sink a truncate to
13570 // a smaller element type into the expression tree building our index.
13571 // TODO: We can generalize this and handle a bunch more cases if useful.
13572
13573 // Narrow a buildvector to the narrowest element type. This requires less
13574 // work and less register pressure at high LMUL, and creates smaller constants
13575 // which may be cheaper to materialize.
13576 if (ISD::isBuildVectorOfConstantSDNodes(N: N.getNode())) {
13577 KnownBits Known = DAG.computeKnownBits(Op: N);
13578 unsigned ActiveBits = std::max(a: 8u, b: Known.countMaxActiveBits());
13579 LLVMContext &C = *DAG.getContext();
13580 EVT ResultVT = EVT::getIntegerVT(Context&: C, BitWidth: ActiveBits).getRoundIntegerType(Context&: C);
13581 if (ResultVT.bitsLT(VT: VT.getVectorElementType())) {
13582 N = DAG.getNode(Opcode: ISD::TRUNCATE, DL,
13583 VT: VT.changeVectorElementType(EltVT: ResultVT), Operand: N);
13584 return true;
13585 }
13586 }
13587
13588 // Handle the pattern (shl (zext x to ty), C) where bits(x) + C < bits(ty).
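// Illustrative: an index such as (shl (zext nxv2i8 %x to nxv2i64), splat 2)
// only needs 8 + 2 = 10 bits, so it can be rebuilt as
// (shl (zext %x to nxv2i16), splat 2), which is cheaper at high LMUL.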
13589 if (N.getOpcode() != ISD::SHL)
13590 return false;
13591
13592 SDValue N0 = N.getOperand(i: 0);
13593 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13594 N0.getOpcode() != RISCVISD::VZEXT_VL)
13595 return false;
13596 if (!N0->hasOneUse())
13597 return false;
13598
13599 APInt ShAmt;
13600 SDValue N1 = N.getOperand(i: 1);
13601 if (!ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: ShAmt))
13602 return false;
13603
13604 SDValue Src = N0.getOperand(i: 0);
13605 EVT SrcVT = Src.getValueType();
13606 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13607 unsigned ShAmtV = ShAmt.getZExtValue();
13608 unsigned NewElen = PowerOf2Ceil(A: SrcElen + ShAmtV);
13609 NewElen = std::max(a: NewElen, b: 8U);
13610
13611 // Skip if NewElen is not narrower than the original extended type.
13612 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13613 return false;
13614
13615 EVT NewEltVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewElen);
13616 EVT NewVT = SrcVT.changeVectorElementType(EltVT: NewEltVT);
13617
13618 SDValue NewExt = DAG.getNode(Opcode: N0->getOpcode(), DL, VT: NewVT, Ops: N0->ops());
13619 SDValue NewShAmtVec = DAG.getConstant(Val: ShAmtV, DL, VT: NewVT);
13620 N = DAG.getNode(Opcode: ISD::SHL, DL, VT: NewVT, N1: NewExt, N2: NewShAmtVec);
13621 return true;
13622}
13623
13624// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13625// (seteq (i64 (sext_inreg X, i32)), C1') where C1' is C1 sign extended from
13626// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13627// can become a sext.w instead of a shift pair.
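// e.g. (seteq (and X, 0xffffffff), 0x80000000)
//   -> (seteq (sext_inreg X, i32), 0xffffffff80000000)
// where the new constant is a single LUI on RV64 and the sext_inreg selects to
// sext.w (illustrative).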
13628static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13629 const RISCVSubtarget &Subtarget) {
13630 SDValue N0 = N->getOperand(Num: 0);
13631 SDValue N1 = N->getOperand(Num: 1);
13632 EVT VT = N->getValueType(ResNo: 0);
13633 EVT OpVT = N0.getValueType();
13634
13635 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13636 return SDValue();
13637
13638 // RHS needs to be a constant.
13639 auto *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
13640 if (!N1C)
13641 return SDValue();
13642
13643 // LHS needs to be (and X, 0xffffffff).
13644 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13645 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) ||
13646 N0.getConstantOperandVal(i: 1) != UINT64_C(0xffffffff))
13647 return SDValue();
13648
13649 // Looking for an equality compare.
13650 ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
13651 if (!isIntEqualitySetCC(Code: Cond))
13652 return SDValue();
13653
13654 // Don't do this if the sign bit is provably zero; it will be turned back
13655 // into an AND.
13656 APInt SignMask = APInt::getOneBitSet(numBits: 64, BitNo: 31);
13657 if (DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask: SignMask))
13658 return SDValue();
13659
13660 const APInt &C1 = N1C->getAPIntValue();
13661
13662 SDLoc dl(N);
13663 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13664 // to be equal.
13665 if (C1.getActiveBits() > 32)
13666 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
13667
13668 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT,
13669 N0.getOperand(0), DAG.getValueType(MVT::i32));
13670 return DAG.getSetCC(DL: dl, VT, LHS: SExtOp, RHS: DAG.getConstant(Val: C1.trunc(width: 32).sext(width: 64),
13671 DL: dl, VT: OpVT), Cond);
13672}
13673
13674static SDValue
13675performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13676 const RISCVSubtarget &Subtarget) {
13677 SDValue Src = N->getOperand(Num: 0);
13678 EVT VT = N->getValueType(ResNo: 0);
13679
13680 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13681 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13682 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13683 return DAG.getNode(Opcode: RISCVISD::FMV_X_SIGNEXTH, DL: SDLoc(N), VT,
13684 Operand: Src.getOperand(i: 0));
13685
13686 return SDValue();
13687}
13688
13689namespace {
13690// Forward declaration of the structure holding the necessary information to
13691// apply a combine.
13692struct CombineResult;
13693
13694enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13695/// Helper class for folding sign/zero extensions.
13696/// In particular, this class is used for the following combines:
13697/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13698/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13699/// mul | mul_vl -> vwmul(u) | vwmul_su
13700/// shl | shl_vl -> vwsll
13701/// fadd -> vfwadd | vfwadd_w
13702/// fsub -> vfwsub | vfwsub_w
13703/// fmul -> vfwmul
13704/// An object of this class represents an operand of the operation we want to
13705/// combine.
13706/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13707/// NodeExtensionHelper for `a` and one for `b`.
13708///
13709/// This class abstracts away how the extension is materialized and
13710/// how its number of users affect the combines.
13711///
13712/// In particular:
13713/// - VWADD_W is conceptually == add(op0, sext(op1))
13714/// - VWADDU_W == add(op0, zext(op1))
13715/// - VWSUB_W == sub(op0, sext(op1))
13716/// - VWSUBU_W == sub(op0, zext(op1))
13717/// - VFWADD_W == fadd(op0, fpext(op1))
13718/// - VFWSUB_W == fsub(op0, fpext(op1))
13719/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13720/// zext|sext(smaller_value).
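/// As a rough illustration, in
///   (add_vl (vsext_vl a, m, vl), (vsext_vl b, m, vl), undef, m, vl)
/// both operands report SupportsSExt, and the expression can be rewritten as
///   (vwadd_vl a, b, undef, m, vl)
/// provided the extensions have no other users that would keep them alive.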
13721struct NodeExtensionHelper {
13722 /// Records if this operand is like being zero extended.
13723 bool SupportsZExt;
13724 /// Records if this operand is like being sign extended.
13725 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13726 /// instance, a splat constant (e.g., 3), would support being both sign and
13727 /// zero extended.
13728 bool SupportsSExt;
13729 /// Records if this operand is like being floating-point extended.
13730 bool SupportsFPExt;
13731 /// This boolean captures whether we care if this operand would still be
13732 /// around after the folding happens.
13733 bool EnforceOneUse;
13734 /// Original value that this NodeExtensionHelper represents.
13735 SDValue OrigOperand;
13736
13737 /// Get the value feeding the extension or the value itself.
13738 /// E.g., for zext(a), this would return a.
13739 SDValue getSource() const {
13740 switch (OrigOperand.getOpcode()) {
13741 case ISD::ZERO_EXTEND:
13742 case ISD::SIGN_EXTEND:
13743 case RISCVISD::VSEXT_VL:
13744 case RISCVISD::VZEXT_VL:
13745 case RISCVISD::FP_EXTEND_VL:
13746 return OrigOperand.getOperand(i: 0);
13747 default:
13748 return OrigOperand;
13749 }
13750 }
13751
13752 /// Check if this instance represents a splat.
13753 bool isSplat() const {
13754 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13755 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13756 }
13757
13758 /// Get the extended opcode.
13759 unsigned getExtOpc(ExtKind SupportsExt) const {
13760 switch (SupportsExt) {
13761 case ExtKind::SExt:
13762 return RISCVISD::VSEXT_VL;
13763 case ExtKind::ZExt:
13764 return RISCVISD::VZEXT_VL;
13765 case ExtKind::FPExt:
13766 return RISCVISD::FP_EXTEND_VL;
13767 }
13768 llvm_unreachable("Unknown ExtKind enum");
13769 }
13770
13771 /// Get or create a value that can feed \p Root with the given extension \p
13772 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source
13773 /// of this operand. \see ::getSource().
13774 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13775 const RISCVSubtarget &Subtarget,
13776 std::optional<ExtKind> SupportsExt) const {
13777 if (!SupportsExt.has_value())
13778 return OrigOperand;
13779
13780 MVT NarrowVT = getNarrowType(Root, SupportsExt: *SupportsExt);
13781
13782 SDValue Source = getSource();
13783 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13784 if (Source.getValueType() == NarrowVT)
13785 return Source;
13786
13787 unsigned ExtOpc = getExtOpc(SupportsExt: *SupportsExt);
13788
13789 // If we need an extension, we should be changing the type.
13790 SDLoc DL(OrigOperand);
13791 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13792 switch (OrigOperand.getOpcode()) {
13793 case ISD::ZERO_EXTEND:
13794 case ISD::SIGN_EXTEND:
13795 case RISCVISD::VSEXT_VL:
13796 case RISCVISD::VZEXT_VL:
13797 case RISCVISD::FP_EXTEND_VL:
13798 return DAG.getNode(Opcode: ExtOpc, DL, VT: NarrowVT, N1: Source, N2: Mask, N3: VL);
13799 case ISD::SPLAT_VECTOR:
13800 return DAG.getSplat(VT: NarrowVT, DL, Op: Source.getOperand(i: 0));
13801 case RISCVISD::VMV_V_X_VL:
13802 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: NarrowVT,
13803 N1: DAG.getUNDEF(VT: NarrowVT), N2: Source.getOperand(i: 1), N3: VL);
13804 default:
13805 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13806 // and that operand should already have the right NarrowVT so no
13807 // extension should be required at this point.
13808 llvm_unreachable("Unsupported opcode");
13809 }
13810 }
13811
13812 /// Helper function to get the narrow type for \p Root.
13813 /// The narrow type is the type of \p Root where we divided the size of each
13814 /// element by 2. E.g., if Root's type is <2xi16>, the narrow type is <2xi8>.
13815 /// \pre Both the narrow type and the original type should be legal.
13816 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13817 MVT VT = Root->getSimpleValueType(ResNo: 0);
13818
13819 // Determine the narrow size.
13820 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13821
13822 MVT EltVT = SupportsExt == ExtKind::FPExt
13823 ? MVT::getFloatingPointVT(BitWidth: NarrowSize)
13824 : MVT::getIntegerVT(BitWidth: NarrowSize);
13825
13826 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13827 "Trying to extend something we can't represent");
13828 MVT NarrowVT = MVT::getVectorVT(VT: EltVT, EC: VT.getVectorElementCount());
13829 return NarrowVT;
13830 }
13831
13832 /// Get the opcode to materialize:
13833 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13834 static unsigned getSExtOpcode(unsigned Opcode) {
13835 switch (Opcode) {
13836 case ISD::ADD:
13837 case RISCVISD::ADD_VL:
13838 case RISCVISD::VWADD_W_VL:
13839 case RISCVISD::VWADDU_W_VL:
13840 case ISD::OR:
13841 return RISCVISD::VWADD_VL;
13842 case ISD::SUB:
13843 case RISCVISD::SUB_VL:
13844 case RISCVISD::VWSUB_W_VL:
13845 case RISCVISD::VWSUBU_W_VL:
13846 return RISCVISD::VWSUB_VL;
13847 case ISD::MUL:
13848 case RISCVISD::MUL_VL:
13849 return RISCVISD::VWMUL_VL;
13850 default:
13851 llvm_unreachable("Unexpected opcode");
13852 }
13853 }
13854
13855 /// Get the opcode to materialize:
13856 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13857 static unsigned getZExtOpcode(unsigned Opcode) {
13858 switch (Opcode) {
13859 case ISD::ADD:
13860 case RISCVISD::ADD_VL:
13861 case RISCVISD::VWADD_W_VL:
13862 case RISCVISD::VWADDU_W_VL:
13863 case ISD::OR:
13864 return RISCVISD::VWADDU_VL;
13865 case ISD::SUB:
13866 case RISCVISD::SUB_VL:
13867 case RISCVISD::VWSUB_W_VL:
13868 case RISCVISD::VWSUBU_W_VL:
13869 return RISCVISD::VWSUBU_VL;
13870 case ISD::MUL:
13871 case RISCVISD::MUL_VL:
13872 return RISCVISD::VWMULU_VL;
13873 case ISD::SHL:
13874 case RISCVISD::SHL_VL:
13875 return RISCVISD::VWSLL_VL;
13876 default:
13877 llvm_unreachable("Unexpected opcode");
13878 }
13879 }
13880
13881 /// Get the opcode to materialize:
13882 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13883 static unsigned getFPExtOpcode(unsigned Opcode) {
13884 switch (Opcode) {
13885 case RISCVISD::FADD_VL:
13886 case RISCVISD::VFWADD_W_VL:
13887 return RISCVISD::VFWADD_VL;
13888 case RISCVISD::FSUB_VL:
13889 case RISCVISD::VFWSUB_W_VL:
13890 return RISCVISD::VFWSUB_VL;
13891 case RISCVISD::FMUL_VL:
13892 return RISCVISD::VFWMUL_VL;
13893 default:
13894 llvm_unreachable("Unexpected opcode");
13895 }
13896 }
13897
13898 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13899 /// newOpcode(a, b).
13900 static unsigned getSUOpcode(unsigned Opcode) {
13901 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13902 "SU is only supported for MUL");
13903 return RISCVISD::VWMULSU_VL;
13904 }
13905
13906 /// Get the opcode to materialize
13907 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
13908 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
13909 switch (Opcode) {
13910 case ISD::ADD:
13911 case RISCVISD::ADD_VL:
13912 case ISD::OR:
13913 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
13914 : RISCVISD::VWADDU_W_VL;
13915 case ISD::SUB:
13916 case RISCVISD::SUB_VL:
13917 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
13918 : RISCVISD::VWSUBU_W_VL;
13919 case RISCVISD::FADD_VL:
13920 return RISCVISD::VFWADD_W_VL;
13921 case RISCVISD::FSUB_VL:
13922 return RISCVISD::VFWSUB_W_VL;
13923 default:
13924 llvm_unreachable("Unexpected opcode");
13925 }
13926 }
13927
13928 using CombineToTry = std::function<std::optional<CombineResult>(
13929 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13930 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13931 const RISCVSubtarget &)>;
13932
13933 /// Check if this node needs to be fully folded or extended for all users.
13934 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13935
13936 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13937 const RISCVSubtarget &Subtarget) {
13938 unsigned Opc = OrigOperand.getOpcode();
13939 MVT VT = OrigOperand.getSimpleValueType();
13940
13941 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13942 "Unexpected Opcode");
13943
13944 // The passthru must be undef for the result to be tail agnostic.
13945 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(i: 0).isUndef())
13946 return;
13947
13948 // Get the scalar value.
13949 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(i: 0)
13950 : OrigOperand.getOperand(i: 1);
13951
13952 // See if we have enough sign bits or zero bits in the scalar to use a
13953 // widening opcode by splatting to smaller element size.
13954 unsigned EltBits = VT.getScalarSizeInBits();
13955 unsigned ScalarBits = Op.getValueSizeInBits();
13956 // Make sure we're getting all element bits from the scalar register.
13957 // FIXME: Support implicit sign extension of vmv.v.x?
13958 if (ScalarBits < EltBits)
13959 return;
13960
13961 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13962 // If the narrow type cannot be expressed with a legal VMV,
13963 // this is not a valid candidate.
13964 if (NarrowSize < 8)
13965 return;
13966
13967 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13968 SupportsSExt = true;
13969
13970 if (DAG.MaskedValueIsZero(Op,
13971 Mask: APInt::getBitsSetFrom(numBits: ScalarBits, loBit: NarrowSize)))
13972 SupportsZExt = true;
13973
13974 EnforceOneUse = false;
13975 }
13976
13977 /// Helper method to set the various fields of this struct based on the
13978 /// type of \p Root.
13979 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13980 const RISCVSubtarget &Subtarget) {
13981 SupportsZExt = false;
13982 SupportsSExt = false;
13983 SupportsFPExt = false;
13984 EnforceOneUse = true;
13985 unsigned Opc = OrigOperand.getOpcode();
13986 // For the nodes we handle below, we end up using their inputs directly: see
13987 // getSource(). However since they either don't have a passthru or we check
13988 // that their passthru is undef, we can safely ignore their mask and VL.
13989 switch (Opc) {
13990 case ISD::ZERO_EXTEND:
13991 case ISD::SIGN_EXTEND: {
13992 MVT VT = OrigOperand.getSimpleValueType();
13993 if (!VT.isVector())
13994 break;
13995
13996 SDValue NarrowElt = OrigOperand.getOperand(i: 0);
13997 MVT NarrowVT = NarrowElt.getSimpleValueType();
13998 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
13999 if (NarrowVT.getVectorElementType() == MVT::i1)
14000 break;
14001
14002 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14003 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14004 break;
14005 }
14006 case RISCVISD::VZEXT_VL:
14007 SupportsZExt = true;
14008 break;
14009 case RISCVISD::VSEXT_VL:
14010 SupportsSExt = true;
14011 break;
14012 case RISCVISD::FP_EXTEND_VL:
14013 SupportsFPExt = true;
14014 break;
14015 case ISD::SPLAT_VECTOR:
14016 case RISCVISD::VMV_V_X_VL:
14017 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14018 break;
14019 default:
14020 break;
14021 }
14022 }
14023
14024 /// Check if \p Root supports any extension folding combines.
14025 static bool isSupportedRoot(const SDNode *Root,
14026 const RISCVSubtarget &Subtarget) {
14027 switch (Root->getOpcode()) {
14028 case ISD::ADD:
14029 case ISD::SUB:
14030 case ISD::MUL: {
14031 return Root->getValueType(ResNo: 0).isScalableVector();
14032 }
14033 case ISD::OR: {
14034 return Root->getValueType(ResNo: 0).isScalableVector() &&
14035 Root->getFlags().hasDisjoint();
14036 }
14037 // Vector Widening Integer Add/Sub/Mul Instructions
14038 case RISCVISD::ADD_VL:
14039 case RISCVISD::MUL_VL:
14040 case RISCVISD::VWADD_W_VL:
14041 case RISCVISD::VWADDU_W_VL:
14042 case RISCVISD::SUB_VL:
14043 case RISCVISD::VWSUB_W_VL:
14044 case RISCVISD::VWSUBU_W_VL:
14045 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14046 case RISCVISD::FADD_VL:
14047 case RISCVISD::FSUB_VL:
14048 case RISCVISD::FMUL_VL:
14049 case RISCVISD::VFWADD_W_VL:
14050 case RISCVISD::VFWSUB_W_VL:
14051 return true;
14052 case ISD::SHL:
14053 return Root->getValueType(ResNo: 0).isScalableVector() &&
14054 Subtarget.hasStdExtZvbb();
14055 case RISCVISD::SHL_VL:
14056 return Subtarget.hasStdExtZvbb();
14057 default:
14058 return false;
14059 }
14060 }
14061
14062 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14063 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14064 const RISCVSubtarget &Subtarget) {
14065 assert(isSupportedRoot(Root, Subtarget) &&
14066 "Trying to build an helper with an "
14067 "unsupported root");
14068 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14069 assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
14070 OrigOperand = Root->getOperand(Num: OperandIdx);
14071
14072 unsigned Opc = Root->getOpcode();
14073 switch (Opc) {
14074 // We consider
14075 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14076 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14077 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14078 case RISCVISD::VWADD_W_VL:
14079 case RISCVISD::VWADDU_W_VL:
14080 case RISCVISD::VWSUB_W_VL:
14081 case RISCVISD::VWSUBU_W_VL:
14082 case RISCVISD::VFWADD_W_VL:
14083 case RISCVISD::VFWSUB_W_VL:
14084 if (OperandIdx == 1) {
14085 SupportsZExt =
14086 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14087 SupportsSExt =
14088 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14089 SupportsFPExt =
14090 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14091 // There's no existing extension here, so we don't have to worry about
14092 // making sure it gets removed.
14093 EnforceOneUse = false;
14094 break;
14095 }
14096 [[fallthrough]];
14097 default:
14098 fillUpExtensionSupport(Root, DAG, Subtarget);
14099 break;
14100 }
14101 }
14102
14103 /// Helper function to get the Mask and VL from \p Root.
14104 static std::pair<SDValue, SDValue>
14105 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14106 const RISCVSubtarget &Subtarget) {
14107 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14108 switch (Root->getOpcode()) {
14109 case ISD::ADD:
14110 case ISD::SUB:
14111 case ISD::MUL:
14112 case ISD::OR:
14113 case ISD::SHL: {
14114 SDLoc DL(Root);
14115 MVT VT = Root->getSimpleValueType(ResNo: 0);
14116 return getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget);
14117 }
14118 default:
14119 return std::make_pair(x: Root->getOperand(Num: 3), y: Root->getOperand(Num: 4));
14120 }
14121 }
14122
14123 /// Helper function to check if \p N is commutative with respect to the
14124 /// foldings that are supported by this class.
14125 static bool isCommutative(const SDNode *N) {
14126 switch (N->getOpcode()) {
14127 case ISD::ADD:
14128 case ISD::MUL:
14129 case ISD::OR:
14130 case RISCVISD::ADD_VL:
14131 case RISCVISD::MUL_VL:
14132 case RISCVISD::VWADD_W_VL:
14133 case RISCVISD::VWADDU_W_VL:
14134 case RISCVISD::FADD_VL:
14135 case RISCVISD::FMUL_VL:
14136 case RISCVISD::VFWADD_W_VL:
14137 return true;
14138 case ISD::SUB:
14139 case RISCVISD::SUB_VL:
14140 case RISCVISD::VWSUB_W_VL:
14141 case RISCVISD::VWSUBU_W_VL:
14142 case RISCVISD::FSUB_VL:
14143 case RISCVISD::VFWSUB_W_VL:
14144 case ISD::SHL:
14145 case RISCVISD::SHL_VL:
14146 return false;
14147 default:
14148 llvm_unreachable("Unexpected opcode");
14149 }
14150 }
14151
14152 /// Get a list of combine to try for folding extensions in \p Root.
14153 /// Note that each returned CombineToTry function doesn't actually modify
14154 /// anything. Instead they produce an optional CombineResult that if not None,
14155 /// need to be materialized for the combine to be applied.
14156 /// \see CombineResult::materialize.
14157 /// If the related CombineToTry function returns std::nullopt, that means the
14158 /// combine didn't match.
14159 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14160};
14161
14162/// Helper structure that holds all the necessary information to materialize a
14163/// combine that does some extension folding.
14164struct CombineResult {
14165 /// Opcode to be generated when materializing the combine.
14166 unsigned TargetOpcode;
14167 // No value means no extension is needed.
14168 std::optional<ExtKind> LHSExt;
14169 std::optional<ExtKind> RHSExt;
14170 /// Root of the combine.
14171 SDNode *Root;
14172 /// LHS of the TargetOpcode.
14173 NodeExtensionHelper LHS;
14174 /// RHS of the TargetOpcode.
14175 NodeExtensionHelper RHS;
14176
14177 CombineResult(unsigned TargetOpcode, SDNode *Root,
14178 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14179 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14180 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14181 LHS(LHS), RHS(RHS) {}
14182
14183 /// Return a value that uses TargetOpcode and that can be used to replace
14184 /// Root.
14185 /// The actual replacement is *not* done in that method.
14186 SDValue materialize(SelectionDAG &DAG,
14187 const RISCVSubtarget &Subtarget) const {
14188 SDValue Mask, VL, Merge;
14189 std::tie(args&: Mask, args&: VL) =
14190 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14191 switch (Root->getOpcode()) {
14192 default:
14193 Merge = Root->getOperand(Num: 2);
14194 break;
14195 case ISD::ADD:
14196 case ISD::SUB:
14197 case ISD::MUL:
14198 case ISD::OR:
14199 case ISD::SHL:
14200 Merge = DAG.getUNDEF(VT: Root->getValueType(ResNo: 0));
14201 break;
14202 }
14203 return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(Root), VT: Root->getValueType(ResNo: 0),
14204 N1: LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: LHSExt),
14205 N2: RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: RHSExt),
14206 N3: Merge, N4: Mask, N5: VL);
14207 }
14208};
14209
14210/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14211/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14212/// are zext) and LHS and RHS can be folded into Root.
14213/// AllowExtMask defines which form `ext` can take in this pattern.
14214///
14215/// \note If the pattern can match with both zext and sext, the returned
14216/// CombineResult will feature the zext result.
14217///
14218/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14219/// can be used to apply the pattern.
14220static std::optional<CombineResult>
14221canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14222 const NodeExtensionHelper &RHS,
14223 uint8_t AllowExtMask, SelectionDAG &DAG,
14224 const RISCVSubtarget &Subtarget) {
14225 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14226 return CombineResult(NodeExtensionHelper::getZExtOpcode(Opcode: Root->getOpcode()),
14227 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14228 /*RHSExt=*/{ExtKind::ZExt});
14229 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14230 return CombineResult(NodeExtensionHelper::getSExtOpcode(Opcode: Root->getOpcode()),
14231 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14232 /*RHSExt=*/{ExtKind::SExt});
14233 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14234 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Opcode: Root->getOpcode()),
14235 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14236 /*RHSExt=*/{ExtKind::FPExt});
14237 return std::nullopt;
14238}
14239
14240/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14241/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14242/// are zext) and LHS and RHS can be folded into Root.
14243///
14244/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14245/// can be used to apply the pattern.
14246static std::optional<CombineResult>
14247canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14248 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14249 const RISCVSubtarget &Subtarget) {
14250 return canFoldToVWWithSameExtensionImpl(
14251 Root, LHS, RHS, AllowExtMask: ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14252 Subtarget);
14253}
14254
14255/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14256///
14257/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14258/// can be used to apply the pattern.
14259static std::optional<CombineResult>
14260canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14261 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14262 const RISCVSubtarget &Subtarget) {
14263 if (RHS.SupportsFPExt)
14264 return CombineResult(
14265 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::FPExt),
14266 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14267
14268 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14269 // sext/zext?
14270 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14271 // purposes.
14272 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14273 return CombineResult(
14274 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::ZExt), Root,
14275 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14276 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14277 return CombineResult(
14278 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::SExt), Root,
14279 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14280 return std::nullopt;
14281}
14282
14283/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14284///
14285/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14286/// can be used to apply the pattern.
14287static std::optional<CombineResult>
14288canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14289 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14290 const RISCVSubtarget &Subtarget) {
14291 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::SExt, DAG,
14292 Subtarget);
14293}
14294
14295/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14296///
14297/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14298/// can be used to apply the pattern.
14299static std::optional<CombineResult>
14300canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14301 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14302 const RISCVSubtarget &Subtarget) {
14303 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::ZExt, DAG,
14304 Subtarget);
14305}
14306
14307/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14308///
14309/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14310/// can be used to apply the pattern.
14311static std::optional<CombineResult>
14312canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14313 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14314 const RISCVSubtarget &Subtarget) {
14315 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::FPExt, DAG,
14316 Subtarget);
14317}
14318
14319/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14320///
14321/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14322/// can be used to apply the pattern.
14323static std::optional<CombineResult>
14324canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14325 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14326 const RISCVSubtarget &Subtarget) {
14327
14328 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14329 return std::nullopt;
14330 return CombineResult(NodeExtensionHelper::getSUOpcode(Opcode: Root->getOpcode()),
14331 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14332 /*RHSExt=*/{ExtKind::ZExt});
14333}
14334
14335SmallVector<NodeExtensionHelper::CombineToTry>
14336NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14337 SmallVector<CombineToTry> Strategies;
14338 switch (Root->getOpcode()) {
14339 case ISD::ADD:
14340 case ISD::SUB:
14341 case ISD::OR:
14342 case RISCVISD::ADD_VL:
14343 case RISCVISD::SUB_VL:
14344 case RISCVISD::FADD_VL:
14345 case RISCVISD::FSUB_VL:
14346 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14347 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
14348 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14349 Strategies.push_back(Elt: canFoldToVW_W);
14350 break;
14351 case RISCVISD::FMUL_VL:
14352 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
14353 break;
14354 case ISD::MUL:
14355 case RISCVISD::MUL_VL:
14356 // mul -> vwmul(u)
14357 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
14358 // mul -> vwmulsu
14359 Strategies.push_back(Elt: canFoldToVW_SU);
14360 break;
14361 case ISD::SHL:
14362 case RISCVISD::SHL_VL:
14363 // shl -> vwsll
14364 Strategies.push_back(Elt: canFoldToVWWithZEXT);
14365 break;
14366 case RISCVISD::VWADD_W_VL:
14367 case RISCVISD::VWSUB_W_VL:
14368 // vwadd_w|vwsub_w -> vwadd|vwsub
14369 Strategies.push_back(Elt: canFoldToVWWithSEXT);
14370 break;
14371 case RISCVISD::VWADDU_W_VL:
14372 case RISCVISD::VWSUBU_W_VL:
14373 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14374 Strategies.push_back(Elt: canFoldToVWWithZEXT);
14375 break;
14376 case RISCVISD::VFWADD_W_VL:
14377 case RISCVISD::VFWSUB_W_VL:
14378 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14379 Strategies.push_back(Elt: canFoldToVWWithFPEXT);
14380 break;
14381 default:
14382 llvm_unreachable("Unexpected opcode");
14383 }
14384 return Strategies;
14385}
14386} // End anonymous namespace.
14387
14388/// Combine a binary operation to its equivalent VW or VW_W form.
14389/// The supported combines are:
14390/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14391/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14392/// mul | mul_vl -> vwmul(u) | vwmul_su
14393/// shl | shl_vl -> vwsll
14394/// fadd_vl -> vfwadd | vfwadd_w
14395/// fsub_vl -> vfwsub | vfwsub_w
14396/// fmul_vl -> vfwmul
14397/// vwadd_w(u) -> vwadd(u)
14398/// vwsub_w(u) -> vwsub(u)
14399/// vfwadd_w -> vfwadd
14400/// vfwsub_w -> vfwsub
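/// The fold is applied over a small "web" of nodes: when a matched extension
/// has other users, those users are added to the worklist and must all be
/// combinable as well, otherwise nothing is rewritten. E.g. (illustrative) if
/// (vsext_vl a) feeds both an add_vl and a mul_vl, folding only the add would
/// keep the vsext alive and save nothing, so both must form widening ops.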
14401static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14402 TargetLowering::DAGCombinerInfo &DCI,
14403 const RISCVSubtarget &Subtarget) {
14404 SelectionDAG &DAG = DCI.DAG;
14405 if (DCI.isBeforeLegalize())
14406 return SDValue();
14407
14408 if (!NodeExtensionHelper::isSupportedRoot(Root: N, Subtarget))
14409 return SDValue();
14410
14411 SmallVector<SDNode *> Worklist;
14412 SmallSet<SDNode *, 8> Inserted;
14413 Worklist.push_back(Elt: N);
14414 Inserted.insert(Ptr: N);
14415 SmallVector<CombineResult> CombinesToApply;
14416
14417 while (!Worklist.empty()) {
14418 SDNode *Root = Worklist.pop_back_val();
14419 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14420 return SDValue();
14421
14422 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14423 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14424 auto AppendUsersIfNeeded = [&Worklist,
14425 &Inserted](const NodeExtensionHelper &Op) {
14426 if (Op.needToPromoteOtherUsers()) {
14427 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14428 if (Inserted.insert(Ptr: TheUse).second)
14429 Worklist.push_back(Elt: TheUse);
14430 }
14431 }
14432 };
14433
14434 // Control the compile time by limiting the number of nodes we look at in
14435 // total.
14436 if (Inserted.size() > ExtensionMaxWebSize)
14437 return SDValue();
14438
14439 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14440 NodeExtensionHelper::getSupportedFoldings(Root: N);
14441
14442 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14443 bool Matched = false;
14444 for (int Attempt = 0;
14445 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14446 ++Attempt) {
14447
14448 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14449 FoldingStrategies) {
14450 std::optional<CombineResult> Res =
14451 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14452 if (Res) {
14453 Matched = true;
14454 CombinesToApply.push_back(Elt: *Res);
14455 // All the inputs that are extended need to be folded, otherwise
14456 // we would be leaving the old input (since it may still be used),
14457 // and the new one.
14458 if (Res->LHSExt.has_value())
14459 AppendUsersIfNeeded(LHS);
14460 if (Res->RHSExt.has_value())
14461 AppendUsersIfNeeded(RHS);
14462 break;
14463 }
14464 }
14465 std::swap(a&: LHS, b&: RHS);
14466 }
14467 // Right now we do an all or nothing approach.
14468 if (!Matched)
14469 return SDValue();
14470 }
14471 // Store the value for the replacement of the input node separately.
14472 SDValue InputRootReplacement;
14473 // We do the RAUW after we materialize all the combines, because some replaced
14474 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14475 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14476 // yet-to-be-visited CombinesToApply roots.
14477 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14478 ValuesToReplace.reserve(N: CombinesToApply.size());
14479 for (CombineResult Res : CombinesToApply) {
14480 SDValue NewValue = Res.materialize(DAG, Subtarget);
14481 if (!InputRootReplacement) {
14482 assert(Res.Root == N &&
14483 "First element is expected to be the current node");
14484 InputRootReplacement = NewValue;
14485 } else {
14486 ValuesToReplace.emplace_back(Args: SDValue(Res.Root, 0), Args&: NewValue);
14487 }
14488 }
14489 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14490 DAG.ReplaceAllUsesOfValueWith(From: OldNewValues.first, To: OldNewValues.second);
14491 DCI.AddToWorklist(N: OldNewValues.second.getNode());
14492 }
14493 return InputRootReplacement;
14494}
14495
14496// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14497// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14498// y will be the Passthru and cond will be the Mask.
14499static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14500 unsigned Opc = N->getOpcode();
14501 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14502 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14503
14504 SDValue Y = N->getOperand(Num: 0);
14505 SDValue MergeOp = N->getOperand(Num: 1);
14506 unsigned MergeOpc = MergeOp.getOpcode();
14507
14508 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14509 return SDValue();
14510
14511 SDValue X = MergeOp->getOperand(Num: 1);
14512
14513 if (!MergeOp.hasOneUse())
14514 return SDValue();
14515
14516 // Passthru should be undef
14517 SDValue Passthru = N->getOperand(Num: 2);
14518 if (!Passthru.isUndef())
14519 return SDValue();
14520
14521 // Mask should be all ones
14522 SDValue Mask = N->getOperand(Num: 3);
14523 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14524 return SDValue();
14525
14526 // False value of MergeOp should be all zeros
14527 SDValue Z = MergeOp->getOperand(Num: 2);
14528
14529 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14530 (isNullOrNullSplat(V: Z.getOperand(i: 0)) || Z.getOperand(i: 0).isUndef()))
14531 Z = Z.getOperand(i: 1);
14532
14533 if (!ISD::isConstantSplatVectorAllZeros(N: Z.getNode()))
14534 return SDValue();
14535
14536 return DAG.getNode(Opcode: Opc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
14537 Ops: {Y, X, Y, MergeOp->getOperand(Num: 0), N->getOperand(Num: 4)},
14538 Flags: N->getFlags());
14539}
14540
14541static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14542 TargetLowering::DAGCombinerInfo &DCI,
14543 const RISCVSubtarget &Subtarget) {
14544 [[maybe_unused]] unsigned Opc = N->getOpcode();
14545 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14546 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14547
14548 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14549 return V;
14550
14551 return combineVWADDSUBWSelect(N, DAG&: DCI.DAG);
14552}
14553
14554// Helper function for performMemPairCombine.
14555// Try to combine the memory loads/stores LSNode1 and LSNode2
14556// into a single memory pair operation.
14557static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14558 LSBaseSDNode *LSNode2, SDValue BasePtr,
14559 uint64_t Imm) {
14560 SmallPtrSet<const SDNode *, 32> Visited;
14561 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14562
14563 if (SDNode::hasPredecessorHelper(N: LSNode1, Visited, Worklist) ||
14564 SDNode::hasPredecessorHelper(N: LSNode2, Visited, Worklist))
14565 return SDValue();
14566
14567 MachineFunction &MF = DAG.getMachineFunction();
14568 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14569
14570 // The new operation has twice the width.
14571 MVT XLenVT = Subtarget.getXLenVT();
14572 EVT MemVT = LSNode1->getMemoryVT();
14573 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14574 MachineMemOperand *MMO = LSNode1->getMemOperand();
14575 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14576 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14577
14578 if (LSNode1->getOpcode() == ISD::LOAD) {
14579 auto Ext = cast<LoadSDNode>(Val: LSNode1)->getExtensionType();
14580 unsigned Opcode;
14581 if (MemVT == MVT::i32)
14582 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14583 else
14584 Opcode = RISCVISD::TH_LDD;
14585
14586 SDValue Res = DAG.getMemIntrinsicNode(
14587 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14588 {LSNode1->getChain(), BasePtr,
14589 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14590 NewMemVT, NewMMO);
14591
14592 SDValue Node1 =
14593 DAG.getMergeValues(Ops: {Res.getValue(R: 0), Res.getValue(R: 2)}, dl: SDLoc(LSNode1));
14594 SDValue Node2 =
14595 DAG.getMergeValues(Ops: {Res.getValue(R: 1), Res.getValue(R: 2)}, dl: SDLoc(LSNode2));
14596
14597 DAG.ReplaceAllUsesWith(From: LSNode2, To: Node2.getNode());
14598 return Node1;
14599 } else {
14600 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14601
14602 SDValue Res = DAG.getMemIntrinsicNode(
14603 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14604 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14605 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14606 NewMemVT, NewMMO);
14607
14608 DAG.ReplaceAllUsesWith(From: LSNode2, To: Res.getNode());
14609 return Res;
14610 }
14611}
14612
14613// Try to combine two adjacent loads/stores to a single pair instruction from
14614// the XTHeadMemPair vendor extension.
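// Illustrative: two simple i64 loads from (add a0, 16) and (add a0, 24) that
// share the same chain can be merged into one TH_LDD node at offset 16; its
// two results replace the original loads. The first offset must be a multiple
// of the pair size (8 bytes for i32, 16 for i64) and fit a 2-bit index.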
14615static SDValue performMemPairCombine(SDNode *N,
14616 TargetLowering::DAGCombinerInfo &DCI) {
14617 SelectionDAG &DAG = DCI.DAG;
14618 MachineFunction &MF = DAG.getMachineFunction();
14619 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14620
14621 // Target does not support load/store pair.
14622 if (!Subtarget.hasVendorXTHeadMemPair())
14623 return SDValue();
14624
14625 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(Val: N);
14626 EVT MemVT = LSNode1->getMemoryVT();
14627 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14628
14629 // No volatile, indexed or atomic loads/stores.
14630 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14631 return SDValue();
14632
14633 // Function to get a base + constant representation from a memory value.
14634 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14635 if (Ptr->getOpcode() == ISD::ADD)
14636 if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Ptr->getOperand(Num: 1)))
14637 return {Ptr->getOperand(Num: 0), C1->getZExtValue()};
14638 return {Ptr, 0};
14639 };
14640
14641 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(Num: OpNum));
14642
14643 SDValue Chain = N->getOperand(Num: 0);
14644 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14645 UI != UE; ++UI) {
14646 SDUse &Use = UI.getUse();
14647 if (Use.getUser() != N && Use.getResNo() == 0 &&
14648 Use.getUser()->getOpcode() == N->getOpcode()) {
14649 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Val: Use.getUser());
14650
14651 // No volatile, indexed or atomic loads/stores.
14652 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14653 continue;
14654
14655 // Check if LSNode1 and LSNode2 have the same type and extension.
14656 if (LSNode1->getOpcode() == ISD::LOAD)
14657 if (cast<LoadSDNode>(Val: LSNode2)->getExtensionType() !=
14658 cast<LoadSDNode>(Val: LSNode1)->getExtensionType())
14659 continue;
14660
14661 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14662 continue;
14663
14664 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(Num: OpNum));
14665
14666 // Check if the base pointer is the same for both instructions.
14667 if (Base1 != Base2)
14668 continue;
14669
14670 // Check if the offsets match the XTHeadMemPair encoding constraints.
14671 bool Valid = false;
14672 if (MemVT == MVT::i32) {
14673 // Check for adjacent i32 values and a 2-bit index.
14674 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(x: Offset1))
14675 Valid = true;
14676 } else if (MemVT == MVT::i64) {
14677 // Check for adjacent i64 values and a 2-bit index.
14678 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(x: Offset1))
14679 Valid = true;
14680 }
14681
14682 if (!Valid)
14683 continue;
14684
14685 // Try to combine.
14686 if (SDValue Res =
14687 tryMemPairCombine(DAG, LSNode1, LSNode2, BasePtr: Base1, Imm: Offset1))
14688 return Res;
14689 }
14690 }
14691
14692 return SDValue();
14693}
14694
14695// Fold
14696// (fp_to_int (froundeven X)) -> fcvt X, rne
14697// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14698// (fp_to_int (ffloor X)) -> fcvt X, rdn
14699// (fp_to_int (fceil X)) -> fcvt X, rup
14700// (fp_to_int (fround X)) -> fcvt X, rmm
14701// (fp_to_int (frint X)) -> fcvt X
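// e.g. on RV64, (i64 (fp_to_sint (ffloor X:f64))) can become a single
// "fcvt.l.d rd, rs, rdn", avoiding a separate rounding of X before the
// conversion (illustrative).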
14702static SDValue performFP_TO_INTCombine(SDNode *N,
14703 TargetLowering::DAGCombinerInfo &DCI,
14704 const RISCVSubtarget &Subtarget) {
14705 SelectionDAG &DAG = DCI.DAG;
14706 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14707 MVT XLenVT = Subtarget.getXLenVT();
14708
14709 SDValue Src = N->getOperand(Num: 0);
14710
14711 // Don't do this for strict-fp Src.
14712 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14713 return SDValue();
14714
14715 // Ensure the FP type is legal.
14716 if (!TLI.isTypeLegal(VT: Src.getValueType()))
14717 return SDValue();
14718
14719 // Don't do this for f16 with Zfhmin and not Zfh.
14720 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14721 return SDValue();
14722
14723 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode());
14724 // If the result is invalid, we didn't find a foldable instruction.
14725 if (FRM == RISCVFPRndMode::Invalid)
14726 return SDValue();
14727
14728 SDLoc DL(N);
14729 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14730 EVT VT = N->getValueType(ResNo: 0);
14731
14732 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14733 MVT SrcVT = Src.getSimpleValueType();
14734 MVT SrcContainerVT = SrcVT;
14735 MVT ContainerVT = VT.getSimpleVT();
14736 SDValue XVal = Src.getOperand(i: 0);
14737
14738 // For widening and narrowing conversions we just combine it into a
14739 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14740 // end up getting lowered to their appropriate pseudo instructions based on
14741 // their operand types.
14742 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14743 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14744 return SDValue();
14745
14746 // Make fixed-length vectors scalable first
14747 if (SrcVT.isFixedLengthVector()) {
14748 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
14749 XVal = convertToScalableVector(VT: SrcContainerVT, V: XVal, DAG, Subtarget);
14750 ContainerVT =
14751 getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget);
14752 }
14753
14754 auto [Mask, VL] =
14755 getDefaultVLOps(VecVT: SrcVT, ContainerVT: SrcContainerVT, DL, DAG, Subtarget);
14756
14757 SDValue FpToInt;
14758 if (FRM == RISCVFPRndMode::RTZ) {
14759 // Use the dedicated trunc static rounding mode if we're truncating so we
14760 // don't need to generate fsrmi/fsrm instructions.
14761 unsigned Opc =
14762 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14763 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL);
14764 } else if (FRM == RISCVFPRndMode::DYN) {
14765 unsigned Opc =
14766 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14767 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL);
14768 } else {
14769 unsigned Opc =
14770 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14771 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask,
14772 N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL);
14773 }
14774
14775 // If converted from fixed-length to scalable, convert back
14776 if (VT.isFixedLengthVector())
14777 FpToInt = convertFromScalableVector(VT, V: FpToInt, DAG, Subtarget);
14778
14779 return FpToInt;
14780 }
14781
14782 // Only handle XLen or i32 types. Other types narrower than XLen will
14783 // eventually be legalized to XLenVT.
14784 if (VT != MVT::i32 && VT != XLenVT)
14785 return SDValue();
14786
14787 unsigned Opc;
14788 if (VT == XLenVT)
14789 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14790 else
14791 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14792
14793 SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src.getOperand(i: 0),
14794 N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT));
14795 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FpToInt);
14796}
14797
14798// Fold
14799// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14800// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14801// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14802// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14803// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14804// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14805static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14806 TargetLowering::DAGCombinerInfo &DCI,
14807 const RISCVSubtarget &Subtarget) {
14808 SelectionDAG &DAG = DCI.DAG;
14809 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14810 MVT XLenVT = Subtarget.getXLenVT();
14811
14812 // Only handle XLen types. Other types narrower than XLen will eventually be
14813 // legalized to XLenVT.
14814 EVT DstVT = N->getValueType(ResNo: 0);
14815 if (DstVT != XLenVT)
14816 return SDValue();
14817
14818 SDValue Src = N->getOperand(Num: 0);
14819
14820 // Don't do this for strict-fp Src.
14821 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14822 return SDValue();
14823
14824 // Ensure the FP type is also legal.
14825 if (!TLI.isTypeLegal(VT: Src.getValueType()))
14826 return SDValue();
14827
14828 // Don't do this for f16 with Zfhmin and not Zfh.
14829 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14830 return SDValue();
14831
14832 EVT SatVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT();
14833
14834 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode());
14835 if (FRM == RISCVFPRndMode::Invalid)
14836 return SDValue();
14837
14838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14839
14840 unsigned Opc;
14841 if (SatVT == DstVT)
14842 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14843 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14844 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14845 else
14846 return SDValue();
14847 // FIXME: Support other SatVTs by clamping before or after the conversion.
14848
14849 Src = Src.getOperand(i: 0);
14850
14851 SDLoc DL(N);
14852 SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src,
14853 N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT));
14854
14855 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14856 // extend.
14857 if (Opc == RISCVISD::FCVT_WU_RV64)
14858 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14859
14860 // RISC-V FP-to-int conversions saturate to the destination register size, but
14861 // don't produce 0 for nan.
14862 SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT);
14863 return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt, Cond: ISD::CondCode::SETUO);
14864}
14865
14866// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14867// smaller than XLenVT.
14868static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14869 const RISCVSubtarget &Subtarget) {
14870 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14871
14872 SDValue Src = N->getOperand(Num: 0);
14873 if (Src.getOpcode() != ISD::BSWAP)
14874 return SDValue();
14875
14876 EVT VT = N->getValueType(ResNo: 0);
14877 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14878 !llvm::has_single_bit<uint32_t>(Value: VT.getSizeInBits()))
14879 return SDValue();
14880
14881 SDLoc DL(N);
14882 return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: Src.getOperand(i: 0));
14883}
14884
14885// Convert from one FMA opcode to another based on whether we are negating the
14886// multiply result and/or the accumulator.
14887// NOTE: Only supports RVV operations with VL.
14888static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14889 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
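  // For example, VFMADD computes (A*B)+C; negating the product gives
  // -(A*B)+C, which is VFNMSUB, and additionally negating the accumulator
  // gives -(A*B)-C, which is VFNMADD.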
14890 if (NegMul) {
14891 // clang-format off
14892 switch (Opcode) {
14893 default: llvm_unreachable("Unexpected opcode");
14894 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14895 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14896 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14897 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14898 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14899 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14900 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14901 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14902 }
14903 // clang-format on
14904 }
14905
14906 // Negating the accumulator changes ADD<->SUB.
14907 if (NegAcc) {
14908 // clang-format off
14909 switch (Opcode) {
14910 default: llvm_unreachable("Unexpected opcode");
14911 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14912 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14913 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14914 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14915 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14916 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14917 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14918 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14919 }
14920 // clang-format on
14921 }
14922
14923 return Opcode;
14924}
14925
14926static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14927 // Fold FNEG_VL into FMA opcodes.
14928 // The first operand of strict-fp is chain.
14929 unsigned Offset = N->isTargetStrictFPOpcode();
14930 SDValue A = N->getOperand(Num: 0 + Offset);
14931 SDValue B = N->getOperand(Num: 1 + Offset);
14932 SDValue C = N->getOperand(Num: 2 + Offset);
14933 SDValue Mask = N->getOperand(Num: 3 + Offset);
14934 SDValue VL = N->getOperand(Num: 4 + Offset);
14935
14936 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14937 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(i: 1) == Mask &&
14938 V.getOperand(i: 2) == VL) {
14939 // Return the negated input.
14940 V = V.getOperand(i: 0);
14941 return true;
14942 }
14943
14944 return false;
14945 };
14946
14947 bool NegA = invertIfNegative(A);
14948 bool NegB = invertIfNegative(B);
14949 bool NegC = invertIfNegative(C);
14950
14951 // If no operands are negated, we're done.
14952 if (!NegA && !NegB && !NegC)
14953 return SDValue();
14954
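  // Negations of both multiplicands cancel each other, so the product is
  // effectively negated only when exactly one of A and B was negated; the
  // accumulator negation is tracked separately by NegC.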
14955 unsigned NewOpcode = negateFMAOpcode(Opcode: N->getOpcode(), NegMul: NegA != NegB, NegAcc: NegC);
14956 if (N->isTargetStrictFPOpcode())
14957 return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VTList: N->getVTList(),
14958 Ops: {N->getOperand(Num: 0), A, B, C, Mask, VL});
14959 return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: A, N2: B, N3: C, N4: Mask,
14960 N5: VL);
14961}
14962
14963static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14964 const RISCVSubtarget &Subtarget) {
14965 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14966 return V;
14967
14968 if (N->getValueType(0).isScalableVector() &&
14969 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14970 (Subtarget.hasVInstructionsF16Minimal() &&
14971 !Subtarget.hasVInstructionsF16())) {
14972 return SDValue();
14973 }
14974
14975 // FIXME: Ignore strict opcodes for now.
14976 if (N->isTargetStrictFPOpcode())
14977 return SDValue();
14978
14979 // Try to form widening FMA.
14980 SDValue Op0 = N->getOperand(Num: 0);
14981 SDValue Op1 = N->getOperand(Num: 1);
14982 SDValue Mask = N->getOperand(Num: 3);
14983 SDValue VL = N->getOperand(Num: 4);
14984
14985 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14986 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14987 return SDValue();
14988
14989 // TODO: Refactor to handle more complex cases similar to
14990 // combineBinOp_VLToVWBinOp_VL.
14991 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14992 (Op0 != Op1 || !Op0->hasNUsesOfValue(NUses: 2, Value: 0)))
14993 return SDValue();
14994
14995 // Check the mask and VL are the same.
14996 if (Op0.getOperand(i: 1) != Mask || Op0.getOperand(i: 2) != VL ||
14997 Op1.getOperand(i: 1) != Mask || Op1.getOperand(i: 2) != VL)
14998 return SDValue();
14999
15000 unsigned NewOpc;
15001 switch (N->getOpcode()) {
15002 default:
15003 llvm_unreachable("Unexpected opcode");
15004 case RISCVISD::VFMADD_VL:
15005 NewOpc = RISCVISD::VFWMADD_VL;
15006 break;
15007 case RISCVISD::VFNMSUB_VL:
15008 NewOpc = RISCVISD::VFWNMSUB_VL;
15009 break;
15010 case RISCVISD::VFNMADD_VL:
15011 NewOpc = RISCVISD::VFWNMADD_VL;
15012 break;
15013 case RISCVISD::VFMSUB_VL:
15014 NewOpc = RISCVISD::VFWMSUB_VL;
15015 break;
15016 }
15017
15018 Op0 = Op0.getOperand(i: 0);
15019 Op1 = Op1.getOperand(i: 0);
15020
15021 return DAG.getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Op0, N2: Op1,
15022 N3: N->getOperand(Num: 2), N4: Mask, N5: VL);
15023}
15024
15025static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15026 const RISCVSubtarget &Subtarget) {
15027 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15028
15029 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15030 return SDValue();
15031
15032 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
15033 return SDValue();
15034 uint64_t ShAmt = N->getConstantOperandVal(Num: 1);
15035 if (ShAmt > 32)
15036 return SDValue();
15037
15038 SDValue N0 = N->getOperand(Num: 0);
15039
15040 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15041 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15042 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
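  // e.g. (sra (sext_inreg (shl X, 3), i32), 5) becomes (sra (shl X, 35), 37).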
15043 if (ShAmt < 32 &&
15044 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15045 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15046 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15047 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15048 uint64_t LShAmt = N0.getOperand(i: 0).getConstantOperandVal(i: 1);
15049 if (LShAmt < 32) {
15050 SDLoc ShlDL(N0.getOperand(i: 0));
15051 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15052 N0.getOperand(0).getOperand(0),
15053 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15054 SDLoc DL(N);
15055 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15056 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15057 }
15058 }
15059
15060 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15061 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15062 //
15063 // Also try these folds where an add or sub is in the middle.
15064  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
15065  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
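  // e.g. with C == 8: (sra (shl X, 32), 24) -> (shl (sext_inreg X, i32), 8).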
15066 SDValue Shl;
15067 ConstantSDNode *AddC = nullptr;
15068
15069 // We might have an ADD or SUB between the SRA and SHL.
15070 bool IsAdd = N0.getOpcode() == ISD::ADD;
15071 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15072 // Other operand needs to be a constant we can modify.
15073 AddC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: IsAdd ? 1 : 0));
15074 if (!AddC)
15075 return SDValue();
15076
15077 // AddC needs to have at least 32 trailing zeros.
15078 if (AddC->getAPIntValue().countr_zero() < 32)
15079 return SDValue();
15080
15081    // All users should be shifts by a constant less than or equal to 32. This
15082 // ensures we'll do this optimization for each of them to produce an
15083 // add/sub+sext_inreg they can all share.
15084 for (SDNode *U : N0->uses()) {
15085 if (U->getOpcode() != ISD::SRA ||
15086 !isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) ||
15087 U->getConstantOperandVal(Num: 1) > 32)
15088 return SDValue();
15089 }
15090
15091 Shl = N0.getOperand(i: IsAdd ? 0 : 1);
15092 } else {
15093 // Not an ADD or SUB.
15094 Shl = N0;
15095 }
15096
15097 // Look for a shift left by 32.
15098 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: Shl.getOperand(i: 1)) ||
15099 Shl.getConstantOperandVal(i: 1) != 32)
15100 return SDValue();
15101
15102  // If we didn't look through an add/sub, then the shl should have one use.
15103 // If we did look through an add/sub, the sext_inreg we create is free so
15104 // we're only creating 2 new instructions. It's enough to only remove the
15105 // original sra+add/sub.
15106 if (!AddC && !Shl.hasOneUse())
15107 return SDValue();
15108
15109 SDLoc DL(N);
15110 SDValue In = Shl.getOperand(i: 0);
15111
15112 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15113 // constant.
15114 if (AddC) {
15115 SDValue ShiftedAddC =
15116 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15117 if (IsAdd)
15118 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15119 else
15120 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15121 }
15122
15123 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15124 DAG.getValueType(MVT::i32));
15125 if (ShAmt == 32)
15126 return SExt;
15127
15128 return DAG.getNode(
15129 ISD::SHL, DL, MVT::i64, SExt,
15130 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15131}
15132
15133// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15134// the result is used as the condition of a br_cc or select_cc we can invert,
15135// inverting the setcc is free, and Z is 0/1. Caller will invert the
15136// br_cc/select_cc.
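// e.g. (and (setcc X, Y, eq), (xor Z, 1)) becomes (or (setcc X, Y, ne), Z);
// after the caller inverts the br_cc/select_cc condition, the overall test is
// unchanged.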
15137static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15138 bool IsAnd = Cond.getOpcode() == ISD::AND;
15139 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15140 return SDValue();
15141
15142 if (!Cond.hasOneUse())
15143 return SDValue();
15144
15145 SDValue Setcc = Cond.getOperand(i: 0);
15146 SDValue Xor = Cond.getOperand(i: 1);
15147 // Canonicalize setcc to LHS.
15148 if (Setcc.getOpcode() != ISD::SETCC)
15149 std::swap(a&: Setcc, b&: Xor);
15150 // LHS should be a setcc and RHS should be an xor.
15151 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15152 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15153 return SDValue();
15154
15155 // If the condition is an And, SimplifyDemandedBits may have changed
15156 // (xor Z, 1) to (not Z).
15157 SDValue Xor1 = Xor.getOperand(i: 1);
15158 if (!isOneConstant(V: Xor1) && !(IsAnd && isAllOnesConstant(V: Xor1)))
15159 return SDValue();
15160
15161 EVT VT = Cond.getValueType();
15162 SDValue Xor0 = Xor.getOperand(i: 0);
15163
15164 // The LHS of the xor needs to be 0/1.
15165 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
15166 if (!DAG.MaskedValueIsZero(Op: Xor0, Mask))
15167 return SDValue();
15168
15169 // We can only invert integer setccs.
15170 EVT SetCCOpVT = Setcc.getOperand(i: 0).getValueType();
15171 if (!SetCCOpVT.isScalarInteger())
15172 return SDValue();
15173
15174 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Setcc.getOperand(i: 2))->get();
15175 if (ISD::isIntEqualitySetCC(Code: CCVal)) {
15176 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT);
15177 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 0),
15178 RHS: Setcc.getOperand(i: 1), Cond: CCVal);
15179 } else if (CCVal == ISD::SETLT && isNullConstant(V: Setcc.getOperand(i: 0))) {
15180 // Invert (setlt 0, X) by converting to (setlt X, 1).
15181 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 1),
15182 RHS: DAG.getConstant(Val: 1, DL: SDLoc(Setcc), VT), Cond: CCVal);
15183 } else if (CCVal == ISD::SETLT && isOneConstant(V: Setcc.getOperand(i: 1))) {
15184    // Invert (setlt X, 1) by converting to (setlt 0, X).
15185 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT,
15186 LHS: DAG.getConstant(Val: 0, DL: SDLoc(Setcc), VT),
15187 RHS: Setcc.getOperand(i: 0), Cond: CCVal);
15188 } else
15189 return SDValue();
15190
15191 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15192 return DAG.getNode(Opcode: Opc, DL: SDLoc(Cond), VT, N1: Setcc, N2: Xor.getOperand(i: 0));
15193}
15194
15195// Perform common combines for BR_CC and SELECT_CC conditions.
15196static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15197 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15198 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
15199
15200  // An arithmetic right shift preserves the sign, so the shift can be omitted
15201  // when the result is only compared against zero.
15202 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15203 // setge (sra X, N), 0 -> setge X, 0
15204 if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15205 LHS.getOpcode() == ISD::SRA) {
15206 LHS = LHS.getOperand(i: 0);
15207 return true;
15208 }
15209
15210 if (!ISD::isIntEqualitySetCC(Code: CCVal))
15211 return false;
15212
15213 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15214 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15215 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) &&
15216 LHS.getOperand(i: 0).getValueType() == Subtarget.getXLenVT()) {
15217 // If we're looking for eq 0 instead of ne 0, we need to invert the
15218 // condition.
15219 bool Invert = CCVal == ISD::SETEQ;
15220 CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get();
15221 if (Invert)
15222 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
15223
15224 RHS = LHS.getOperand(i: 1);
15225 LHS = LHS.getOperand(i: 0);
15226 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
15227
15228 CC = DAG.getCondCode(Cond: CCVal);
15229 return true;
15230 }
15231
15232 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15233 if (LHS.getOpcode() == ISD::XOR && isNullConstant(V: RHS)) {
15234 RHS = LHS.getOperand(i: 1);
15235 LHS = LHS.getOperand(i: 0);
15236 return true;
15237 }
15238
15239 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
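  // Shifting the tested bit into the sign position lets the equality test
  // against zero be replaced by a signed comparison with zero: ge when the
  // bit is clear, lt when it is set.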
15240 if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15241 LHS.getOperand(i: 1).getOpcode() == ISD::Constant) {
15242 SDValue LHS0 = LHS.getOperand(i: 0);
15243 if (LHS0.getOpcode() == ISD::AND &&
15244 LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) {
15245 uint64_t Mask = LHS0.getConstantOperandVal(i: 1);
15246 uint64_t ShAmt = LHS.getConstantOperandVal(i: 1);
15247 if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) {
15248 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15249 CC = DAG.getCondCode(Cond: CCVal);
15250
15251 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15252 LHS = LHS0.getOperand(i: 0);
15253 if (ShAmt != 0)
15254 LHS =
15255 DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0),
15256 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
15257 return true;
15258 }
15259 }
15260 }
15261
15262  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15263 // This can occur when legalizing some floating point comparisons.
15264 APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1);
15265 if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) {
15266 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
15267 CC = DAG.getCondCode(Cond: CCVal);
15268 RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType());
15269 return true;
15270 }
15271
15272 if (isNullConstant(V: RHS)) {
15273 if (SDValue NewCond = tryDemorganOfBooleanCondition(Cond: LHS, DAG)) {
15274 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
15275 CC = DAG.getCondCode(Cond: CCVal);
15276 LHS = NewCond;
15277 return true;
15278 }
15279 }
15280
15281 return false;
15282}
15283
15284// Fold
15285// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15286// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15287// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15288// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
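// The shift opcodes handled below fold the same way, e.g.
// (select C, (shl Y, X), Y) -> (shl Y, (select C, X, 0)); for these
// non-commutative operations only the form with Y as the first operand of the
// binop is matched.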
15289static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15290 SDValue TrueVal, SDValue FalseVal,
15291 bool Swapped) {
15292 bool Commutative = true;
15293 unsigned Opc = TrueVal.getOpcode();
15294 switch (Opc) {
15295 default:
15296 return SDValue();
15297 case ISD::SHL:
15298 case ISD::SRA:
15299 case ISD::SRL:
15300 case ISD::SUB:
15301 Commutative = false;
15302 break;
15303 case ISD::ADD:
15304 case ISD::OR:
15305 case ISD::XOR:
15306 break;
15307 }
15308
15309 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(Val: FalseVal))
15310 return SDValue();
15311
15312 unsigned OpToFold;
15313 if (FalseVal == TrueVal.getOperand(i: 0))
15314 OpToFold = 0;
15315 else if (Commutative && FalseVal == TrueVal.getOperand(i: 1))
15316 OpToFold = 1;
15317 else
15318 return SDValue();
15319
15320 EVT VT = N->getValueType(ResNo: 0);
15321 SDLoc DL(N);
15322 SDValue OtherOp = TrueVal.getOperand(i: 1 - OpToFold);
15323 EVT OtherOpVT = OtherOp->getValueType(ResNo: 0);
15324 SDValue IdentityOperand =
15325 DAG.getNeutralElement(Opcode: Opc, DL, VT: OtherOpVT, Flags: N->getFlags());
15326 if (!Commutative)
15327 IdentityOperand = DAG.getConstant(Val: 0, DL, VT: OtherOpVT);
15328 assert(IdentityOperand && "No identity operand!");
15329
15330 if (Swapped)
15331 std::swap(a&: OtherOp, b&: IdentityOperand);
15332 SDValue NewSel =
15333 DAG.getSelect(DL, VT: OtherOpVT, Cond: N->getOperand(Num: 0), LHS: OtherOp, RHS: IdentityOperand);
15334 return DAG.getNode(Opcode: TrueVal.getOpcode(), DL, VT, N1: FalseVal, N2: NewSel);
15335}
15336
15337// This tries to get rid of the `select` and `icmp` that are being used to
15338// handle targets that do not support `cttz(0)`/`ctlz(0)`.
15339static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15340 SDValue Cond = N->getOperand(Num: 0);
15341
15342 // This represents either CTTZ or CTLZ instruction.
15343 SDValue CountZeroes;
15344
15345 SDValue ValOnZero;
15346
15347 if (Cond.getOpcode() != ISD::SETCC)
15348 return SDValue();
15349
15350 if (!isNullConstant(V: Cond->getOperand(Num: 1)))
15351 return SDValue();
15352
15353 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond->getOperand(Num: 2))->get();
15354 if (CCVal == ISD::CondCode::SETEQ) {
15355 CountZeroes = N->getOperand(Num: 2);
15356 ValOnZero = N->getOperand(Num: 1);
15357 } else if (CCVal == ISD::CondCode::SETNE) {
15358 CountZeroes = N->getOperand(Num: 1);
15359 ValOnZero = N->getOperand(Num: 2);
15360 } else {
15361 return SDValue();
15362 }
15363
15364 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15365 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15366 CountZeroes = CountZeroes.getOperand(i: 0);
15367
15368 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15369 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15370 CountZeroes.getOpcode() != ISD::CTLZ &&
15371 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15372 return SDValue();
15373
15374 if (!isNullConstant(V: ValOnZero))
15375 return SDValue();
15376
15377 SDValue CountZeroesArgument = CountZeroes->getOperand(Num: 0);
15378 if (Cond->getOperand(Num: 0) != CountZeroesArgument)
15379 return SDValue();
15380
15381 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15382 CountZeroes = DAG.getNode(Opcode: ISD::CTTZ, DL: SDLoc(CountZeroes),
15383 VT: CountZeroes.getValueType(), Operand: CountZeroesArgument);
15384 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15385 CountZeroes = DAG.getNode(Opcode: ISD::CTLZ, DL: SDLoc(CountZeroes),
15386 VT: CountZeroes.getValueType(), Operand: CountZeroesArgument);
15387 }
15388
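  // After replacing the zero-undef variants, cttz/ctlz of 0 returns the bit
  // width. Masking with BitWidth - 1 maps that result to 0 (the bit widths
  // seen here are powers of two) while leaving all smaller results unchanged,
  // which matches the original select.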
15389 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15390 SDValue BitWidthMinusOne =
15391 DAG.getConstant(Val: BitWidth - 1, DL: SDLoc(N), VT: CountZeroes.getValueType());
15392
15393 auto AndNode = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: CountZeroes.getValueType(),
15394 N1: CountZeroes, N2: BitWidthMinusOne);
15395 return DAG.getZExtOrTrunc(Op: AndNode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0));
15396}
15397
15398static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15399 const RISCVSubtarget &Subtarget) {
15400 SDValue Cond = N->getOperand(Num: 0);
15401 SDValue True = N->getOperand(Num: 1);
15402 SDValue False = N->getOperand(Num: 2);
15403 SDLoc DL(N);
15404 EVT VT = N->getValueType(ResNo: 0);
15405 EVT CondVT = Cond.getValueType();
15406
15407 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15408 return SDValue();
15409
15410  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
15411  // BEXTI, where C is a power of 2.
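  // e.g. (select (seteq (and X, 0x1000), 0), T, F) becomes
  // (select (setne (and X, 0x1000), 0), F, T); 0x1000 does not fit in an ANDI
  // immediate, but the and+setne pair selects to a single BEXTI.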
15412 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15413 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15414 SDValue LHS = Cond.getOperand(i: 0);
15415 SDValue RHS = Cond.getOperand(i: 1);
15416 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
15417 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15418 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) && isNullConstant(V: RHS)) {
15419 const APInt &MaskVal = LHS.getConstantOperandAPInt(i: 1);
15420 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(N: 12))
15421 return DAG.getSelect(DL, VT,
15422 Cond: DAG.getSetCC(DL, VT: CondVT, LHS, RHS, Cond: ISD::SETNE),
15423 LHS: False, RHS: True);
15424 }
15425 }
15426 return SDValue();
15427}
15428
15429static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15430 const RISCVSubtarget &Subtarget) {
15431 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15432 return Folded;
15433
15434 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15435 return V;
15436
15437 if (Subtarget.hasConditionalMoveFusion())
15438 return SDValue();
15439
15440 SDValue TrueVal = N->getOperand(Num: 1);
15441 SDValue FalseVal = N->getOperand(Num: 2);
15442 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15443 return V;
15444 return tryFoldSelectIntoOp(N, DAG, TrueVal: FalseVal, FalseVal: TrueVal, /*Swapped*/true);
15445}
15446
15447/// If we have a build_vector where each lane is binop X, C, where C
15448/// is a constant (but not necessarily the same constant on all lanes),
15449/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15450/// We assume that materializing a constant build vector will be no more
15451/// expensive than performing O(n) binops.
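/// e.g. (build_vector (add X1, 1), (add X2, 2)) becomes
/// (add (build_vector X1, X2), (build_vector 1, 2)).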
15452static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15453 const RISCVSubtarget &Subtarget,
15454 const RISCVTargetLowering &TLI) {
15455 SDLoc DL(N);
15456 EVT VT = N->getValueType(ResNo: 0);
15457
15458 assert(!VT.isScalableVector() && "unexpected build vector");
15459
15460 if (VT.getVectorNumElements() == 1)
15461 return SDValue();
15462
15463 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15464 if (!TLI.isBinOp(Opcode))
15465 return SDValue();
15466
15467 if (!TLI.isOperationLegalOrCustom(Op: Opcode, VT) || !TLI.isTypeLegal(VT))
15468 return SDValue();
15469
15470 // This BUILD_VECTOR involves an implicit truncation, and sinking
15471 // truncates through binops is non-trivial.
15472 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15473 return SDValue();
15474
15475 SmallVector<SDValue> LHSOps;
15476 SmallVector<SDValue> RHSOps;
15477 for (SDValue Op : N->ops()) {
15478 if (Op.isUndef()) {
15479 // We can't form a divide or remainder from undef.
15480 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15481 return SDValue();
15482
15483 LHSOps.push_back(Elt: Op);
15484 RHSOps.push_back(Elt: Op);
15485 continue;
15486 }
15487
15488    // TODO: We can handle operations which have a neutral rhs value
15489 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15490 // of profit in a more explicit manner.
15491 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15492 return SDValue();
15493
15494 LHSOps.push_back(Elt: Op.getOperand(i: 0));
15495 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)) &&
15496 !isa<ConstantFPSDNode>(Val: Op.getOperand(i: 1)))
15497 return SDValue();
15498 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15499 // have different LHS and RHS types.
15500 if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType())
15501 return SDValue();
15502
15503 RHSOps.push_back(Elt: Op.getOperand(i: 1));
15504 }
15505
15506 return DAG.getNode(Opcode, DL, VT, N1: DAG.getBuildVector(VT, DL, Ops: LHSOps),
15507 N2: DAG.getBuildVector(VT, DL, Ops: RHSOps));
15508}
15509
15510static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15511 const RISCVSubtarget &Subtarget,
15512 const RISCVTargetLowering &TLI) {
15513 SDValue InVec = N->getOperand(Num: 0);
15514 SDValue InVal = N->getOperand(Num: 1);
15515 SDValue EltNo = N->getOperand(Num: 2);
15516 SDLoc DL(N);
15517
15518 EVT VT = InVec.getValueType();
15519 if (VT.isScalableVector())
15520 return SDValue();
15521
15522 if (!InVec.hasOneUse())
15523 return SDValue();
15524
15525 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15526 // move the insert_vector_elts into the arms of the binop. Note that
15527 // the new RHS must be a constant.
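  // e.g. (insert_vector_elt (add V, VecC), (add S, C2), Idx) becomes
  // (add (insert_vector_elt V, S, Idx), (insert_vector_elt VecC, C2, Idx)).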
15528 const unsigned InVecOpcode = InVec->getOpcode();
15529 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(Opcode: InVecOpcode) &&
15530 InVal.hasOneUse()) {
15531 SDValue InVecLHS = InVec->getOperand(Num: 0);
15532 SDValue InVecRHS = InVec->getOperand(Num: 1);
15533 SDValue InValLHS = InVal->getOperand(Num: 0);
15534 SDValue InValRHS = InVal->getOperand(Num: 1);
15535
15536 if (!ISD::isBuildVectorOfConstantSDNodes(N: InVecRHS.getNode()))
15537 return SDValue();
15538 if (!isa<ConstantSDNode>(Val: InValRHS) && !isa<ConstantFPSDNode>(Val: InValRHS))
15539 return SDValue();
15540 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15541 // have different LHS and RHS types.
15542 if (InVec.getOperand(i: 0).getValueType() != InVec.getOperand(i: 1).getValueType())
15543 return SDValue();
15544 SDValue LHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT,
15545 N1: InVecLHS, N2: InValLHS, N3: EltNo);
15546 SDValue RHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT,
15547 N1: InVecRHS, N2: InValRHS, N3: EltNo);
15548 return DAG.getNode(Opcode: InVecOpcode, DL, VT, N1: LHS, N2: RHS);
15549 }
15550
15551 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15552 // move the insert_vector_elt to the source operand of the concat_vector.
15553 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15554 return SDValue();
15555
15556 auto *IndexC = dyn_cast<ConstantSDNode>(Val&: EltNo);
15557 if (!IndexC)
15558 return SDValue();
15559 unsigned Elt = IndexC->getZExtValue();
15560
15561 EVT ConcatVT = InVec.getOperand(i: 0).getValueType();
15562 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15563 return SDValue();
15564 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15565 SDValue NewIdx = DAG.getVectorIdxConstant(Val: Elt % ConcatNumElts, DL);
15566
15567 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15568 SDValue ConcatOp = InVec.getOperand(i: ConcatOpIdx);
15569 ConcatOp = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ConcatVT,
15570 N1: ConcatOp, N2: InVal, N3: NewIdx);
15571
15572 SmallVector<SDValue> ConcatOps;
15573 ConcatOps.append(in_start: InVec->op_begin(), in_end: InVec->op_end());
15574 ConcatOps[ConcatOpIdx] = ConcatOp;
15575 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps);
15576}
15577
15578// If we're concatenating a series of vector loads like
15579// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15580// Then we can turn this into a strided load by widening the vector elements
15581// vlse32 p, stride=n
15582static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15583 const RISCVSubtarget &Subtarget,
15584 const RISCVTargetLowering &TLI) {
15585 SDLoc DL(N);
15586 EVT VT = N->getValueType(ResNo: 0);
15587
15588 // Only perform this combine on legal MVTs.
15589 if (!TLI.isTypeLegal(VT))
15590 return SDValue();
15591
15592 // TODO: Potentially extend this to scalable vectors
15593 if (VT.isScalableVector())
15594 return SDValue();
15595
15596 auto *BaseLd = dyn_cast<LoadSDNode>(Val: N->getOperand(Num: 0));
15597 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(N: BaseLd) ||
15598 !SDValue(BaseLd, 0).hasOneUse())
15599 return SDValue();
15600
15601 EVT BaseLdVT = BaseLd->getValueType(ResNo: 0);
15602
15603 // Go through the loads and check that they're strided
15604 SmallVector<LoadSDNode *> Lds;
15605 Lds.push_back(Elt: BaseLd);
15606 Align Align = BaseLd->getAlign();
15607 for (SDValue Op : N->ops().drop_front()) {
15608 auto *Ld = dyn_cast<LoadSDNode>(Val&: Op);
15609 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15610 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(N: Ld) ||
15611 Ld->getValueType(ResNo: 0) != BaseLdVT)
15612 return SDValue();
15613
15614 Lds.push_back(Elt: Ld);
15615
15616 // The common alignment is the most restrictive (smallest) of all the loads
15617 Align = std::min(a: Align, b: Ld->getAlign());
15618 }
15619
15620 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15621 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15622 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15623 // If the load ptrs can be decomposed into a common (Base + Index) with a
15624 // common constant stride, then return the constant stride.
15625 BaseIndexOffset BIO1 = BaseIndexOffset::match(N: Ld1, DAG);
15626 BaseIndexOffset BIO2 = BaseIndexOffset::match(N: Ld2, DAG);
15627 if (BIO1.equalBaseIndex(Other: BIO2, DAG))
15628 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15629
15630 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15631 SDValue P1 = Ld1->getBasePtr();
15632 SDValue P2 = Ld2->getBasePtr();
15633 if (P2.getOpcode() == ISD::ADD && P2.getOperand(i: 0) == P1)
15634 return {{P2.getOperand(i: 1), false}};
15635 if (P1.getOpcode() == ISD::ADD && P1.getOperand(i: 0) == P2)
15636 return {{P1.getOperand(i: 1), true}};
15637
15638 return std::nullopt;
15639 };
15640
15641 // Get the distance between the first and second loads
15642 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15643 if (!BaseDiff)
15644 return SDValue();
15645
15646 // Check all the loads are the same distance apart
15647 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15648 if (GetPtrDiff(*It, *std::next(x: It)) != BaseDiff)
15649 return SDValue();
15650
15651 // TODO: At this point, we've successfully matched a generalized gather
15652 // load. Maybe we should emit that, and then move the specialized
15653 // matchers above and below into a DAG combine?
15654
15655 // Get the widened scalar type, e.g. v4i8 -> i64
15656 unsigned WideScalarBitWidth =
15657 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15658 MVT WideScalarVT = MVT::getIntegerVT(BitWidth: WideScalarBitWidth);
15659
15660 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
15661 MVT WideVecVT = MVT::getVectorVT(VT: WideScalarVT, NumElements: N->getNumOperands());
15662 if (!TLI.isTypeLegal(VT: WideVecVT))
15663 return SDValue();
15664
15665 // Check that the operation is legal
15666 if (!TLI.isLegalStridedLoadStore(DataType: WideVecVT, Alignment: Align))
15667 return SDValue();
15668
15669 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15670 SDValue Stride = std::holds_alternative<SDValue>(v: StrideVariant)
15671 ? std::get<SDValue>(v&: StrideVariant)
15672 : DAG.getConstant(Val: std::get<int64_t>(v&: StrideVariant), DL,
15673 VT: Lds[0]->getOffset().getValueType());
15674 if (MustNegateStride)
15675 Stride = DAG.getNegative(Val: Stride, DL, VT: Stride.getValueType());
15676
15677 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15678 SDValue IntID =
15679 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15680 Subtarget.getXLenVT());
15681
15682 SDValue AllOneMask =
15683 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15684 DAG.getConstant(1, DL, MVT::i1));
15685
15686 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(VT: WideVecVT),
15687 BaseLd->getBasePtr(), Stride, AllOneMask};
15688
15689 uint64_t MemSize;
15690 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Val&: Stride);
15691 ConstStride && ConstStride->getSExtValue() >= 0)
15692 // total size = (elsize * n) + (stride - elsize) * (n-1)
15693 // = elsize + stride * (n-1)
15694 MemSize = WideScalarVT.getSizeInBits() +
15695 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15696 else
15697 // If Stride isn't constant, then we can't know how much it will load
15698 MemSize = MemoryLocation::UnknownSize;
15699
15700 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15701 PtrInfo: BaseLd->getPointerInfo(), F: BaseLd->getMemOperand()->getFlags(), Size: MemSize,
15702 BaseAlignment: Align);
15703
15704 SDValue StridedLoad = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs,
15705 Ops, MemVT: WideVecVT, MMO);
15706 for (SDValue Ld : N->ops())
15707 DAG.makeEquivalentMemoryOrdering(OldLoad: cast<LoadSDNode>(Val&: Ld), NewMemOp: StridedLoad);
15708
15709 return DAG.getBitcast(VT: VT.getSimpleVT(), V: StridedLoad);
15710}
15711
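// Try to fold the addend of an ADD/ADD_VL into a widening multiply
// (VWMUL/VWMULU/VWMULSU) to form the corresponding VWMACC/VWMACCU/VWMACCSU
// node.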
15712static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15713 const RISCVSubtarget &Subtarget) {
15714
15715 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15716
15717 if (N->getValueType(ResNo: 0).isFixedLengthVector())
15718 return SDValue();
15719
15720 SDValue Addend = N->getOperand(Num: 0);
15721 SDValue MulOp = N->getOperand(Num: 1);
15722
15723 if (N->getOpcode() == RISCVISD::ADD_VL) {
15724 SDValue AddMergeOp = N->getOperand(Num: 2);
15725 if (!AddMergeOp.isUndef())
15726 return SDValue();
15727 }
15728
15729 auto IsVWMulOpc = [](unsigned Opc) {
15730 switch (Opc) {
15731 case RISCVISD::VWMUL_VL:
15732 case RISCVISD::VWMULU_VL:
15733 case RISCVISD::VWMULSU_VL:
15734 return true;
15735 default:
15736 return false;
15737 }
15738 };
15739
15740 if (!IsVWMulOpc(MulOp.getOpcode()))
15741 std::swap(a&: Addend, b&: MulOp);
15742
15743 if (!IsVWMulOpc(MulOp.getOpcode()))
15744 return SDValue();
15745
15746 SDValue MulMergeOp = MulOp.getOperand(i: 2);
15747
15748 if (!MulMergeOp.isUndef())
15749 return SDValue();
15750
15751 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15752 const RISCVSubtarget &Subtarget) {
15753 if (N->getOpcode() == ISD::ADD) {
15754 SDLoc DL(N);
15755 return getDefaultScalableVLOps(VecVT: N->getSimpleValueType(ResNo: 0), DL, DAG,
15756 Subtarget);
15757 }
15758 return std::make_pair(x: N->getOperand(Num: 3), y: N->getOperand(Num: 4));
15759 }(N, DAG, Subtarget);
15760
15761 SDValue MulMask = MulOp.getOperand(i: 3);
15762 SDValue MulVL = MulOp.getOperand(i: 4);
15763
15764 if (AddMask != MulMask || AddVL != MulVL)
15765 return SDValue();
15766
15767 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15768 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15769 "Unexpected opcode after VWMACC_VL");
15770 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15771 "Unexpected opcode after VWMACC_VL!");
15772 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15773 "Unexpected opcode after VWMUL_VL!");
15774 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15775 "Unexpected opcode after VWMUL_VL!");
15776
15777 SDLoc DL(N);
15778 EVT VT = N->getValueType(ResNo: 0);
15779 SDValue Ops[] = {MulOp.getOperand(i: 0), MulOp.getOperand(i: 1), Addend, AddMask,
15780 AddVL};
15781 return DAG.getNode(Opcode: Opc, DL, VT, Ops);
15782}
15783
15784static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15785 ISD::MemIndexType &IndexType,
15786 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15787 if (!DCI.isBeforeLegalize())
15788 return false;
15789
15790 SelectionDAG &DAG = DCI.DAG;
15791 const MVT XLenVT =
15792 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15793
15794 const EVT IndexVT = Index.getValueType();
15795
15796  // RISC-V indexed loads and stores only support the "unsigned unscaled"
15797  // addressing mode, so anything else must be manually legalized.
15798 if (!isIndexTypeSigned(IndexType))
15799 return false;
15800
15801 if (IndexVT.getVectorElementType().bitsLT(VT: XLenVT)) {
15802 // Any index legalization should first promote to XLenVT, so we don't lose
15803 // bits when scaling. This may create an illegal index type so we let
15804 // LLVM's legalization take care of the splitting.
15805 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15806 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL,
15807 VT: IndexVT.changeVectorElementType(EltVT: XLenVT), Operand: Index);
15808 }
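  // Sign-extending the index to XLenVT first makes it safe to reinterpret it
  // as unsigned: once the elements are XLEN bits wide, the address addition
  // produces the same result for either interpretation.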
15809 IndexType = ISD::UNSIGNED_SCALED;
15810 return true;
15811}
15812
15813/// Match the index vector of a scatter or gather node as the shuffle mask
15814/// which performs the rearrangement if possible. Will only match if
15815/// all lanes are touched, and thus replacing the scatter or gather with
15816/// a unit strided access and shuffle is legal.
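/// e.g. for i32 elements, byte indices (4, 0, 12, 8) touch every lane and
/// correspond to the shuffle mask <1, 0, 3, 2>.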
15817static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15818 SmallVector<int> &ShuffleMask) {
15819 if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()))
15820 return false;
15821 if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode()))
15822 return false;
15823
15824 const unsigned ElementSize = VT.getScalarStoreSize();
15825 const unsigned NumElems = VT.getVectorNumElements();
15826
15827 // Create the shuffle mask and check all bits active
15828 assert(ShuffleMask.empty());
15829 BitVector ActiveLanes(NumElems);
15830 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15831 // TODO: We've found an active bit of UB, and could be
15832 // more aggressive here if desired.
15833 if (Index->getOperand(Num: i)->isUndef())
15834 return false;
15835 uint64_t C = Index->getConstantOperandVal(Num: i);
15836 if (C % ElementSize != 0)
15837 return false;
15838 C = C / ElementSize;
15839 if (C >= NumElems)
15840 return false;
15841 ShuffleMask.push_back(Elt: C);
15842 ActiveLanes.set(C);
15843 }
15844 return ActiveLanes.all();
15845}
15846
15847/// Match the index of a gather or scatter operation as an operation
15848/// with twice the element width and half the number of elements. This is
15849/// generally profitable (if legal) because these operations are linear
15850/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15851/// come out ahead.
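/// e.g. a gather of 8 x i16 with byte indices (0, 2, 8, 10, 4, 6, 12, 14)
/// reads adjacent element pairs and can instead be done as a gather of
/// 4 x i32 with byte indices (0, 8, 4, 12).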
15852static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15853 Align BaseAlign, const RISCVSubtarget &ST) {
15854 if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()))
15855 return false;
15856 if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode()))
15857 return false;
15858
15859  // Attempt a doubling. If we can use an element type 4x or 8x in
15860  // size, this will happen via multiple iterations of the transform.
15861 const unsigned NumElems = VT.getVectorNumElements();
15862 if (NumElems % 2 != 0)
15863 return false;
15864
15865 const unsigned ElementSize = VT.getScalarStoreSize();
15866 const unsigned WiderElementSize = ElementSize * 2;
15867 if (WiderElementSize > ST.getELen()/8)
15868 return false;
15869
15870 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
15871 return false;
15872
15873 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15874 // TODO: We've found an active bit of UB, and could be
15875 // more aggressive here if desired.
15876 if (Index->getOperand(Num: i)->isUndef())
15877 return false;
15878 // TODO: This offset check is too strict if we support fully
15879 // misaligned memory operations.
15880 uint64_t C = Index->getConstantOperandVal(Num: i);
15881 if (i % 2 == 0) {
15882 if (C % WiderElementSize != 0)
15883 return false;
15884 continue;
15885 }
15886 uint64_t Last = Index->getConstantOperandVal(Num: i-1);
15887 if (C != Last + ElementSize)
15888 return false;
15889 }
15890 return true;
15891}
15892
15893
15894SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15895 DAGCombinerInfo &DCI) const {
15896 SelectionDAG &DAG = DCI.DAG;
15897 const MVT XLenVT = Subtarget.getXLenVT();
15898 SDLoc DL(N);
15899
15900 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15901 // bits are demanded. N will be added to the Worklist if it was not deleted.
15902 // Caller should return SDValue(N, 0) if this returns true.
15903 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15904 SDValue Op = N->getOperand(Num: OpNo);
15905 APInt Mask = APInt::getLowBitsSet(numBits: Op.getValueSizeInBits(), loBitsSet: LowBits);
15906 if (!SimplifyDemandedBits(Op, DemandedBits: Mask, DCI))
15907 return false;
15908
15909 if (N->getOpcode() != ISD::DELETED_NODE)
15910 DCI.AddToWorklist(N);
15911 return true;
15912 };
15913
15914 switch (N->getOpcode()) {
15915 default:
15916 break;
15917 case RISCVISD::SplitF64: {
15918 SDValue Op0 = N->getOperand(Num: 0);
15919 // If the input to SplitF64 is just BuildPairF64 then the operation is
15920 // redundant. Instead, use BuildPairF64's operands directly.
15921 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15922 return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1));
15923
15924 if (Op0->isUndef()) {
15925 SDValue Lo = DAG.getUNDEF(MVT::i32);
15926 SDValue Hi = DAG.getUNDEF(MVT::i32);
15927 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
15928 }
15929
15930 // It's cheaper to materialise two 32-bit integers than to load a double
15931 // from the constant pool and transfer it to integer registers through the
15932 // stack.
15933 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
15934 APInt V = C->getValueAPF().bitcastToAPInt();
15935 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15936 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15937 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
15938 }
15939
15940 // This is a target-specific version of a DAGCombine performed in
15941 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15942 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15943 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15944 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15945 !Op0.getNode()->hasOneUse())
15946 break;
15947 SDValue NewSplitF64 =
15948 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15949 Op0.getOperand(0));
15950 SDValue Lo = NewSplitF64.getValue(R: 0);
15951 SDValue Hi = NewSplitF64.getValue(R: 1);
15952 APInt SignBit = APInt::getSignMask(BitWidth: 32);
15953 if (Op0.getOpcode() == ISD::FNEG) {
15954 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15955 DAG.getConstant(SignBit, DL, MVT::i32));
15956 return DCI.CombineTo(N, Res0: Lo, Res1: NewHi);
15957 }
15958 assert(Op0.getOpcode() == ISD::FABS);
15959 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15960 DAG.getConstant(~SignBit, DL, MVT::i32));
15961 return DCI.CombineTo(N, Res0: Lo, Res1: NewHi);
15962 }
15963 case RISCVISD::SLLW:
15964 case RISCVISD::SRAW:
15965 case RISCVISD::SRLW:
15966 case RISCVISD::RORW:
15967 case RISCVISD::ROLW: {
15968 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15969 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15970 SimplifyDemandedLowBitsHelper(1, 5))
15971 return SDValue(N, 0);
15972
15973 break;
15974 }
15975 case RISCVISD::CLZW:
15976 case RISCVISD::CTZW: {
15977 // Only the lower 32 bits of the first operand are read
15978 if (SimplifyDemandedLowBitsHelper(0, 32))
15979 return SDValue(N, 0);
15980 break;
15981 }
15982 case RISCVISD::FMV_W_X_RV64: {
15983    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15984 // conversion is unnecessary and can be replaced with the
15985 // FMV_X_ANYEXTW_RV64 operand.
15986 SDValue Op0 = N->getOperand(Num: 0);
15987 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15988 return Op0.getOperand(i: 0);
15989 break;
15990 }
15991 case RISCVISD::FMV_X_ANYEXTH:
15992 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15993 SDLoc DL(N);
15994 SDValue Op0 = N->getOperand(Num: 0);
15995 MVT VT = N->getSimpleValueType(ResNo: 0);
15996 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
15997 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
15998 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
15999 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16000 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16001 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16002 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16003 assert(Op0.getOperand(0).getValueType() == VT &&
16004 "Unexpected value type!");
16005 return Op0.getOperand(i: 0);
16006 }
16007
16008 // This is a target-specific version of a DAGCombine performed in
16009 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16010 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16011 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16012 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16013 !Op0.getNode()->hasOneUse())
16014 break;
16015 SDValue NewFMV = DAG.getNode(Opcode: N->getOpcode(), DL, VT, Operand: Op0.getOperand(i: 0));
16016 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16017 APInt SignBit = APInt::getSignMask(BitWidth: FPBits).sext(width: VT.getSizeInBits());
16018 if (Op0.getOpcode() == ISD::FNEG)
16019 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewFMV,
16020 N2: DAG.getConstant(Val: SignBit, DL, VT));
16021
16022 assert(Op0.getOpcode() == ISD::FABS);
16023 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NewFMV,
16024 N2: DAG.getConstant(Val: ~SignBit, DL, VT));
16025 }
16026 case ISD::ABS: {
16027 EVT VT = N->getValueType(ResNo: 0);
16028 SDValue N0 = N->getOperand(Num: 0);
16029 // abs (sext) -> zext (abs)
16030 // abs (zext) -> zext (handled elsewhere)
16031 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16032 SDValue Src = N0.getOperand(i: 0);
16033 SDLoc DL(N);
16034 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT,
16035 Operand: DAG.getNode(Opcode: ISD::ABS, DL, VT: Src.getValueType(), Operand: Src));
16036 }
16037 break;
16038 }
16039 case ISD::ADD: {
16040 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16041 return V;
16042 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16043 return V;
16044 return performADDCombine(N, DAG, Subtarget);
16045 }
16046 case ISD::SUB: {
16047 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16048 return V;
16049 return performSUBCombine(N, DAG, Subtarget);
16050 }
16051 case ISD::AND:
16052 return performANDCombine(N, DCI, Subtarget);
16053 case ISD::OR: {
16054 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16055 return V;
16056 return performORCombine(N, DCI, Subtarget);
16057 }
16058 case ISD::XOR:
16059 return performXORCombine(N, DAG, Subtarget);
16060 case ISD::MUL:
16061 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16062 return V;
16063 return performMULCombine(N, DAG, DCI, Subtarget);
16064 case ISD::SDIV:
16065 case ISD::UDIV:
16066 case ISD::SREM:
16067 case ISD::UREM:
16068 if (SDValue V = combineBinOpOfZExt(N, DAG))
16069 return V;
16070 break;
16071 case ISD::FADD:
16072 case ISD::UMAX:
16073 case ISD::UMIN:
16074 case ISD::SMAX:
16075 case ISD::SMIN:
16076 case ISD::FMAXNUM:
16077 case ISD::FMINNUM: {
16078 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16079 return V;
16080 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16081 return V;
16082 return SDValue();
16083 }
16084 case ISD::SETCC:
16085 return performSETCCCombine(N, DAG, Subtarget);
16086 case ISD::SIGN_EXTEND_INREG:
16087 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16088 case ISD::ZERO_EXTEND:
16089 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16090 // type legalization. This is safe because fp_to_uint produces poison if
16091 // it overflows.
16092 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16093 SDValue Src = N->getOperand(Num: 0);
16094 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16095 isTypeLegal(Src.getOperand(0).getValueType()))
16096 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16097 Src.getOperand(0));
16098 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16099 isTypeLegal(VT: Src.getOperand(i: 1).getValueType())) {
16100 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16101 SDValue Res = DAG.getNode(Opcode: ISD::STRICT_FP_TO_UINT, DL: SDLoc(N), VTList: VTs,
16102 N1: Src.getOperand(i: 0), N2: Src.getOperand(i: 1));
16103 DCI.CombineTo(N, Res);
16104 DAG.ReplaceAllUsesOfValueWith(From: Src.getValue(R: 1), To: Res.getValue(R: 1));
16105 DCI.recursivelyDeleteUnusedNodes(N: Src.getNode());
16106 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16107 }
16108 }
16109 return SDValue();
16110 case RISCVISD::TRUNCATE_VECTOR_VL: {
16111 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16112    // This benefits the cases where X and Y are both vectors of the same
16113    // low-precision type. Without this fold, the truncate is lowered into
16114    // n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16115    // restriction, and that pattern is later expanded into a series of
16116    // "vsetvli" and "vnsrl" instructions before reaching this point.
16117 auto IsTruncNode = [](SDValue V) {
16118 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16119 return false;
16120 SDValue VL = V.getOperand(i: 2);
16121 auto *C = dyn_cast<ConstantSDNode>(Val&: VL);
16122      // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for the VMSET_VL operand
16123 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16124 (isa<RegisterSDNode>(VL) &&
16125 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16126 return V.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL &&
16127 IsVLMAXForVMSET;
16128 };
16129
16130 SDValue Op = N->getOperand(Num: 0);
16131
16132 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16133 // to distinguish such pattern.
16134 while (IsTruncNode(Op)) {
16135 if (!Op.hasOneUse())
16136 return SDValue();
16137 Op = Op.getOperand(i: 0);
16138 }
16139
16140 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16141 SDValue N0 = Op.getOperand(i: 0);
16142 SDValue N1 = Op.getOperand(i: 1);
16143 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16144 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16145 SDValue N00 = N0.getOperand(i: 0);
16146 SDValue N10 = N1.getOperand(i: 0);
16147 if (N00.getValueType().isVector() &&
16148 N00.getValueType() == N10.getValueType() &&
16149 N->getValueType(ResNo: 0) == N10.getValueType()) {
16150 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16151 SDValue SMin = DAG.getNode(
16152 Opcode: ISD::SMIN, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0), N1: N10,
16153 N2: DAG.getConstant(Val: MaxShAmt, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0)));
16154 return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: N00, N2: SMin);
16155 }
16156 }
16157 }
16158 break;
16159 }
16160 case ISD::TRUNCATE:
16161 return performTRUNCATECombine(N, DAG, Subtarget);
16162 case ISD::SELECT:
16163 return performSELECTCombine(N, DAG, Subtarget);
16164 case RISCVISD::CZERO_EQZ:
16165 case RISCVISD::CZERO_NEZ:
16166 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
16167 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
16168 if (N->getOperand(Num: 1).getOpcode() == ISD::XOR &&
16169 isOneConstant(V: N->getOperand(Num: 1).getOperand(i: 1))) {
16170 SDValue Cond = N->getOperand(Num: 1).getOperand(i: 0);
16171 APInt Mask = APInt::getBitsSetFrom(numBits: Cond.getValueSizeInBits(), loBit: 1);
16172 if (DAG.MaskedValueIsZero(Op: Cond, Mask)) {
16173 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
16174 ? RISCVISD::CZERO_NEZ
16175 : RISCVISD::CZERO_EQZ;
16176 return DAG.getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
16177 N1: N->getOperand(Num: 0), N2: Cond);
16178 }
16179 }
16180 return SDValue();
16181
16182 case RISCVISD::SELECT_CC: {
16183    // Try to transform this SELECT_CC into simpler operations.
16184 SDValue LHS = N->getOperand(Num: 0);
16185 SDValue RHS = N->getOperand(Num: 1);
16186 SDValue CC = N->getOperand(Num: 2);
16187 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
16188 SDValue TrueV = N->getOperand(Num: 3);
16189 SDValue FalseV = N->getOperand(Num: 4);
16190 SDLoc DL(N);
16191 EVT VT = N->getValueType(ResNo: 0);
16192
16193 // If the True and False values are the same, we don't need a select_cc.
16194 if (TrueV == FalseV)
16195 return TrueV;
16196
16197 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16198 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
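    // Worked example (illustrative): on RV64, x >> 63 is all-ones when x < 0
    // and zero otherwise, so (x >> 63) & (y - z) is either (y - z) or 0, and
    // adding z yields y or z respectively.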
16199 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(Val: TrueV) &&
16200 isa<ConstantSDNode>(Val: FalseV) && isNullConstant(V: RHS) &&
16201 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16202 if (CCVal == ISD::CondCode::SETGE)
16203 std::swap(a&: TrueV, b&: FalseV);
16204
16205 int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue();
16206 int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue();
      // Only handle simm12; a constant outside that range would have to be
      // materialized in a register anyway.
16209 if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) &&
16210 isInt<12>(x: TrueSImm - FalseSImm)) {
16211 SDValue SRA =
16212 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS,
16213 N2: DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT));
16214 SDValue AND =
16215 DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
16216 N2: DAG.getConstant(Val: TrueSImm - FalseSImm, DL, VT));
16217 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV);
16218 }
16219
16220 if (CCVal == ISD::CondCode::SETGE)
16221 std::swap(a&: TrueV, b&: FalseV);
16222 }
16223
16224 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16225 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0),
16226 Ops: {LHS, RHS, CC, TrueV, FalseV});
16227
16228 if (!Subtarget.hasConditionalMoveFusion()) {
16229 // (select c, -1, y) -> -c | y
16230 if (isAllOnesConstant(V: TrueV)) {
16231 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal);
16232 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16233 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: FalseV);
16234 }
16235 // (select c, y, -1) -> -!c | y
16236 if (isAllOnesConstant(V: FalseV)) {
16237 SDValue C =
16238 DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT));
16239 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16240 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: TrueV);
16241 }
16242
16243 // (select c, 0, y) -> -!c & y
16244 if (isNullConstant(V: TrueV)) {
16245 SDValue C =
16246 DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT));
16247 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16248 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: FalseV);
16249 }
16250 // (select c, y, 0) -> -c & y
16251 if (isNullConstant(V: FalseV)) {
16252 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal);
16253 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
16254 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: TrueV);
16255 }
16256 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16257 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16258 if (((isOneConstant(V: FalseV) && LHS == TrueV &&
16259 CCVal == ISD::CondCode::SETNE) ||
16260 (isOneConstant(V: TrueV) && LHS == FalseV &&
16261 CCVal == ISD::CondCode::SETEQ)) &&
16262 isNullConstant(V: RHS)) {
16263 // freeze it to be safe.
16264 LHS = DAG.getFreeze(V: LHS);
16265 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::CondCode::SETEQ);
16266 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: LHS, N2: C);
16267 }
16268 }
16269
16270 // If both true/false are an xor with 1, pull through the select.
16271 // This can occur after op legalization if both operands are setccs that
16272 // require an xor to invert.
16273 // FIXME: Generalize to other binary ops with identical operand?
16274 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16275 TrueV.getOperand(i: 1) == FalseV.getOperand(i: 1) &&
16276 isOneConstant(V: TrueV.getOperand(i: 1)) &&
16277 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16278 SDValue NewSel = DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, N1: LHS, N2: RHS, N3: CC,
16279 N4: TrueV.getOperand(i: 0), N5: FalseV.getOperand(i: 0));
16280 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewSel, N2: TrueV.getOperand(i: 1));
16281 }
16282
16283 return SDValue();
16284 }
16285 case RISCVISD::BR_CC: {
16286 SDValue LHS = N->getOperand(Num: 1);
16287 SDValue RHS = N->getOperand(Num: 2);
16288 SDValue CC = N->getOperand(Num: 3);
16289 SDLoc DL(N);
16290
16291 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16292 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: N->getValueType(ResNo: 0),
16293 N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4));
16294
16295 return SDValue();
16296 }
16297 case ISD::BITREVERSE:
16298 return performBITREVERSECombine(N, DAG, Subtarget);
16299 case ISD::FP_TO_SINT:
16300 case ISD::FP_TO_UINT:
16301 return performFP_TO_INTCombine(N, DCI, Subtarget);
16302 case ISD::FP_TO_SINT_SAT:
16303 case ISD::FP_TO_UINT_SAT:
16304 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16305 case ISD::FCOPYSIGN: {
16306 EVT VT = N->getValueType(ResNo: 0);
16307 if (!VT.isVector())
16308 break;
    // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where the FP_ROUND is
    // truncating (TRUNC=1).
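    // For example (illustrative):
    //   (fcopysign X, (fpext (fneg Y))) -> (fcopysign X, (fneg (fpext Y)))
    // so that the negated sign can later be matched as vfsgnjn.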
16313 SDValue In2 = N->getOperand(Num: 1);
16314 // Avoid cases where the extend/round has multiple uses, as duplicating
16315 // those is typically more expensive than removing a fneg.
16316 if (!In2.hasOneUse())
16317 break;
16318 if (In2.getOpcode() != ISD::FP_EXTEND &&
16319 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(i: 1) != 0))
16320 break;
16321 In2 = In2.getOperand(i: 0);
16322 if (In2.getOpcode() != ISD::FNEG)
16323 break;
16324 SDLoc DL(N);
16325 SDValue NewFPExtRound = DAG.getFPExtendOrRound(Op: In2.getOperand(i: 0), DL, VT);
16326 return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: N->getOperand(Num: 0),
16327 N2: DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: NewFPExtRound));
16328 }
16329 case ISD::MGATHER: {
16330 const auto *MGN = dyn_cast<MaskedGatherSDNode>(Val: N);
16331 const EVT VT = N->getValueType(ResNo: 0);
16332 SDValue Index = MGN->getIndex();
16333 SDValue ScaleOp = MGN->getScale();
16334 ISD::MemIndexType IndexType = MGN->getIndexType();
16335 assert(!MGN->isIndexScaled() &&
16336 "Scaled gather/scatter should not be formed");
16337
16338 SDLoc DL(N);
16339 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16340 return DAG.getMaskedGather(
16341 VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL,
16342 Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16343 MGN->getBasePtr(), Index, ScaleOp},
16344 MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType());
16345
16346 if (narrowIndex(N&: Index, IndexType, DAG))
16347 return DAG.getMaskedGather(
16348 VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL,
16349 Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16350 MGN->getBasePtr(), Index, ScaleOp},
16351 MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType());
16352
16353 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16354 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
      // The VID sequence is computed in XLenVT, not in the type of Index.
      // Tell isSimpleVIDSequence this so we avoid overflow.
16357 if (std::optional<VIDSequence> SimpleVID =
16358 isSimpleVIDSequence(Op: Index, EltSizeInBits: Subtarget.getXLen());
16359 SimpleVID && SimpleVID->StepDenominator == 1) {
16360 const int64_t StepNumerator = SimpleVID->StepNumerator;
16361 const int64_t Addend = SimpleVID->Addend;
16362
        // Note: We don't need to check alignment here since (by assumption
        // from the existence of the gather), our offsets must be sufficiently
        // aligned.
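        // Illustrative example (values chosen for exposition): an index
        // vector <2, 6, 10, 14> has StepNumerator 4 and Addend 2, so the
        // gather becomes a strided load from BasePtr + 2 with stride 4.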
16366
16367 const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
16368 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16369 assert(IndexType == ISD::UNSIGNED_SCALED);
16370 SDValue BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: MGN->getBasePtr(),
16371 N2: DAG.getConstant(Val: Addend, DL, VT: PtrVT));
16372
16373 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16374 SDValue IntID =
16375 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16376 XLenVT);
16377 SDValue Ops[] =
16378 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16379 DAG.getConstant(Val: StepNumerator, DL, VT: XLenVT), MGN->getMask()};
16380 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs,
16381 Ops, MemVT: VT, MMO: MGN->getMemOperand());
16382 }
16383 }
16384
16385 SmallVector<int> ShuffleMask;
16386 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16387 matchIndexAsShuffle(VT, Index, Mask: MGN->getMask(), ShuffleMask)) {
16388 SDValue Load = DAG.getMaskedLoad(VT, dl: DL, Chain: MGN->getChain(),
16389 Base: MGN->getBasePtr(), Offset: DAG.getUNDEF(VT: XLenVT),
16390 Mask: MGN->getMask(), Src0: DAG.getUNDEF(VT),
16391 MemVT: MGN->getMemoryVT(), MMO: MGN->getMemOperand(),
16392 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD);
16393 SDValue Shuffle =
16394 DAG.getVectorShuffle(VT, dl: DL, N1: Load, N2: DAG.getUNDEF(VT), Mask: ShuffleMask);
16395 return DAG.getMergeValues(Ops: {Shuffle, Load.getValue(R: 1)}, dl: DL);
16396 }
16397
16398 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16399 matchIndexAsWiderOp(VT, Index, Mask: MGN->getMask(),
16400 BaseAlign: MGN->getMemOperand()->getBaseAlign(), ST: Subtarget)) {
16401 SmallVector<SDValue> NewIndices;
16402 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16403 NewIndices.push_back(Elt: Index.getOperand(i));
16404 EVT IndexVT = Index.getValueType()
16405 .getHalfNumVectorElementsVT(Context&: *DAG.getContext());
16406 Index = DAG.getBuildVector(VT: IndexVT, DL, Ops: NewIndices);
16407
16408 unsigned ElementSize = VT.getScalarStoreSize();
16409 EVT WideScalarVT = MVT::getIntegerVT(BitWidth: ElementSize * 8 * 2);
16410 auto EltCnt = VT.getVectorElementCount();
16411 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16412 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideScalarVT,
16413 EC: EltCnt.divideCoefficientBy(RHS: 2));
16414 SDValue Passthru = DAG.getBitcast(VT: WideVT, V: MGN->getPassThru());
16415 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16416 EltCnt.divideCoefficientBy(2));
16417 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16418
16419 SDValue Gather =
16420 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16421 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16422 Index, ScaleOp},
16423 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16424 SDValue Result = DAG.getBitcast(VT, V: Gather.getValue(R: 0));
16425 return DAG.getMergeValues(Ops: {Result, Gather.getValue(R: 1)}, dl: DL);
16426 }
16427 break;
16428 }
  case ISD::MSCATTER: {
16430 const auto *MSN = dyn_cast<MaskedScatterSDNode>(Val: N);
16431 SDValue Index = MSN->getIndex();
16432 SDValue ScaleOp = MSN->getScale();
16433 ISD::MemIndexType IndexType = MSN->getIndexType();
16434 assert(!MSN->isIndexScaled() &&
16435 "Scaled gather/scatter should not be formed");
16436
16437 SDLoc DL(N);
16438 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16439 return DAG.getMaskedScatter(
16440 VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL,
16441 Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16442 Index, ScaleOp},
16443 MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore());
16444
16445 if (narrowIndex(N&: Index, IndexType, DAG))
16446 return DAG.getMaskedScatter(
16447 VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL,
16448 Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16449 Index, ScaleOp},
16450 MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore());
16451
16452 EVT VT = MSN->getValue()->getValueType(ResNo: 0);
16453 SmallVector<int> ShuffleMask;
16454 if (!MSN->isTruncatingStore() &&
16455 matchIndexAsShuffle(VT, Index, Mask: MSN->getMask(), ShuffleMask)) {
16456 SDValue Shuffle = DAG.getVectorShuffle(VT, dl: DL, N1: MSN->getValue(),
16457 N2: DAG.getUNDEF(VT), Mask: ShuffleMask);
16458 return DAG.getMaskedStore(Chain: MSN->getChain(), dl: DL, Val: Shuffle, Base: MSN->getBasePtr(),
16459 Offset: DAG.getUNDEF(VT: XLenVT), Mask: MSN->getMask(),
16460 MemVT: MSN->getMemoryVT(), MMO: MSN->getMemOperand(),
16461 AM: ISD::UNINDEXED, IsTruncating: false);
16462 }
16463 break;
16464 }
16465 case ISD::VP_GATHER: {
16466 const auto *VPGN = dyn_cast<VPGatherSDNode>(Val: N);
16467 SDValue Index = VPGN->getIndex();
16468 SDValue ScaleOp = VPGN->getScale();
16469 ISD::MemIndexType IndexType = VPGN->getIndexType();
16470 assert(!VPGN->isIndexScaled() &&
16471 "Scaled gather/scatter should not be formed");
16472
16473 SDLoc DL(N);
16474 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16475 return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL,
16476 Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index,
16477 ScaleOp, VPGN->getMask(),
16478 VPGN->getVectorLength()},
16479 MMO: VPGN->getMemOperand(), IndexType);
16480
16481 if (narrowIndex(N&: Index, IndexType, DAG))
16482 return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL,
16483 Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index,
16484 ScaleOp, VPGN->getMask(),
16485 VPGN->getVectorLength()},
16486 MMO: VPGN->getMemOperand(), IndexType);
16487
16488 break;
16489 }
16490 case ISD::VP_SCATTER: {
16491 const auto *VPSN = dyn_cast<VPScatterSDNode>(Val: N);
16492 SDValue Index = VPSN->getIndex();
16493 SDValue ScaleOp = VPSN->getScale();
16494 ISD::MemIndexType IndexType = VPSN->getIndexType();
16495 assert(!VPSN->isIndexScaled() &&
16496 "Scaled gather/scatter should not be formed");
16497
16498 SDLoc DL(N);
16499 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16500 return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL,
16501 Ops: {VPSN->getChain(), VPSN->getValue(),
16502 VPSN->getBasePtr(), Index, ScaleOp,
16503 VPSN->getMask(), VPSN->getVectorLength()},
16504 MMO: VPSN->getMemOperand(), IndexType);
16505
16506 if (narrowIndex(N&: Index, IndexType, DAG))
16507 return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL,
16508 Ops: {VPSN->getChain(), VPSN->getValue(),
16509 VPSN->getBasePtr(), Index, ScaleOp,
16510 VPSN->getMask(), VPSN->getVectorLength()},
16511 MMO: VPSN->getMemOperand(), IndexType);
16512 break;
16513 }
16514 case RISCVISD::SHL_VL:
16515 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16516 return V;
16517 [[fallthrough]];
16518 case RISCVISD::SRA_VL:
16519 case RISCVISD::SRL_VL: {
16520 SDValue ShAmt = N->getOperand(Num: 1);
16521 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16522 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16523 SDLoc DL(N);
16524 SDValue VL = N->getOperand(Num: 4);
16525 EVT VT = N->getValueType(ResNo: 0);
16526 ShAmt = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
16527 N2: ShAmt.getOperand(i: 1), N3: VL);
16528 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt,
16529 N3: N->getOperand(Num: 2), N4: N->getOperand(Num: 3), N5: N->getOperand(Num: 4));
16530 }
16531 break;
16532 }
16533 case ISD::SRA:
16534 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16535 return V;
16536 [[fallthrough]];
16537 case ISD::SRL:
16538 case ISD::SHL: {
16539 if (N->getOpcode() == ISD::SHL) {
16540 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16541 return V;
16542 }
16543 SDValue ShAmt = N->getOperand(Num: 1);
16544 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16545 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16546 SDLoc DL(N);
16547 EVT VT = N->getValueType(ResNo: 0);
16548 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16549 ShAmt.getOperand(1),
16550 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16551 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt);
16552 }
16553 break;
16554 }
16555 case RISCVISD::ADD_VL:
16556 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16557 return V;
16558 return combineToVWMACC(N, DAG, Subtarget);
16559 case RISCVISD::VWADD_W_VL:
16560 case RISCVISD::VWADDU_W_VL:
16561 case RISCVISD::VWSUB_W_VL:
16562 case RISCVISD::VWSUBU_W_VL:
16563 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16564 case RISCVISD::SUB_VL:
16565 case RISCVISD::MUL_VL:
16566 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16567 case RISCVISD::VFMADD_VL:
16568 case RISCVISD::VFNMADD_VL:
16569 case RISCVISD::VFMSUB_VL:
16570 case RISCVISD::VFNMSUB_VL:
16571 case RISCVISD::STRICT_VFMADD_VL:
16572 case RISCVISD::STRICT_VFNMADD_VL:
16573 case RISCVISD::STRICT_VFMSUB_VL:
16574 case RISCVISD::STRICT_VFNMSUB_VL:
16575 return performVFMADD_VLCombine(N, DAG, Subtarget);
16576 case RISCVISD::FADD_VL:
16577 case RISCVISD::FSUB_VL:
16578 case RISCVISD::FMUL_VL:
16579 case RISCVISD::VFWADD_W_VL:
16580 case RISCVISD::VFWSUB_W_VL: {
16581 if (N->getValueType(0).isScalableVector() &&
16582 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16583 (Subtarget.hasVInstructionsF16Minimal() &&
16584 !Subtarget.hasVInstructionsF16()))
16585 return SDValue();
16586 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16587 }
16588 case ISD::LOAD:
16589 case ISD::STORE: {
16590 if (DCI.isAfterLegalizeDAG())
16591 if (SDValue V = performMemPairCombine(N, DCI))
16592 return V;
16593
16594 if (N->getOpcode() != ISD::STORE)
16595 break;
16596
16597 auto *Store = cast<StoreSDNode>(Val: N);
16598 SDValue Chain = Store->getChain();
16599 EVT MemVT = Store->getMemoryVT();
16600 SDValue Val = Store->getValue();
16601 SDLoc DL(N);
16602
16603 bool IsScalarizable =
16604 MemVT.isFixedLengthVector() && ISD::isNormalStore(N: Store) &&
16605 Store->isSimple() &&
16606 MemVT.getVectorElementType().bitsLE(VT: Subtarget.getXLenVT()) &&
16607 isPowerOf2_64(Value: MemVT.getSizeInBits()) &&
16608 MemVT.getSizeInBits() <= Subtarget.getXLen();
16609
16610 // If sufficiently aligned we can scalarize stores of constant vectors of
16611 // any power-of-two size up to XLen bits, provided that they aren't too
16612 // expensive to materialize.
16613 // vsetivli zero, 2, e8, m1, ta, ma
16614 // vmv.v.i v8, 4
    //   vse8.v v8, (a0)
16616 // ->
16617 // li a1, 1028
16618 // sh a1, 0(a0)
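    // (Here 1028 == 0x0404, i.e. the two i8 elements of value 4 packed into
    // the halfword that sh stores.)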
16619 if (DCI.isBeforeLegalize() && IsScalarizable &&
16620 ISD::isBuildVectorOfConstantSDNodes(N: Val.getNode())) {
16621 // Get the constant vector bits
16622 APInt NewC(Val.getValueSizeInBits(), 0);
16623 uint64_t EltSize = Val.getScalarValueSizeInBits();
16624 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16625 if (Val.getOperand(i).isUndef())
16626 continue;
16627 NewC.insertBits(SubBits: Val.getConstantOperandAPInt(i).trunc(width: EltSize),
16628 bitPosition: i * EltSize);
16629 }
16630 MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits());
16631
16632 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16633 true) <= 2 &&
16634 allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
16635 VT: NewVT, MMO: *Store->getMemOperand())) {
16636 SDValue NewV = DAG.getConstant(Val: NewC, DL, VT: NewVT);
16637 return DAG.getStore(Chain, dl: DL, Val: NewV, Ptr: Store->getBasePtr(),
16638 PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(),
16639 MMOFlags: Store->getMemOperand()->getFlags());
16640 }
16641 }
16642
16643 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16644 // vsetivli zero, 2, e16, m1, ta, ma
16645 // vle16.v v8, (a0)
16646 // vse16.v v8, (a1)
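    // ->
    //   lw a2, 0(a0)
    //   sw a2, 0(a1)
    // (illustrative scalarized form; the 2 x e16 copy is a 32-bit copy)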
16647 if (auto *L = dyn_cast<LoadSDNode>(Val);
16648 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16649 L->hasNUsesOfValue(NUses: 1, Value: 0) && L->hasNUsesOfValue(NUses: 1, Value: 1) &&
16650 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(N: L) &&
16651 L->getMemoryVT() == MemVT) {
16652 MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits());
16653 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
16654 VT: NewVT, MMO: *Store->getMemOperand()) &&
16655 allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
16656 VT: NewVT, MMO: *L->getMemOperand())) {
16657 SDValue NewL = DAG.getLoad(VT: NewVT, dl: DL, Chain: L->getChain(), Ptr: L->getBasePtr(),
16658 PtrInfo: L->getPointerInfo(), Alignment: L->getOriginalAlign(),
16659 MMOFlags: L->getMemOperand()->getFlags());
16660 return DAG.getStore(Chain, dl: DL, Val: NewL, Ptr: Store->getBasePtr(),
16661 PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(),
16662 MMOFlags: Store->getMemOperand()->getFlags());
16663 }
16664 }
16665
16666 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16667 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16668 // any illegal types.
16669 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16670 (DCI.isAfterLegalizeDAG() &&
16671 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16672 isNullConstant(V: Val.getOperand(i: 1)))) {
16673 SDValue Src = Val.getOperand(i: 0);
16674 MVT VecVT = Src.getSimpleValueType();
16675 // VecVT should be scalable and memory VT should match the element type.
16676 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16677 MemVT == VecVT.getVectorElementType()) {
16678 SDLoc DL(N);
16679 MVT MaskVT = getMaskTypeFor(VecVT);
16680 return DAG.getStoreVP(
16681 Chain: Store->getChain(), dl: DL, Val: Src, Ptr: Store->getBasePtr(), Offset: Store->getOffset(),
16682 Mask: DAG.getConstant(Val: 1, DL, VT: MaskVT),
16683 EVL: DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT()), MemVT,
16684 MMO: Store->getMemOperand(), AM: Store->getAddressingMode(),
16685 IsTruncating: Store->isTruncatingStore(), /*IsCompress*/ IsCompressing: false);
16686 }
16687 }
16688
16689 break;
16690 }
16691 case ISD::SPLAT_VECTOR: {
16692 EVT VT = N->getValueType(ResNo: 0);
16693 // Only perform this combine on legal MVT types.
16694 if (!isTypeLegal(VT))
16695 break;
16696 if (auto Gather = matchSplatAsGather(SplatVal: N->getOperand(Num: 0), VT: VT.getSimpleVT(), DL: N,
16697 DAG, Subtarget))
16698 return Gather;
16699 break;
16700 }
16701 case ISD::BUILD_VECTOR:
16702 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, TLI: *this))
16703 return V;
16704 break;
16705 case ISD::CONCAT_VECTORS:
16706 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, TLI: *this))
16707 return V;
16708 break;
16709 case ISD::INSERT_VECTOR_ELT:
16710 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, TLI: *this))
16711 return V;
16712 break;
16713 case RISCVISD::VFMV_V_F_VL: {
16714 const MVT VT = N->getSimpleValueType(ResNo: 0);
16715 SDValue Passthru = N->getOperand(Num: 0);
16716 SDValue Scalar = N->getOperand(Num: 1);
16717 SDValue VL = N->getOperand(Num: 2);
16718
16719 // If VL is 1, we can use vfmv.s.f.
16720 if (isOneConstant(V: VL))
16721 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
16722 break;
16723 }
16724 case RISCVISD::VMV_V_X_VL: {
16725 const MVT VT = N->getSimpleValueType(ResNo: 0);
16726 SDValue Passthru = N->getOperand(Num: 0);
16727 SDValue Scalar = N->getOperand(Num: 1);
16728 SDValue VL = N->getOperand(Num: 2);
16729
16730 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16731 // scalar input.
16732 unsigned ScalarSize = Scalar.getValueSizeInBits();
16733 unsigned EltWidth = VT.getScalarSizeInBits();
16734 if (ScalarSize > EltWidth && Passthru.isUndef())
16735 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16736 return SDValue(N, 0);
16737
    // If VL is 1 and the scalar value won't benefit from being an immediate,
    // we can use vmv.s.x instead.
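    // For example (illustrative): a VL=1 splat of 1234 becomes vmv.s.x,
    // whereas a VL=1 splat of 3 is left alone since 3 fits a 5-bit immediate
    // and can be emitted as vmv.v.i.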
16740 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar);
16741 if (isOneConstant(V: VL) &&
16742 (!Const || Const->isZero() ||
16743 !Const->getAPIntValue().sextOrTrunc(width: EltWidth).isSignedIntN(N: 5)))
16744 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
16745
16746 break;
16747 }
16748 case RISCVISD::VFMV_S_F_VL: {
16749 SDValue Src = N->getOperand(Num: 1);
16750 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16751 // into an undef vector.
16752 // TODO: Could use a vslide or vmv.v.v for non-undef.
16753 if (N->getOperand(Num: 0).isUndef() &&
16754 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16755 isNullConstant(V: Src.getOperand(i: 1)) &&
16756 Src.getOperand(i: 0).getValueType().isScalableVector()) {
16757 EVT VT = N->getValueType(ResNo: 0);
16758 EVT SrcVT = Src.getOperand(i: 0).getValueType();
16759 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
16760 // Widths match, just return the original vector.
16761 if (SrcVT == VT)
16762 return Src.getOperand(i: 0);
16763 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16764 }
16765 [[fallthrough]];
16766 }
16767 case RISCVISD::VMV_S_X_VL: {
16768 const MVT VT = N->getSimpleValueType(ResNo: 0);
16769 SDValue Passthru = N->getOperand(Num: 0);
16770 SDValue Scalar = N->getOperand(Num: 1);
16771 SDValue VL = N->getOperand(Num: 2);
16772
    // Use M1 or smaller to avoid over-constraining register allocation.
16774 const MVT M1VT = getLMUL1VT(VT);
16775 if (M1VT.bitsLT(VT)) {
16776 SDValue M1Passthru =
16777 DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Passthru,
16778 N2: DAG.getVectorIdxConstant(Val: 0, DL));
16779 SDValue Result =
16780 DAG.getNode(Opcode: N->getOpcode(), DL, VT: M1VT, N1: M1Passthru, N2: Scalar, N3: VL);
16781 Result = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru, N2: Result,
16782 N3: DAG.getVectorIdxConstant(Val: 0, DL));
16783 return Result;
16784 }
16785
16786 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16787 // higher would involve overly constraining the register allocator for
16788 // no purpose.
16789 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar);
16790 Const && !Const->isZero() && isInt<5>(x: Const->getSExtValue()) &&
16791 VT.bitsLE(VT: getLMUL1VT(VT)) && Passthru.isUndef())
16792 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
16793
16794 break;
16795 }
16796 case RISCVISD::VMV_X_S: {
16797 SDValue Vec = N->getOperand(Num: 0);
16798 MVT VecVT = N->getOperand(Num: 0).getSimpleValueType();
16799 const MVT M1VT = getLMUL1VT(VT: VecVT);
16800 if (M1VT.bitsLT(VT: VecVT)) {
16801 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec,
16802 N2: DAG.getVectorIdxConstant(Val: 0, DL));
16803 return DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: N->getSimpleValueType(ResNo: 0), Operand: Vec);
16804 }
16805 break;
16806 }
16807 case ISD::INTRINSIC_VOID:
16808 case ISD::INTRINSIC_W_CHAIN:
16809 case ISD::INTRINSIC_WO_CHAIN: {
16810 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16811 unsigned IntNo = N->getConstantOperandVal(Num: IntOpNo);
16812 switch (IntNo) {
16813 // By default we do not combine any intrinsic.
16814 default:
16815 return SDValue();
16816 case Intrinsic::riscv_masked_strided_load: {
16817 MVT VT = N->getSimpleValueType(ResNo: 0);
16818 auto *Load = cast<MemIntrinsicSDNode>(Val: N);
16819 SDValue PassThru = N->getOperand(Num: 2);
16820 SDValue Base = N->getOperand(Num: 3);
16821 SDValue Stride = N->getOperand(Num: 4);
16822 SDValue Mask = N->getOperand(Num: 5);
16823
16824 // If the stride is equal to the element size in bytes, we can use
16825 // a masked.load.
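      // For example (illustrative): a strided load of e32 elements with a
      // byte stride of 4 reads consecutive words, so it is equivalent to a
      // unit-stride masked.load.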
16826 const unsigned ElementSize = VT.getScalarStoreSize();
16827 if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride);
16828 StrideC && StrideC->getZExtValue() == ElementSize)
16829 return DAG.getMaskedLoad(VT, dl: DL, Chain: Load->getChain(), Base,
16830 Offset: DAG.getUNDEF(VT: XLenVT), Mask, Src0: PassThru,
16831 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand(),
16832 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD);
16833 return SDValue();
16834 }
16835 case Intrinsic::riscv_masked_strided_store: {
16836 auto *Store = cast<MemIntrinsicSDNode>(Val: N);
16837 SDValue Value = N->getOperand(Num: 2);
16838 SDValue Base = N->getOperand(Num: 3);
16839 SDValue Stride = N->getOperand(Num: 4);
16840 SDValue Mask = N->getOperand(Num: 5);
16841
16842 // If the stride is equal to the element size in bytes, we can use
16843 // a masked.store.
16844 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16845 if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride);
16846 StrideC && StrideC->getZExtValue() == ElementSize)
16847 return DAG.getMaskedStore(Chain: Store->getChain(), dl: DL, Val: Value, Base,
16848 Offset: DAG.getUNDEF(VT: XLenVT), Mask,
16849 MemVT: Value.getValueType(), MMO: Store->getMemOperand(),
16850 AM: ISD::UNINDEXED, IsTruncating: false);
16851 return SDValue();
16852 }
16853 case Intrinsic::riscv_vcpop:
16854 case Intrinsic::riscv_vcpop_mask:
16855 case Intrinsic::riscv_vfirst:
16856 case Intrinsic::riscv_vfirst_mask: {
16857 SDValue VL = N->getOperand(Num: 2);
16858 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16859 IntNo == Intrinsic::riscv_vfirst_mask)
16860 VL = N->getOperand(Num: 3);
16861 if (!isNullConstant(V: VL))
16862 return SDValue();
16863 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16864 SDLoc DL(N);
16865 EVT VT = N->getValueType(ResNo: 0);
16866 if (IntNo == Intrinsic::riscv_vfirst ||
16867 IntNo == Intrinsic::riscv_vfirst_mask)
16868 return DAG.getConstant(Val: -1, DL, VT);
16869 return DAG.getConstant(Val: 0, DL, VT);
16870 }
16871 }
16872 }
16873 case ISD::BITCAST: {
16874 assert(Subtarget.useRVVForFixedLengthVectors());
16875 SDValue N0 = N->getOperand(Num: 0);
16876 EVT VT = N->getValueType(ResNo: 0);
16877 EVT SrcVT = N0.getValueType();
16878 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16879 // type, widen both sides to avoid a trip through memory.
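    // For example (illustrative): (i4 (bitcast (v4i1 V))) becomes a concat
    // of V with one undef v4i1 to form v8i1, a bitcast of that to i8, and a
    // truncate of the i8 back to i4.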
16880 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16881 VT.isScalarInteger()) {
16882 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16883 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT: SrcVT));
16884 Ops[0] = N0;
16885 SDLoc DL(N);
16886 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16887 N0 = DAG.getBitcast(MVT::i8, N0);
16888 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0);
16889 }
16890
16891 return SDValue();
16892 }
16893 }
16894
16895 return SDValue();
16896}
16897
16898bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16899 EVT XVT, unsigned KeptBits) const {
  // For vectors, we don't have a preference.
16901 if (XVT.isVector())
16902 return false;
16903
16904 if (XVT != MVT::i32 && XVT != MVT::i64)
16905 return false;
16906
16907 // We can use sext.w for RV64 or an srai 31 on RV32.
16908 if (KeptBits == 32 || KeptBits == 64)
16909 return true;
16910
16911 // With Zbb we can use sext.h/sext.b.
16912 return Subtarget.hasStdExtZbb() &&
16913 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16914 KeptBits == 16);
16915}
16916
16917bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16918 const SDNode *N, CombineLevel Level) const {
16919 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16920 N->getOpcode() == ISD::SRL) &&
16921 "Expected shift op");
16922
16923 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16924 // materialised in fewer instructions than `(OP _, c1)`:
16925 //
16926 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16927 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
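  // For example (illustrative): with c1 = 0x7ff and c2 = 12, c1 fits an
  // addi immediate but c1 << c2 = 0x7ff000 does not, so the fold is
  // prevented; with c1 = 1 and c2 = 3, the shifted constant 8 is still a
  // legal add immediate, so the fold is allowed.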
16928 SDValue N0 = N->getOperand(Num: 0);
16929 EVT Ty = N0.getValueType();
16930 if (Ty.isScalarInteger() &&
16931 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16932 auto *C1 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
16933 auto *C2 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
16934 if (C1 && C2) {
16935 const APInt &C1Int = C1->getAPIntValue();
16936 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16937
16938 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16939 // and the combine should happen, to potentially allow further combines
16940 // later.
16941 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16942 isLegalAddImmediate(Imm: ShiftedC1Int.getSExtValue()))
16943 return true;
16944
16945 // We can materialise `c1` in an add immediate, so it's "free", and the
16946 // combine should be prevented.
16947 if (C1Int.getSignificantBits() <= 64 &&
16948 isLegalAddImmediate(Imm: C1Int.getSExtValue()))
16949 return false;
16950
16951 // Neither constant will fit into an immediate, so find materialisation
16952 // costs.
16953 int C1Cost =
16954 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16955 /*CompressionCost*/ true);
16956 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16957 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16958 /*CompressionCost*/ true);
16959
16960 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16961 // combine should be prevented.
16962 if (C1Cost < ShiftedC1Cost)
16963 return false;
16964 }
16965 }
16966 return true;
16967}
16968
16969bool RISCVTargetLowering::targetShrinkDemandedConstant(
16970 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16971 TargetLoweringOpt &TLO) const {
16972 // Delay this optimization as late as possible.
16973 if (!TLO.LegalOps)
16974 return false;
16975
16976 EVT VT = Op.getValueType();
16977 if (VT.isVector())
16978 return false;
16979
16980 unsigned Opcode = Op.getOpcode();
16981 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
16982 return false;
16983
16984 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
16985 if (!C)
16986 return false;
16987
16988 const APInt &Mask = C->getAPIntValue();
16989
16990 // Clear all non-demanded bits initially.
16991 APInt ShrunkMask = Mask & DemandedBits;
16992
16993 // Try to make a smaller immediate by setting undemanded bits.
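  // Worked example (illustrative): on RV64, for (and X, 0xFFFFFF00) where
  // only the low 32 bits are demanded, ShrunkMask = 0xFFFFFF00 does not fit
  // simm12, but ExpandedMask = 0xFFFFFFFFFFFFFF00 is the 9-bit signed value
  // -256, so the constant can be replaced and the AND becomes andi X, -256.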
16994
16995 APInt ExpandedMask = Mask | ~DemandedBits;
16996
16997 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
16998 return ShrunkMask.isSubsetOf(RHS: Mask) && Mask.isSubsetOf(RHS: ExpandedMask);
16999 };
17000 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17001 if (NewMask == Mask)
17002 return true;
17003 SDLoc DL(Op);
17004 SDValue NewC = TLO.DAG.getConstant(Val: NewMask, DL, VT: Op.getValueType());
17005 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
17006 N1: Op.getOperand(i: 0), N2: NewC);
17007 return TLO.CombineTo(O: Op, N: NewOp);
17008 };
17009
17010 // If the shrunk mask fits in sign extended 12 bits, let the target
17011 // independent code apply it.
17012 if (ShrunkMask.isSignedIntN(N: 12))
17013 return false;
17014
  // AND has a few special cases for zero extension.
17016 if (Opcode == ISD::AND) {
    // Preserve (and X, 0xffff); if zext.h exists use zext.h, otherwise
    // use SLLI + SRLI.
17019 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17020 if (IsLegalMask(NewMask))
17021 return UseMask(NewMask);
17022
17023 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17024 if (VT == MVT::i64) {
17025 APInt NewMask = APInt(64, 0xffffffff);
17026 if (IsLegalMask(NewMask))
17027 return UseMask(NewMask);
17028 }
17029 }
17030
17031 // For the remaining optimizations, we need to be able to make a negative
17032 // number through a combination of mask and undemanded bits.
17033 if (!ExpandedMask.isNegative())
17034 return false;
17035
  // Compute the fewest number of bits needed to represent the negative
  // number.
17037 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17038
17039 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17040 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17041 // If we can't create a simm12, we shouldn't change opaque constants.
17042 APInt NewMask = ShrunkMask;
17043 if (MinSignedBits <= 12)
17044 NewMask.setBitsFrom(11);
17045 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(N: 32))
17046 NewMask.setBitsFrom(31);
17047 else
17048 return false;
17049
17050 // Check that our new mask is a subset of the demanded mask.
17051 assert(IsLegalMask(NewMask));
17052 return UseMask(NewMask);
17053}
17054
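// Emulate the generalized bit-reverse (GREV) / or-combine (GORC) permutation
// network for control value ShAmt. A control value of 7 corresponds to
// brev8 / orc.b. For example (illustrative):
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80
//     (brev8 reverses the bits within each byte)
//   computeGREVOrGORC(0x00010200, 7, /*IsGORC=*/true) == 0x00FFFF00
//     (orc.b smears any set bit across its byte)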
17055static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17056 static const uint64_t GREVMasks[] = {
17057 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17058 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17059
17060 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17061 unsigned Shift = 1 << Stage;
17062 if (ShAmt & Shift) {
17063 uint64_t Mask = GREVMasks[Stage];
17064 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17065 if (IsGORC)
17066 Res |= x;
17067 x = Res;
17068 }
17069 }
17070
17071 return x;
17072}
17073
17074void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17075 KnownBits &Known,
17076 const APInt &DemandedElts,
17077 const SelectionDAG &DAG,
17078 unsigned Depth) const {
17079 unsigned BitWidth = Known.getBitWidth();
17080 unsigned Opc = Op.getOpcode();
17081 assert((Opc >= ISD::BUILTIN_OP_END ||
17082 Opc == ISD::INTRINSIC_WO_CHAIN ||
17083 Opc == ISD::INTRINSIC_W_CHAIN ||
17084 Opc == ISD::INTRINSIC_VOID) &&
17085 "Should use MaskedValueIsZero if you don't know whether Op"
17086 " is a target node!");
17087
17088 Known.resetAll();
17089 switch (Opc) {
17090 default: break;
17091 case RISCVISD::SELECT_CC: {
17092 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 4), Depth: Depth + 1);
17093 // If we don't know any bits, early out.
17094 if (Known.isUnknown())
17095 break;
17096 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 3), Depth: Depth + 1);
17097
17098 // Only known if known in both the LHS and RHS.
17099 Known = Known.intersectWith(RHS: Known2);
17100 break;
17101 }
17102 case RISCVISD::CZERO_EQZ:
17103 case RISCVISD::CZERO_NEZ:
17104 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17105 // Result is either all zero or operand 0. We can propagate zeros, but not
17106 // ones.
17107 Known.One.clearAllBits();
17108 break;
17109 case RISCVISD::REMUW: {
17110 KnownBits Known2;
17111 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17112 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
17113 // We only care about the lower 32 bits.
17114 Known = KnownBits::urem(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32));
17115 // Restore the original width by sign extending.
17116 Known = Known.sext(BitWidth);
17117 break;
17118 }
17119 case RISCVISD::DIVUW: {
17120 KnownBits Known2;
17121 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17122 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
17123 // We only care about the lower 32 bits.
17124 Known = KnownBits::udiv(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32));
17125 // Restore the original width by sign extending.
17126 Known = Known.sext(BitWidth);
17127 break;
17128 }
17129 case RISCVISD::SLLW: {
17130 KnownBits Known2;
17131 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17132 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
17133 Known = KnownBits::shl(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 5).zext(BitWidth: 32));
17134 // Restore the original width by sign extending.
17135 Known = Known.sext(BitWidth);
17136 break;
17137 }
17138 case RISCVISD::CTZW: {
17139 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17140 unsigned PossibleTZ = Known2.trunc(BitWidth: 32).countMaxTrailingZeros();
17141 unsigned LowBits = llvm::bit_width(Value: PossibleTZ);
17142 Known.Zero.setBitsFrom(LowBits);
17143 break;
17144 }
17145 case RISCVISD::CLZW: {
17146 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17147 unsigned PossibleLZ = Known2.trunc(BitWidth: 32).countMaxLeadingZeros();
17148 unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
17149 Known.Zero.setBitsFrom(LowBits);
17150 break;
17151 }
17152 case RISCVISD::BREV8:
17153 case RISCVISD::ORC_B: {
17154 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17155 // control value of 7 is equivalent to brev8 and orc.b.
17156 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
17157 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17158 // To compute zeros, we need to invert the value and invert it back after.
17159 Known.Zero =
17160 ~computeGREVOrGORC(x: ~Known.Zero.getZExtValue(), ShAmt: 7, IsGORC);
17161 Known.One = computeGREVOrGORC(x: Known.One.getZExtValue(), ShAmt: 7, IsGORC);
17162 break;
17163 }
17164 case RISCVISD::READ_VLENB: {
17165 // We can use the minimum and maximum VLEN values to bound VLENB. We
17166 // know VLEN must be a power of two.
17167 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17168 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17169 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17170 Known.Zero.setLowBits(Log2_32(Value: MinVLenB));
17171 Known.Zero.setBitsFrom(Log2_32(Value: MaxVLenB)+1);
17172 if (MaxVLenB == MinVLenB)
17173 Known.One.setBit(Log2_32(Value: MinVLenB));
17174 break;
17175 }
17176 case RISCVISD::FCLASS: {
17177 // fclass will only set one of the low 10 bits.
17178 Known.Zero.setBitsFrom(10);
17179 break;
17180 }
17181 case ISD::INTRINSIC_W_CHAIN:
17182 case ISD::INTRINSIC_WO_CHAIN: {
17183 unsigned IntNo =
17184 Op.getConstantOperandVal(i: Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17185 switch (IntNo) {
17186 default:
17187 // We can't do anything for most intrinsics.
17188 break;
17189 case Intrinsic::riscv_vsetvli:
17190 case Intrinsic::riscv_vsetvlimax: {
17191 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17192 unsigned VSEW = Op.getConstantOperandVal(i: HasAVL + 1);
17193 RISCVII::VLMUL VLMUL =
17194 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(i: HasAVL + 2));
17195 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17196 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17197 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17198 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
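      // For example (illustrative): with SEW=32, LMUL=2 and a 512-bit
      // maximum VLEN, MaxVL = (512 / 32) * 2 = 32, so bits 6 and above of
      // the result are known to be zero.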
17199
      // The result of vsetvli must not be larger than AVL.
17201 if (HasAVL && isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
17202 MaxVL = std::min(a: MaxVL, b: Op.getConstantOperandVal(i: 1));
17203
17204 unsigned KnownZeroFirstBit = Log2_32(Value: MaxVL) + 1;
17205 if (BitWidth > KnownZeroFirstBit)
17206 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17207 break;
17208 }
17209 }
17210 break;
17211 }
17212 }
17213}
17214
17215unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17216 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17217 unsigned Depth) const {
17218 switch (Op.getOpcode()) {
17219 default:
17220 break;
17221 case RISCVISD::SELECT_CC: {
17222 unsigned Tmp =
17223 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 3), DemandedElts, Depth: Depth + 1);
17224 if (Tmp == 1) return 1; // Early out.
17225 unsigned Tmp2 =
17226 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 4), DemandedElts, Depth: Depth + 1);
17227 return std::min(a: Tmp, b: Tmp2);
17228 }
17229 case RISCVISD::CZERO_EQZ:
17230 case RISCVISD::CZERO_NEZ:
17231 // Output is either all zero or operand 0. We can propagate sign bit count
17232 // from operand 0.
17233 return DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17234 case RISCVISD::ABSW: {
17235 // We expand this at isel to negw+max. The result will have 33 sign bits
17236 // if the input has at least 33 sign bits.
17237 unsigned Tmp =
17238 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
17239 if (Tmp < 33) return 1;
17240 return 33;
17241 }
17242 case RISCVISD::SLLW:
17243 case RISCVISD::SRAW:
17244 case RISCVISD::SRLW:
17245 case RISCVISD::DIVW:
17246 case RISCVISD::DIVUW:
17247 case RISCVISD::REMUW:
17248 case RISCVISD::ROLW:
17249 case RISCVISD::RORW:
17250 case RISCVISD::FCVT_W_RV64:
17251 case RISCVISD::FCVT_WU_RV64:
17252 case RISCVISD::STRICT_FCVT_W_RV64:
17253 case RISCVISD::STRICT_FCVT_WU_RV64:
17254 // TODO: As the result is sign-extended, this is conservatively correct. A
17255 // more precise answer could be calculated for SRAW depending on known
17256 // bits in the shift amount.
17257 return 33;
17258 case RISCVISD::VMV_X_S: {
17259 // The number of sign bits of the scalar result is computed by obtaining the
17260 // element type of the input vector operand, subtracting its width from the
17261 // XLEN, and then adding one (sign bit within the element type). If the
17262 // element type is wider than XLen, the least-significant XLEN bits are
17263 // taken.
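    // For example (illustrative): on RV64, reading an i8 element gives
    // 64 - 8 + 1 = 57 known sign bits.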
17264 unsigned XLen = Subtarget.getXLen();
17265 unsigned EltBits = Op.getOperand(i: 0).getScalarValueSizeInBits();
17266 if (EltBits <= XLen)
17267 return XLen - EltBits + 1;
17268 break;
17269 }
17270 case ISD::INTRINSIC_W_CHAIN: {
17271 unsigned IntNo = Op.getConstantOperandVal(i: 1);
17272 switch (IntNo) {
17273 default:
17274 break;
17275 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17276 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17277 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17278 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17279 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17280 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17281 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17282 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17283 case Intrinsic::riscv_masked_cmpxchg_i64:
17284 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17285 // narrow atomic operation. These are implemented using atomic
17286 // operations at the minimum supported atomicrmw/cmpxchg width whose
17287 // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both RV64 and RV32.
17289 assert(Subtarget.getXLen() == 64);
17290 assert(getMinCmpXchgSizeInBits() == 32);
17291 assert(Subtarget.hasStdExtA());
17292 return 33;
17293 }
17294 break;
17295 }
17296 }
17297
17298 return 1;
17299}
17300
17301bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17302 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17303 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17304
17305 // TODO: Add more target nodes.
17306 switch (Op.getOpcode()) {
17307 case RISCVISD::SELECT_CC:
17308 // Integer select_cc cannot create poison.
17309 // TODO: What are the FP poison semantics?
17310 // TODO: This instruction blocks poison from the unselected operand, can
17311 // we do anything with that?
17312 return !Op.getValueType().isInteger();
17313 }
17314 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17315 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17316}
17317
17318const Constant *
17319RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17320 assert(Ld && "Unexpected null LoadSDNode");
17321 if (!ISD::isNormalLoad(N: Ld))
17322 return nullptr;
17323
17324 SDValue Ptr = Ld->getBasePtr();
17325
17326 // Only constant pools with no offset are supported.
17327 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17328 auto *CNode = dyn_cast<ConstantPoolSDNode>(Val&: Ptr);
17329 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17330 CNode->getOffset() != 0)
17331 return nullptr;
17332
17333 return CNode;
17334 };
17335
17336 // Simple case, LLA.
17337 if (Ptr.getOpcode() == RISCVISD::LLA) {
17338 auto *CNode = GetSupportedConstantPool(Ptr);
17339 if (!CNode || CNode->getTargetFlags() != 0)
17340 return nullptr;
17341
17342 return CNode->getConstVal();
17343 }
17344
17345 // Look for a HI and ADD_LO pair.
17346 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17347 Ptr.getOperand(i: 0).getOpcode() != RISCVISD::HI)
17348 return nullptr;
17349
17350 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(i: 1));
17351 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(i: 0).getOperand(i: 0));
17352
17353 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17354 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17355 return nullptr;
17356
17357 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17358 return nullptr;
17359
17360 return CNodeLo->getConstVal();
17361}
17362
17363static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17364 MachineBasicBlock *BB) {
17365 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17366
17367 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17368 // Should the count have wrapped while it was being read, we need to try
17369 // again.
17370 // For example:
17371 // ```
17372 // read:
17373 // csrrs x3, counterh # load high word of counter
17374 // csrrs x2, counter # load low word of counter
17375 // csrrs x4, counterh # load high word of counter
17376 // bne x3, x4, read # check if high word reads match, otherwise try again
17377 // ```
17378
17379 MachineFunction &MF = *BB->getParent();
17380 const BasicBlock *LLVMBB = BB->getBasicBlock();
17381 MachineFunction::iterator It = ++BB->getIterator();
17382
17383 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVMBB);
17384 MF.insert(MBBI: It, MBB: LoopMBB);
17385
17386 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(BB: LLVMBB);
17387 MF.insert(MBBI: It, MBB: DoneMBB);
17388
17389 // Transfer the remainder of BB and its successor edges to DoneMBB.
17390 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB,
17391 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
17392 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
17393
17394 BB->addSuccessor(Succ: LoopMBB);
17395
17396 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17397 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17398 Register LoReg = MI.getOperand(i: 0).getReg();
17399 Register HiReg = MI.getOperand(i: 1).getReg();
17400 int64_t LoCounter = MI.getOperand(i: 2).getImm();
17401 int64_t HiCounter = MI.getOperand(i: 3).getImm();
17402 DebugLoc DL = MI.getDebugLoc();
17403
17404 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17405 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17406 .addImm(HiCounter)
17407 .addReg(RISCV::X0);
17408 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17409 .addImm(LoCounter)
17410 .addReg(RISCV::X0);
17411 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17412 .addImm(HiCounter)
17413 .addReg(RISCV::X0);
17414
17415 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17416 .addReg(HiReg)
17417 .addReg(ReadAgainReg)
17418 .addMBB(LoopMBB);
17419
17420 LoopMBB->addSuccessor(Succ: LoopMBB);
17421 LoopMBB->addSuccessor(Succ: DoneMBB);
17422
17423 MI.eraseFromParent();
17424
17425 return DoneMBB;
17426}
17427
17428static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17429 MachineBasicBlock *BB,
17430 const RISCVSubtarget &Subtarget) {
17431 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17432
17433 MachineFunction &MF = *BB->getParent();
17434 DebugLoc DL = MI.getDebugLoc();
17435 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
17436 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
17437 Register LoReg = MI.getOperand(i: 0).getReg();
17438 Register HiReg = MI.getOperand(i: 1).getReg();
17439 Register SrcReg = MI.getOperand(i: 2).getReg();
17440
17441 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17442 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17443
17444 TII.storeRegToStackSlot(MBB&: *BB, MI, SrcReg, isKill: MI.getOperand(i: 2).isKill(), FrameIndex: FI, RC: SrcRC,
17445 TRI: RI, VReg: Register());
17446 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17447 MachineMemOperand *MMOLo =
17448 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8));
17449 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17450 PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8));
17451 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17452 .addFrameIndex(FI)
17453 .addImm(0)
17454 .addMemOperand(MMOLo);
17455 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17456 .addFrameIndex(FI)
17457 .addImm(4)
17458 .addMemOperand(MMOHi);
17459 MI.eraseFromParent(); // The pseudo instruction is gone now.
17460 return BB;
17461}
17462
17463static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17464 MachineBasicBlock *BB,
17465 const RISCVSubtarget &Subtarget) {
17466 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17467 "Unexpected instruction");
17468
17469 MachineFunction &MF = *BB->getParent();
17470 DebugLoc DL = MI.getDebugLoc();
17471 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
17472 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
17473 Register DstReg = MI.getOperand(i: 0).getReg();
17474 Register LoReg = MI.getOperand(i: 1).getReg();
17475 Register HiReg = MI.getOperand(i: 2).getReg();
17476
17477 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17478 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17479
17480 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17481 MachineMemOperand *MMOLo =
17482 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8));
17483 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17484 PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8));
17485 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17486 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17487 .addFrameIndex(FI)
17488 .addImm(0)
17489 .addMemOperand(MMOLo);
17490 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17491 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17492 .addFrameIndex(FI)
17493 .addImm(4)
17494 .addMemOperand(MMOHi);
17495 TII.loadRegFromStackSlot(MBB&: *BB, MI, DestReg: DstReg, FrameIndex: FI, RC: DstRC, TRI: RI, VReg: Register());
17496 MI.eraseFromParent(); // The pseudo instruction is gone now.
17497 return BB;
17498}
17499
17500static bool isSelectPseudo(MachineInstr &MI) {
17501 switch (MI.getOpcode()) {
17502 default:
17503 return false;
17504 case RISCV::Select_GPR_Using_CC_GPR:
17505 case RISCV::Select_FPR16_Using_CC_GPR:
17506 case RISCV::Select_FPR16INX_Using_CC_GPR:
17507 case RISCV::Select_FPR32_Using_CC_GPR:
17508 case RISCV::Select_FPR32INX_Using_CC_GPR:
17509 case RISCV::Select_FPR64_Using_CC_GPR:
17510 case RISCV::Select_FPR64INX_Using_CC_GPR:
17511 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17512 return true;
17513 }
17514}
17515
17516static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17517 unsigned RelOpcode, unsigned EqOpcode,
17518 const RISCVSubtarget &Subtarget) {
17519 DebugLoc DL = MI.getDebugLoc();
17520 Register DstReg = MI.getOperand(i: 0).getReg();
17521 Register Src1Reg = MI.getOperand(i: 1).getReg();
17522 Register Src2Reg = MI.getOperand(i: 2).getReg();
17523 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17524 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17525 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17526
17527 // Save the current FFLAGS.
17528 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17529
17530 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RelOpcode), DestReg: DstReg)
17531 .addReg(RegNo: Src1Reg)
17532 .addReg(RegNo: Src2Reg);
17533 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17534 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17535
17536 // Restore the FFLAGS.
17537 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17538 .addReg(SavedFFlags, RegState::Kill);
17539
17540 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17541 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17542 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17543 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17544 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17545 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
17546
17547 // Erase the pseudoinstruction.
17548 MI.eraseFromParent();
17549 return BB;
17550}
17551
17552static MachineBasicBlock *
17553EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17554 MachineBasicBlock *ThisMBB,
17555 const RISCVSubtarget &Subtarget) {
17556 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17557 // Without this, custom-inserter would have generated:
17558 //
17559 // A
17560 // | \
17561 // | B
17562 // | /
17563 // C
17564 // | \
17565 // | D
17566 // | /
17567 // E
17568 //
17569 // A: X = ...; Y = ...
17570 // B: empty
17571 // C: Z = PHI [X, A], [Y, B]
17572 // D: empty
17573 // E: PHI [X, C], [Z, D]
17574 //
17575 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17576 //
17577 // A
17578 // | \
17579 // | C
17580 // | /|
17581 // |/ |
17582 // | |
17583 // | D
17584 // | /
17585 // E
17586 //
17587 // A: X = ...; Y = ...
17588 // D: empty
17589 // E: PHI [X, A], [X, C], [Y, D]
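  //
  // For illustration only (the virtual register names here are hypothetical),
  // a cascade such as
  //   %z = Select_FPRX_ %a, %b, cc, %t1, %f1
  //   %w = Select_FPRX_ %a, %b, cc, %t2, %z
  // (the second select's false operand is the first select's result) is
  // lowered to the second CFG above, with a single three-input PHI for %w in
  // block E built at the end of this function.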
17590
17591 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17592 const DebugLoc &DL = First.getDebugLoc();
17593 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17594 MachineFunction *F = ThisMBB->getParent();
17595 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17596 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17597 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17598 MachineFunction::iterator It = ++ThisMBB->getIterator();
17599 F->insert(MBBI: It, MBB: FirstMBB);
17600 F->insert(MBBI: It, MBB: SecondMBB);
17601 F->insert(MBBI: It, MBB: SinkMBB);
17602
17603 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17604 SinkMBB->splice(Where: SinkMBB->begin(), Other: ThisMBB,
17605 From: std::next(x: MachineBasicBlock::iterator(First)),
17606 To: ThisMBB->end());
17607 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: ThisMBB);
17608
17609 // Fallthrough block for ThisMBB.
17610 ThisMBB->addSuccessor(Succ: FirstMBB);
17611 // Fallthrough block for FirstMBB.
17612 FirstMBB->addSuccessor(Succ: SecondMBB);
17613 ThisMBB->addSuccessor(Succ: SinkMBB);
17614 FirstMBB->addSuccessor(Succ: SinkMBB);
17615 // SecondMBB falls through to SinkMBB.
17616 SecondMBB->addSuccessor(Succ: SinkMBB);
17617
17618 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(i: 3).getImm());
17619 Register FLHS = First.getOperand(i: 1).getReg();
17620 Register FRHS = First.getOperand(i: 2).getReg();
17621 // Insert appropriate branch.
17622 BuildMI(BB: FirstMBB, MIMD: DL, MCID: TII.getBrCond(CC: FirstCC))
17623 .addReg(RegNo: FLHS)
17624 .addReg(RegNo: FRHS)
17625 .addMBB(MBB: SinkMBB);
17626
17627 Register SLHS = Second.getOperand(i: 1).getReg();
17628 Register SRHS = Second.getOperand(i: 2).getReg();
17629 Register Op1Reg4 = First.getOperand(i: 4).getReg();
17630 Register Op1Reg5 = First.getOperand(i: 5).getReg();
17631
17632 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(i: 3).getImm());
17633 // Insert appropriate branch.
17634 BuildMI(BB: ThisMBB, MIMD: DL, MCID: TII.getBrCond(CC: SecondCC))
17635 .addReg(RegNo: SLHS)
17636 .addReg(RegNo: SRHS)
17637 .addMBB(MBB: SinkMBB);
17638
17639 Register DestReg = Second.getOperand(i: 0).getReg();
17640 Register Op2Reg4 = Second.getOperand(i: 4).getReg();
17641 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17642 .addReg(Op2Reg4)
17643 .addMBB(ThisMBB)
17644 .addReg(Op1Reg4)
17645 .addMBB(FirstMBB)
17646 .addReg(Op1Reg5)
17647 .addMBB(SecondMBB);
17648
17649 // Now remove the Select_FPRX_s.
17650 First.eraseFromParent();
17651 Second.eraseFromParent();
17652 return SinkMBB;
17653}
17654
17655static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17656 MachineBasicBlock *BB,
17657 const RISCVSubtarget &Subtarget) {
17658 // To "insert" Select_* instructions, we actually have to insert the triangle
17659 // control-flow pattern. The incoming instructions know the destination vreg
17660 // to set, the condition code register to branch on, the true/false values to
17661 // select between, and the condcode to use to select the appropriate branch.
17662 //
17663 // We produce the following control flow:
17664 // HeadMBB
17665 // | \
17666 // | IfFalseMBB
17667 // | /
17668 // TailMBB
17669 //
17670 // When we find a sequence of selects we attempt to optimize their emission
17671 // by sharing the control flow. Currently we only handle cases where we have
17672 // multiple selects with the exact same condition (same LHS, RHS and CC).
17673 // The selects may be interleaved with other instructions if the other
17674 // instructions meet some requirements we deem safe:
17675 // - They are not pseudo instructions.
17676 // - They are debug instructions; otherwise,
17677 // - they do not have side-effects, do not access memory, and their inputs do
17678 // not depend on the results of the select pseudo-instructions.
17679 // The TrueV/FalseV operands of the selects cannot depend on the result of
17680 // previous selects in the sequence.
17681 // These conditions could be further relaxed. See the X86 target for a
17682 // related approach and more information.
17683 //
17684 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17685 // is checked here and handled by a separate function -
17686 // EmitLoweredCascadedSelect.
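  //
  // Operand layout of the Select_* pseudos handled here (as used below):
  // operand 0 = destination, 1 = compare LHS, 2 = compare RHS, 3 = condition
  // code immediate, 4 = true value, 5 = false value.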
17687 Register LHS = MI.getOperand(i: 1).getReg();
17688 Register RHS = MI.getOperand(i: 2).getReg();
17689 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
17690
17691 SmallVector<MachineInstr *, 4> SelectDebugValues;
17692 SmallSet<Register, 4> SelectDests;
17693 SelectDests.insert(V: MI.getOperand(i: 0).getReg());
17694
17695 MachineInstr *LastSelectPseudo = &MI;
17696 auto Next = next_nodbg(It: MI.getIterator(), End: BB->instr_end());
17697 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17698 Next->getOpcode() == MI.getOpcode() &&
17699 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17700 Next->getOperand(5).isKill()) {
17701 return EmitLoweredCascadedSelect(First&: MI, Second&: *Next, ThisMBB: BB, Subtarget);
17702 }
17703
17704 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17705 SequenceMBBI != E; ++SequenceMBBI) {
17706 if (SequenceMBBI->isDebugInstr())
17707 continue;
17708 if (isSelectPseudo(MI&: *SequenceMBBI)) {
17709 if (SequenceMBBI->getOperand(i: 1).getReg() != LHS ||
17710 SequenceMBBI->getOperand(i: 2).getReg() != RHS ||
17711 SequenceMBBI->getOperand(i: 3).getImm() != CC ||
17712 SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) ||
17713 SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg()))
17714 break;
17715 LastSelectPseudo = &*SequenceMBBI;
17716 SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues);
17717 SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg());
17718 continue;
17719 }
17720 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17721 SequenceMBBI->mayLoadOrStore() ||
17722 SequenceMBBI->usesCustomInsertionHook())
17723 break;
17724 if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) {
17725 return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
17726 }))
17727 break;
17728 }
17729
17730 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17731 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17732 DebugLoc DL = MI.getDebugLoc();
17733 MachineFunction::iterator I = ++BB->getIterator();
17734
17735 MachineBasicBlock *HeadMBB = BB;
17736 MachineFunction *F = BB->getParent();
17737 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17738 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
17739
17740 F->insert(MBBI: I, MBB: IfFalseMBB);
17741 F->insert(MBBI: I, MBB: TailMBB);
17742
17743 // Transfer debug instructions associated with the selects to TailMBB.
17744 for (MachineInstr *DebugInstr : SelectDebugValues) {
17745 TailMBB->push_back(MI: DebugInstr->removeFromParent());
17746 }
17747
17748 // Move all instructions after the sequence to TailMBB.
17749 TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
17750 From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
17751 // Update machine-CFG edges by transferring all successors of the current
17752 // block to the new block which will contain the Phi nodes for the selects.
17753 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
17754 // Set the successors for HeadMBB.
17755 HeadMBB->addSuccessor(Succ: IfFalseMBB);
17756 HeadMBB->addSuccessor(Succ: TailMBB);
17757
17758 // Insert appropriate branch.
17759 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.getBrCond(CC))
17760 .addReg(RegNo: LHS)
17761 .addReg(RegNo: RHS)
17762 .addMBB(MBB: TailMBB);
17763
17764 // IfFalseMBB just falls through to TailMBB.
17765 IfFalseMBB->addSuccessor(Succ: TailMBB);
17766
17767 // Create PHIs for all of the select pseudo-instructions.
17768 auto SelectMBBI = MI.getIterator();
17769 auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
17770 auto InsertionPoint = TailMBB->begin();
17771 while (SelectMBBI != SelectEnd) {
17772 auto Next = std::next(x: SelectMBBI);
17773 if (isSelectPseudo(MI&: *SelectMBBI)) {
17774 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17775 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17776 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17777 .addReg(SelectMBBI->getOperand(4).getReg())
17778 .addMBB(HeadMBB)
17779 .addReg(SelectMBBI->getOperand(5).getReg())
17780 .addMBB(IfFalseMBB);
17781 SelectMBBI->eraseFromParent();
17782 }
17783 SelectMBBI = Next;
17784 }
17785
17786 F->getProperties().reset(P: MachineFunctionProperties::Property::NoPHIs);
17787 return TailMBB;
17788}
17789
17790// Helper to find the masked pseudo instruction for an MC opcode, LMUL and SEW.
17791static const RISCV::RISCVMaskedPseudoInfo *
17792lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
17793 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
17794 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
17795 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
17796 const RISCV::RISCVMaskedPseudoInfo *Masked =
17797 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
17798 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
17799 return Masked;
17800}
17801
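// Lower the PseudoVFROUND_NOEXCEPT_V_*_MASK pseudos: round each element to an
// integral value in floating-point format without leaving the inexact flag
// set. This is done by saving FFLAGS, converting to integer and back
// (VFCVT_X_F followed by VFCVT_F_X) under the original mask, and then
// restoring FFLAGS.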
17802static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17803 MachineBasicBlock *BB,
17804 unsigned CVTXOpc) {
17805 DebugLoc DL = MI.getDebugLoc();
17806
17807 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17808
17809 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17810 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17811
17812 // Save the old value of FFLAGS.
17813 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17814
17815 assert(MI.getNumOperands() == 7);
17816
17817 // Emit a VFCVT_X_F
17818 const TargetRegisterInfo *TRI =
17819 BB->getParent()->getSubtarget().getRegisterInfo();
17820 const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx: 0, TII: &TII, TRI);
17821 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
17822 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17823 .add(MI.getOperand(1))
17824 .add(MI.getOperand(2))
17825 .add(MI.getOperand(3))
17826 .add(MachineOperand::CreateImm(7)) // frm = DYN
17827 .add(MI.getOperand(4))
17828 .add(MI.getOperand(5))
17829 .add(MI.getOperand(6))
17830 .add(MachineOperand::CreateReg(RISCV::FRM,
17831 /*IsDef*/ false,
17832 /*IsImp*/ true));
17833
17834 // Emit a VFCVT_F_X
17835 RISCVII::VLMUL LMul = RISCVII::getLMul(TSFlags: MI.getDesc().TSFlags);
17836 unsigned Log2SEW = MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm();
17837 // There is no E8 variant for VFCVT_F_X.
17838 assert(Log2SEW >= 4);
17839 unsigned CVTFOpc =
17840 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
17841 ->MaskedPseudo;
17842
17843 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17844 .add(MI.getOperand(0))
17845 .add(MI.getOperand(1))
17846 .addReg(Tmp)
17847 .add(MI.getOperand(3))
17848 .add(MachineOperand::CreateImm(7)) // frm = DYN
17849 .add(MI.getOperand(4))
17850 .add(MI.getOperand(5))
17851 .add(MI.getOperand(6))
17852 .add(MachineOperand::CreateReg(RISCV::FRM,
17853 /*IsDef*/ false,
17854 /*IsImp*/ true));
17855
17856 // Restore FFLAGS.
17857 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17858 .addReg(SavedFFLAGS, RegState::Kill);
17859
17860 // Erase the pseudoinstruction.
17861 MI.eraseFromParent();
17862 return BB;
17863}
17864
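// Lower the PseudoFROUND_* pseudos. The emitted sequence compares |Src|
// against the magnitude limit in operand 2; if |Src| is not smaller, the
// value is already integral (or NaN/inf) and is passed through unchanged.
// Otherwise CvtMBB converts it to an integer and back using the rounding mode
// from operand 3 and restores the original sign with FSGNJ.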
17865static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17866 const RISCVSubtarget &Subtarget) {
17867 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17868 const TargetRegisterClass *RC;
17869 switch (MI.getOpcode()) {
17870 default:
17871 llvm_unreachable("Unexpected opcode");
17872 case RISCV::PseudoFROUND_H:
17873 CmpOpc = RISCV::FLT_H;
17874 F2IOpc = RISCV::FCVT_W_H;
17875 I2FOpc = RISCV::FCVT_H_W;
17876 FSGNJOpc = RISCV::FSGNJ_H;
17877 FSGNJXOpc = RISCV::FSGNJX_H;
17878 RC = &RISCV::FPR16RegClass;
17879 break;
17880 case RISCV::PseudoFROUND_H_INX:
17881 CmpOpc = RISCV::FLT_H_INX;
17882 F2IOpc = RISCV::FCVT_W_H_INX;
17883 I2FOpc = RISCV::FCVT_H_W_INX;
17884 FSGNJOpc = RISCV::FSGNJ_H_INX;
17885 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17886 RC = &RISCV::GPRF16RegClass;
17887 break;
17888 case RISCV::PseudoFROUND_S:
17889 CmpOpc = RISCV::FLT_S;
17890 F2IOpc = RISCV::FCVT_W_S;
17891 I2FOpc = RISCV::FCVT_S_W;
17892 FSGNJOpc = RISCV::FSGNJ_S;
17893 FSGNJXOpc = RISCV::FSGNJX_S;
17894 RC = &RISCV::FPR32RegClass;
17895 break;
17896 case RISCV::PseudoFROUND_S_INX:
17897 CmpOpc = RISCV::FLT_S_INX;
17898 F2IOpc = RISCV::FCVT_W_S_INX;
17899 I2FOpc = RISCV::FCVT_S_W_INX;
17900 FSGNJOpc = RISCV::FSGNJ_S_INX;
17901 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17902 RC = &RISCV::GPRF32RegClass;
17903 break;
17904 case RISCV::PseudoFROUND_D:
17905 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17906 CmpOpc = RISCV::FLT_D;
17907 F2IOpc = RISCV::FCVT_L_D;
17908 I2FOpc = RISCV::FCVT_D_L;
17909 FSGNJOpc = RISCV::FSGNJ_D;
17910 FSGNJXOpc = RISCV::FSGNJX_D;
17911 RC = &RISCV::FPR64RegClass;
17912 break;
17913 case RISCV::PseudoFROUND_D_INX:
17914 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17915 CmpOpc = RISCV::FLT_D_INX;
17916 F2IOpc = RISCV::FCVT_L_D_INX;
17917 I2FOpc = RISCV::FCVT_D_L_INX;
17918 FSGNJOpc = RISCV::FSGNJ_D_INX;
17919 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17920 RC = &RISCV::GPRRegClass;
17921 break;
17922 }
17923
17924 const BasicBlock *BB = MBB->getBasicBlock();
17925 DebugLoc DL = MI.getDebugLoc();
17926 MachineFunction::iterator I = ++MBB->getIterator();
17927
17928 MachineFunction *F = MBB->getParent();
17929 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17930 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17931
17932 F->insert(MBBI: I, MBB: CvtMBB);
17933 F->insert(MBBI: I, MBB: DoneMBB);
17934 // Move all instructions after the sequence to DoneMBB.
17935 DoneMBB->splice(Where: DoneMBB->end(), Other: MBB, From: MachineBasicBlock::iterator(MI),
17936 To: MBB->end());
17937 // Update machine-CFG edges by transferring all successors of the current
17938 // block to the new block which will contain the Phi nodes for the selects.
17939 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
17940 // Set the successors for MBB.
17941 MBB->addSuccessor(Succ: CvtMBB);
17942 MBB->addSuccessor(Succ: DoneMBB);
17943
17944 Register DstReg = MI.getOperand(i: 0).getReg();
17945 Register SrcReg = MI.getOperand(i: 1).getReg();
17946 Register MaxReg = MI.getOperand(i: 2).getReg();
17947 int64_t FRM = MI.getOperand(i: 3).getImm();
17948
17949 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17950 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
17951
17952 Register FabsReg = MRI.createVirtualRegister(RegClass: RC);
17953 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17954
17955 // Compare the FP value to the max value.
17956 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17957 auto MIB =
17958 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
17959 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17960 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17961
17962 // Insert branch.
17963 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
17964 .addReg(CmpReg)
17965 .addReg(RISCV::X0)
17966 .addMBB(DoneMBB);
17967
17968 CvtMBB->addSuccessor(Succ: DoneMBB);
17969
17970 // Convert to integer.
17971 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17972 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
17973 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17974 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17975
17976 // Convert back to FP.
17977 Register I2FReg = MRI.createVirtualRegister(RegClass: RC);
17978 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
17979 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
17980 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17981
17982 // Restore the sign bit.
17983 Register CvtReg = MRI.createVirtualRegister(RegClass: RC);
17984 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
17985
17986 // Merge the results.
17987 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
17988 .addReg(SrcReg)
17989 .addMBB(MBB)
17990 .addReg(CvtReg)
17991 .addMBB(CvtMBB);
17992
17993 MI.eraseFromParent();
17994 return DoneMBB;
17995}
17996
17997MachineBasicBlock *
17998RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
17999 MachineBasicBlock *BB) const {
18000 switch (MI.getOpcode()) {
18001 default:
18002 llvm_unreachable("Unexpected instr type to insert");
18003 case RISCV::ReadCounterWide:
18004 assert(!Subtarget.is64Bit() &&
18005 "ReadCounterWide is only to be used on riscv32");
18006 return emitReadCounterWidePseudo(MI, BB);
18007 case RISCV::Select_GPR_Using_CC_GPR:
18008 case RISCV::Select_FPR16_Using_CC_GPR:
18009 case RISCV::Select_FPR16INX_Using_CC_GPR:
18010 case RISCV::Select_FPR32_Using_CC_GPR:
18011 case RISCV::Select_FPR32INX_Using_CC_GPR:
18012 case RISCV::Select_FPR64_Using_CC_GPR:
18013 case RISCV::Select_FPR64INX_Using_CC_GPR:
18014 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18015 return emitSelectPseudo(MI, BB, Subtarget);
18016 case RISCV::BuildPairF64Pseudo:
18017 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18018 case RISCV::SplitF64Pseudo:
18019 return emitSplitF64Pseudo(MI, BB, Subtarget);
18020 case RISCV::PseudoQuietFLE_H:
18021 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18022 case RISCV::PseudoQuietFLE_H_INX:
18023 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18024 case RISCV::PseudoQuietFLT_H:
18025 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18026 case RISCV::PseudoQuietFLT_H_INX:
18027 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18028 case RISCV::PseudoQuietFLE_S:
18029 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18030 case RISCV::PseudoQuietFLE_S_INX:
18031 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18032 case RISCV::PseudoQuietFLT_S:
18033 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18034 case RISCV::PseudoQuietFLT_S_INX:
18035 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18036 case RISCV::PseudoQuietFLE_D:
18037 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18038 case RISCV::PseudoQuietFLE_D_INX:
18039 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18040 case RISCV::PseudoQuietFLE_D_IN32X:
18041 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18042 Subtarget);
18043 case RISCV::PseudoQuietFLT_D:
18044 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18045 case RISCV::PseudoQuietFLT_D_INX:
18046 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18047 case RISCV::PseudoQuietFLT_D_IN32X:
18048 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18049 Subtarget);
18050
18051 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18052 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18053 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18054 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18055 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18056 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18057 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18058 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18059 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18060 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18061 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18062 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18063 case RISCV::PseudoFROUND_H:
18064 case RISCV::PseudoFROUND_H_INX:
18065 case RISCV::PseudoFROUND_S:
18066 case RISCV::PseudoFROUND_S_INX:
18067 case RISCV::PseudoFROUND_D:
18068 case RISCV::PseudoFROUND_D_INX:
18069 case RISCV::PseudoFROUND_D_IN32X:
18070 return emitFROUND(MI, MBB: BB, Subtarget);
18071 case TargetOpcode::STATEPOINT:
18072 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
18073 // while the JAL call instruction (to which the statepoint is eventually
18074 // lowered) has an implicit def of X1/ra. That def is early-clobber, as it
18075 // is written at the moment of the call, before any use is read.
18076 // Add this implicit dead def here as a workaround.
18077 MI.addOperand(*MI.getMF(),
18078 MachineOperand::CreateReg(
18079 RISCV::X1, /*isDef*/ true,
18080 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18081 /*isUndef*/ false, /*isEarlyClobber*/ true));
18082 [[fallthrough]];
18083 case TargetOpcode::STACKMAP:
18084 case TargetOpcode::PATCHPOINT:
18085 if (!Subtarget.is64Bit())
18086 report_fatal_error(reason: "STACKMAP, PATCHPOINT and STATEPOINT are only "
18087 "supported on 64-bit targets");
18088 return emitPatchPoint(MI, MBB: BB);
18089 }
18090}
18091
18092void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18093 SDNode *Node) const {
18094 // Add FRM dependency to any instructions with dynamic rounding mode.
18095 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18096 if (Idx < 0) {
18097 // Vector pseudos have FRM index indicated by TSFlags.
18098 Idx = RISCVII::getFRMOpNum(Desc: MI.getDesc());
18099 if (Idx < 0)
18100 return;
18101 }
18102 if (MI.getOperand(i: Idx).getImm() != RISCVFPRndMode::DYN)
18103 return;
18104 // If the instruction already reads FRM, don't add another read.
18105 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18106 return;
18107 MI.addOperand(
18108 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18109}
18110
18111// Calling Convention Implementation.
18112// The expectations for frontend ABI lowering vary from target to target.
18113// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18114// details, but this is a longer term goal. For now, we simply try to keep the
18115// role of the frontend as simple and well-defined as possible. The rules can
18116// be summarised as:
18117// * Never split up large scalar arguments. We handle them here.
18118// * If a hardfloat calling convention is being used, and the struct may be
18119// passed in a pair of registers (fp+fp, int+fp), and both registers are
18120// available, then pass as two separate arguments. If either the GPRs or FPRs
18121// are exhausted, then pass according to the rule below.
18122// * If a struct could never be passed in registers or directly in a stack
18123// slot (as it is larger than 2*XLEN and the floating point rules don't
18124// apply), then pass it using a pointer with the byval attribute.
18125// * If a struct is less than 2*XLEN, then coerce to either a two-element
18126// word-sized array or a 2*XLEN scalar (depending on alignment).
18127// * The frontend can determine whether a struct is returned by reference or
18128// not based on its size and fields. If it will be returned by reference, the
18129// frontend must modify the prototype so a pointer with the sret annotation is
18130// passed as the first argument. This is not necessary for large scalar
18131// returns.
18132// * Struct return values and varargs should be coerced to structs containing
18133// register-size fields in the same situations they would be for fixed
18134// arguments.
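//
// As a worked example of the rules above (illustrative, assuming the LP64D
// ABI with all argument registers free): a struct of two doubles is passed in
// a pair of FPRs (fa0/fa1), a struct of an int and a double is passed in a
// GPR plus an FPR, and a struct larger than 2*XLEN to which the floating
// point rules do not apply is passed by a pointer with the byval attribute.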
18135
18136static const MCPhysReg ArgFPR16s[] = {
18137 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18138 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18139};
18140static const MCPhysReg ArgFPR32s[] = {
18141 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18142 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18143};
18144static const MCPhysReg ArgFPR64s[] = {
18145 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18146 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18147};
18148// This is an interim calling convention and it may be changed in the future.
18149static const MCPhysReg ArgVRs[] = {
18150 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18151 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18152 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18153static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18154 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18155 RISCV::V20M2, RISCV::V22M2};
18156static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18157 RISCV::V20M4};
18158static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18159
18160ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
18161 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18162 // the ILP32E and LP64E ABIs.
18163 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18164 RISCV::X13, RISCV::X14, RISCV::X15,
18165 RISCV::X16, RISCV::X17};
18166 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18167 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18168 RISCV::X13, RISCV::X14, RISCV::X15};
18169
18170 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18171 return ArrayRef(ArgEGPRs);
18172
18173 return ArrayRef(ArgIGPRs);
18174}
18175
18176static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18177 // The GPRs used for passing arguments in FastCC. X5 and X6 might be used by
18178 // the save/restore libcalls, so we don't use them.
18179 static const MCPhysReg FastCCIGPRs[] = {
18180 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18181 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18182 RISCV::X29, RISCV::X30, RISCV::X31};
18183
18184 // The GPRs used for passing arguments in FastCC when using ILP32E/LP64E.
18185 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18186 RISCV::X13, RISCV::X14, RISCV::X15,
18187 RISCV::X7};
18188
18189 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18190 return ArrayRef(FastCCEGPRs);
18191
18192 return ArrayRef(FastCCIGPRs);
18193}
18194
18195// Pass a 2*XLEN argument that has been split into two XLEN values through
18196// registers or the stack as necessary.
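// For example, on RV32 an i64 that was split into two i32 halves may end up
// in a register pair, in one register plus a stack slot, or entirely on the
// stack, depending on how many argument GPRs are still free.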
18197static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18198 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18199 MVT ValVT2, MVT LocVT2,
18200 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18201 unsigned XLenInBytes = XLen / 8;
18202 const RISCVSubtarget &STI =
18203 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18204 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI: STI.getTargetABI());
18205
18206 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
18207 // At least one half can be passed via register.
18208 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg,
18209 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
18210 } else {
18211 // Both halves must be passed on the stack, with proper alignment.
18212 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18213 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18214 Align StackAlign(XLenInBytes);
18215 if (!EABI || XLen != 32)
18216 StackAlign = std::max(a: StackAlign, b: ArgFlags1.getNonZeroOrigAlign());
18217 State.addLoc(
18218 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
18219 Offset: State.AllocateStack(Size: XLenInBytes, Alignment: StackAlign),
18220 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
18221 State.addLoc(V: CCValAssign::getMem(
18222 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)),
18223 LocVT: LocVT2, HTP: CCValAssign::Full));
18224 return false;
18225 }
18226
18227 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
18228 // The second half can also be passed via register.
18229 State.addLoc(
18230 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
18231 } else {
18232 // The second half is passed via the stack, without additional alignment.
18233 State.addLoc(V: CCValAssign::getMem(
18234 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)),
18235 LocVT: LocVT2, HTP: CCValAssign::Full));
18236 }
18237
18238 return false;
18239}
18240
18241// Implements the RISC-V calling convention. Returns true upon failure.
18242bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18243 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18244 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18245 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18246 RVVArgDispatcher &RVVDispatcher) {
18247 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18248 assert(XLen == 32 || XLen == 64);
18249 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18250
18251 // Static chain parameter must not be passed in normal argument registers,
18252 // so we assign t2 (X7) for it, as done in GCC's __builtin_call_with_static_chain.
18253 if (ArgFlags.isNest()) {
18254 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18255 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18256 return false;
18257 }
18258 }
18259
18260 // Any return value split into more than two values can't be returned
18261 // directly. Vectors are returned via the available vector registers.
18262 if (!LocVT.isVector() && IsRet && ValNo > 1)
18263 return true;
18264
18265 // UseGPRForF16_F32 is true if targeting a soft-float ABI, if passing a
18266 // variadic argument, or if no F16/F32 argument registers are available.
18267 bool UseGPRForF16_F32 = true;
18268 // UseGPRForF64 is true if targeting a soft-float or FLEN=32 ABI, if passing
18269 // a variadic argument, or if no F64 argument registers are available.
18270 bool UseGPRForF64 = true;
18271
18272 switch (ABI) {
18273 default:
18274 llvm_unreachable("Unexpected ABI");
18275 case RISCVABI::ABI_ILP32:
18276 case RISCVABI::ABI_ILP32E:
18277 case RISCVABI::ABI_LP64:
18278 case RISCVABI::ABI_LP64E:
18279 break;
18280 case RISCVABI::ABI_ILP32F:
18281 case RISCVABI::ABI_LP64F:
18282 UseGPRForF16_F32 = !IsFixed;
18283 break;
18284 case RISCVABI::ABI_ILP32D:
18285 case RISCVABI::ABI_LP64D:
18286 UseGPRForF16_F32 = !IsFixed;
18287 UseGPRForF64 = !IsFixed;
18288 break;
18289 }
18290
18291 // FPR16, FPR32, and FPR64 alias each other.
18292 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18293 UseGPRForF16_F32 = true;
18294 UseGPRForF64 = true;
18295 }
18296
18297 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18298 // similar local variables rather than directly checking against the target
18299 // ABI.
18300
18301 if (UseGPRForF16_F32 &&
18302 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18303 LocVT = XLenVT;
18304 LocInfo = CCValAssign::BCvt;
18305 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18306 LocVT = MVT::i64;
18307 LocInfo = CCValAssign::BCvt;
18308 }
18309
18310 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
18311
18312 // If this is a variadic argument, the RISC-V calling convention requires
18313 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18314 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18315 // be used regardless of whether the original argument was split during
18316 // legalisation or not. The argument will not be passed by registers if the
18317 // original type is larger than 2*XLEN, so the register alignment rule does
18318 // not apply.
18319 // TODO: To be compatible with GCC's behaviors, we don't align registers
18320 // currently if we are using ILP32E calling convention. This behavior may be
18321 // changed when RV32E/ILP32E is ratified.
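  // For example (illustrative), a variadic double on RV32 with the ILP32 ABI
  // has 8-byte size and alignment, so if the next free GPR would be a1/a3/a5/a7
  // it is skipped and the value goes into the following aligned pair
  // (e.g. a2/a3).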
18322 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18323 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18324 DL.getTypeAllocSize(Ty: OrigTy) == TwoXLenInBytes &&
18325 ABI != RISCVABI::ABI_ILP32E) {
18326 unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
18327 // Skip 'odd' register if necessary.
18328 if (RegIdx != std::size(cont: ArgGPRs) && RegIdx % 2 == 1)
18329 State.AllocateReg(Regs: ArgGPRs);
18330 }
18331
18332 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18333 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18334 State.getPendingArgFlags();
18335
18336 assert(PendingLocs.size() == PendingArgFlags.size() &&
18337 "PendingLocs and PendingArgFlags out of sync");
18338
18339 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18340 // registers are exhausted.
18341 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18342 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18343 // Depending on available argument GPRs, f64 may be passed in a pair of
18344 // GPRs, split between a GPR and the stack, or passed completely on the
18345 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18346 // cases.
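    // For example, an f64 may land in a0/a1, in the last free GPR plus a
    // 4-byte stack slot, or in a single 8-byte stack slot.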
18347 Register Reg = State.AllocateReg(Regs: ArgGPRs);
18348 if (!Reg) {
18349 unsigned StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8));
18350 State.addLoc(
18351 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18352 return false;
18353 }
18354 LocVT = MVT::i32;
18355 State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18356 Register HiReg = State.AllocateReg(Regs: ArgGPRs);
18357 if (HiReg) {
18358 State.addLoc(
18359 V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: HiReg, LocVT, HTP: LocInfo));
18360 } else {
18361 unsigned StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4));
18362 State.addLoc(
18363 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18364 }
18365 return false;
18366 }
18367
18368 // Fixed-length vectors are located in the corresponding scalable-vector
18369 // container types.
18370 if (ValVT.isFixedLengthVector())
18371 LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT);
18372
18373 // Split arguments might be passed indirectly, so keep track of the pending
18374 // values. Split vectors are passed via a mix of registers and indirectly, so
18375 // treat them as we would any other argument.
18376 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18377 LocVT = XLenVT;
18378 LocInfo = CCValAssign::Indirect;
18379 PendingLocs.push_back(
18380 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
18381 PendingArgFlags.push_back(Elt: ArgFlags);
18382 if (!ArgFlags.isSplitEnd()) {
18383 return false;
18384 }
18385 }
18386
18387 // If the split argument only had two elements, it should be passed directly
18388 // in registers or on the stack.
18389 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18390 PendingLocs.size() <= 2) {
18391 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18392 // Apply the normal calling convention rules to the first half of the
18393 // split argument.
18394 CCValAssign VA = PendingLocs[0];
18395 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18396 PendingLocs.clear();
18397 PendingArgFlags.clear();
18398 return CC_RISCVAssign2XLen(
18399 XLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, ArgFlags2: ArgFlags,
18400 EABI: ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18401 }
18402
18403 // Allocate to a register if possible, or else a stack slot.
18404 Register Reg;
18405 unsigned StoreSizeBytes = XLen / 8;
18406 Align StackAlign = Align(XLen / 8);
18407
18408 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18409 Reg = State.AllocateReg(ArgFPR16s);
18410 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18411 Reg = State.AllocateReg(ArgFPR32s);
18412 else if (ValVT == MVT::f64 && !UseGPRForF64)
18413 Reg = State.AllocateReg(ArgFPR64s);
18414 else if (ValVT.isVector()) {
18415 Reg = RVVDispatcher.getNextPhysReg();
18416 if (!Reg) {
18417 // For return values, the vector must be passed fully via registers or
18418 // via the stack.
18419 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18420 // but we're using all of them.
18421 if (IsRet)
18422 return true;
18423 // Try using a GPR to pass the address.
18424 if ((Reg = State.AllocateReg(Regs: ArgGPRs))) {
18425 LocVT = XLenVT;
18426 LocInfo = CCValAssign::Indirect;
18427 } else if (ValVT.isScalableVector()) {
18428 LocVT = XLenVT;
18429 LocInfo = CCValAssign::Indirect;
18430 } else {
18431 // Pass fixed-length vectors on the stack.
18432 LocVT = ValVT;
18433 StoreSizeBytes = ValVT.getStoreSize();
18434 // Align vectors to their element sizes, being careful for vXi1
18435 // vectors.
18436 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18437 }
18438 }
18439 } else {
18440 Reg = State.AllocateReg(Regs: ArgGPRs);
18441 }
18442
18443 unsigned StackOffset =
18444 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
18445
18446 // If we reach this point and PendingLocs is non-empty, we must be at the
18447 // end of a split argument that must be passed indirectly.
18448 if (!PendingLocs.empty()) {
18449 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18450 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18451
18452 for (auto &It : PendingLocs) {
18453 if (Reg)
18454 It.convertToReg(RegNo: Reg);
18455 else
18456 It.convertToMem(Offset: StackOffset);
18457 State.addLoc(V: It);
18458 }
18459 PendingLocs.clear();
18460 PendingArgFlags.clear();
18461 return false;
18462 }
18463
18464 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18465 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18466 "Expected an XLenVT or vector types at this stage");
18467
18468 if (Reg) {
18469 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18470 return false;
18471 }
18472
18473 // When a scalar floating-point value is passed on the stack, no
18474 // bit-conversion is needed.
18475 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18476 assert(!ValVT.isVector());
18477 LocVT = ValVT;
18478 LocInfo = CCValAssign::Full;
18479 }
18480 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18481 return false;
18482}
18483
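// Return the index of the first argument whose type is a vector with i1
// elements (i.e. a mask operand), if any.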
18484template <typename ArgTy>
18485static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18486 for (const auto &ArgIdx : enumerate(Args)) {
18487 MVT ArgVT = ArgIdx.value().VT;
18488 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18489 return ArgIdx.index();
18490 }
18491 return std::nullopt;
18492}
18493
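// Assign a location to each value in Ins using Fn. When IsRet is true the
// values are treated as return values (the function's return type is used as
// the original type); an RVVArgDispatcher is built so that vector arguments
// are assigned registers consistently.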
18494void RISCVTargetLowering::analyzeInputArgs(
18495 MachineFunction &MF, CCState &CCInfo,
18496 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18497 RISCVCCAssignFn Fn) const {
18498 unsigned NumArgs = Ins.size();
18499 FunctionType *FType = MF.getFunction().getFunctionType();
18500
18501 RVVArgDispatcher Dispatcher;
18502 if (IsRet) {
18503 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18504 } else {
18505 SmallVector<Type *, 4> TypeList;
18506 for (const Argument &Arg : MF.getFunction().args())
18507 TypeList.push_back(Elt: Arg.getType());
18508 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18509 }
18510
18511 for (unsigned i = 0; i != NumArgs; ++i) {
18512 MVT ArgVT = Ins[i].VT;
18513 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18514
18515 Type *ArgTy = nullptr;
18516 if (IsRet)
18517 ArgTy = FType->getReturnType();
18518 else if (Ins[i].isOrigArg())
18519 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
18520
18521 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18522 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18523 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18524 Dispatcher)) {
18525 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18526 << ArgVT << '\n');
18527 llvm_unreachable(nullptr);
18528 }
18529 }
18530}
18531
18532void RISCVTargetLowering::analyzeOutputArgs(
18533 MachineFunction &MF, CCState &CCInfo,
18534 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18535 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18536 unsigned NumArgs = Outs.size();
18537
18538 SmallVector<Type *, 4> TypeList;
18539 if (IsRet)
18540 TypeList.push_back(Elt: MF.getFunction().getReturnType());
18541 else if (CLI)
18542 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18543 TypeList.push_back(Elt: Arg.Ty);
18544 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18545
18546 for (unsigned i = 0; i != NumArgs; i++) {
18547 MVT ArgVT = Outs[i].VT;
18548 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18549 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18550
18551 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18552 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18553 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18554 Dispatcher)) {
18555 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18556 << ArgVT << "\n");
18557 llvm_unreachable(nullptr);
18558 }
18559 }
18560}
18561
18562// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18563// values.
18564static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18565 const CCValAssign &VA, const SDLoc &DL,
18566 const RISCVSubtarget &Subtarget) {
18567 switch (VA.getLocInfo()) {
18568 default:
18569 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18570 case CCValAssign::Full:
18571 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18572 Val = convertFromScalableVector(VT: VA.getValVT(), V: Val, DAG, Subtarget);
18573 break;
18574 case CCValAssign::BCvt:
18575 if (VA.getLocVT().isInteger() &&
18576 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18577 Val = DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: VA.getValVT(), Operand: Val);
18578 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18579 if (RV64LegalI32) {
18580 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18581 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18582 } else {
18583 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18584 }
18585 } else {
18586 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
18587 }
18588 break;
18589 }
18590 return Val;
18591}
18592
18593// The caller is responsible for loading the full value if the argument is
18594// passed with CCValAssign::Indirect.
18595static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18596 const CCValAssign &VA, const SDLoc &DL,
18597 const ISD::InputArg &In,
18598 const RISCVTargetLowering &TLI) {
18599 MachineFunction &MF = DAG.getMachineFunction();
18600 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18601 EVT LocVT = VA.getLocVT();
18602 SDValue Val;
18603 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
18604 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
18605 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
18606 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
18607
18608 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18609 if (In.isOrigArg()) {
18610 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
18611 if (OrigArg->getType()->isIntegerTy()) {
18612 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18613 // An input zero extended from i31 can also be considered sign extended.
18614 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18615 (BitWidth < 32 && In.Flags.isZExt())) {
18616 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18617 RVFI->addSExt32Register(Reg: VReg);
18618 }
18619 }
18620 }
18621
18622 if (VA.getLocInfo() == CCValAssign::Indirect)
18623 return Val;
18624
18625 return convertLocVTToValVT(DAG, Val, VA, DL, Subtarget: TLI.getSubtarget());
18626}
18627
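// Convert Val from VA.getValVT() to the location VT described by VA. This is
// the inverse of convertLocVTToValVT and, like it, should not be called for
// CCValAssign::Indirect values.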
18628static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18629 const CCValAssign &VA, const SDLoc &DL,
18630 const RISCVSubtarget &Subtarget) {
18631 EVT LocVT = VA.getLocVT();
18632
18633 switch (VA.getLocInfo()) {
18634 default:
18635 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18636 case CCValAssign::Full:
18637 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18638 Val = convertToScalableVector(VT: LocVT, V: Val, DAG, Subtarget);
18639 break;
18640 case CCValAssign::BCvt:
18641 if (LocVT.isInteger() &&
18642 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18643 Val = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: LocVT, Operand: Val);
18644 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18645 if (RV64LegalI32) {
18646 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18647 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18648 } else {
18649 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18650 }
18651 } else {
18652 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
18653 }
18654 break;
18655 }
18656 return Val;
18657}
18658
18659// The caller is responsible for loading the full value if the argument is
18660// passed with CCValAssign::Indirect.
18661static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18662 const CCValAssign &VA, const SDLoc &DL) {
18663 MachineFunction &MF = DAG.getMachineFunction();
18664 MachineFrameInfo &MFI = MF.getFrameInfo();
18665 EVT LocVT = VA.getLocVT();
18666 EVT ValVT = VA.getValVT();
18667 EVT PtrVT = MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0));
18668 if (ValVT.isScalableVector()) {
18669 // When the value is a scalable vector, the stack slot holds a pointer to
18670 // the scalable vector value rather than the value itself, so load it with
18671 // the pointer-sized location type instead of the scalable vector type.
18672 ValVT = LocVT;
18673 }
18674 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
18675 /*IsImmutable=*/true);
18676 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
18677 SDValue Val;
18678
18679 ISD::LoadExtType ExtType;
18680 switch (VA.getLocInfo()) {
18681 default:
18682 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18683 case CCValAssign::Full:
18684 case CCValAssign::Indirect:
18685 case CCValAssign::BCvt:
18686 ExtType = ISD::NON_EXTLOAD;
18687 break;
18688 }
18689 Val = DAG.getExtLoad(
18690 ExtType, dl: DL, VT: LocVT, Chain, Ptr: FIN,
18691 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
18692 return Val;
18693}
18694
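// Reassemble an f64 argument that was split for RV32: the low half arrives in
// a GPR and the high half arrives either in a second GPR or on the stack; the
// two i32 halves are recombined with RISCVISD::BuildPairF64.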
18695static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18696 const CCValAssign &VA,
18697 const CCValAssign &HiVA,
18698 const SDLoc &DL) {
18699 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18700 "Unexpected VA");
18701 MachineFunction &MF = DAG.getMachineFunction();
18702 MachineFrameInfo &MFI = MF.getFrameInfo();
18703 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18704
18705 assert(VA.isRegLoc() && "Expected register VA assignment");
18706
18707 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18708 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
18709 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18710 SDValue Hi;
18711 if (HiVA.isMemLoc()) {
18712 // Second half of f64 is passed on the stack.
18713 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(),
18714 /*IsImmutable=*/true);
18715 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18716 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18717 MachinePointerInfo::getFixedStack(MF, FI));
18718 } else {
18719 // Second half of f64 is passed in another GPR.
18720 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18721 RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
18722 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18723 }
18724 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18725}
18726
18727// FastCC shows less than 1% performance improvement on some particular
18728// benchmarks, but it may theoretically benefit other cases.
18729bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18730 unsigned ValNo, MVT ValVT, MVT LocVT,
18731 CCValAssign::LocInfo LocInfo,
18732 ISD::ArgFlagsTy ArgFlags, CCState &State,
18733 bool IsFixed, bool IsRet, Type *OrigTy,
18734 const RISCVTargetLowering &TLI,
18735 RVVArgDispatcher &RVVDispatcher) {
18736 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18737 if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) {
18738 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18739 return false;
18740 }
18741 }
18742
18743 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18744
18745 if (LocVT == MVT::f16 &&
18746 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18747 static const MCPhysReg FPR16List[] = {
18748 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18749 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18750 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18751 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18752 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18753 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18754 return false;
18755 }
18756 }
18757
18758 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18759 static const MCPhysReg FPR32List[] = {
18760 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18761 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18762 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18763 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18764 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18765 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18766 return false;
18767 }
18768 }
18769
18770 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18771 static const MCPhysReg FPR64List[] = {
18772 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18773 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18774 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18775 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18776 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18777 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18778 return false;
18779 }
18780 }
18781
18782 // Check if there is an available GPR before hitting the stack.
18783 if ((LocVT == MVT::f16 &&
18784 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18785 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18786 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18787 Subtarget.hasStdExtZdinx())) {
18788 if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) {
18789 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18790 return false;
18791 }
18792 }
18793
18794 if (LocVT == MVT::f16) {
18795 unsigned Offset2 = State.AllocateStack(Size: 2, Alignment: Align(2));
18796 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset2, LocVT, HTP: LocInfo));
18797 return false;
18798 }
18799
18800 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18801 unsigned Offset4 = State.AllocateStack(Size: 4, Alignment: Align(4));
18802 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset4, LocVT, HTP: LocInfo));
18803 return false;
18804 }
18805
18806 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18807 unsigned Offset5 = State.AllocateStack(Size: 8, Alignment: Align(8));
18808 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset5, LocVT, HTP: LocInfo));
18809 return false;
18810 }
18811
18812 if (LocVT.isVector()) {
18813 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
18814 if (AllocatedVReg) {
18815 // Fixed-length vectors are located in the corresponding scalable-vector
18816 // container types.
18817 if (ValVT.isFixedLengthVector())
18818 LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT);
18819 State.addLoc(
18820 V: CCValAssign::getReg(ValNo, ValVT, RegNo: AllocatedVReg, LocVT, HTP: LocInfo));
18821 } else {
18822 // Try to pass the address via a "fast" GPR.
18823 if (unsigned GPRReg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) {
18824 LocInfo = CCValAssign::Indirect;
18825 LocVT = TLI.getSubtarget().getXLenVT();
18826 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: GPRReg, LocVT, HTP: LocInfo));
18827 } else if (ValVT.isFixedLengthVector()) {
18828 auto StackAlign =
18829 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18830 unsigned StackOffset =
18831 State.AllocateStack(Size: ValVT.getStoreSize(), Alignment: StackAlign);
18832 State.addLoc(
18833 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
18834 } else {
18835 // Can't pass scalable vectors on the stack.
18836 return true;
18837 }
18838 }
18839
18840 return false;
18841 }
18842
18843 return true; // CC didn't match.
18844}
18845
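// Implements the GHC calling convention. Arguments are pinned to the fixed
// STG registers listed below and are never passed on the stack; running out
// of registers is a fatal error.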
18846bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18847 CCValAssign::LocInfo LocInfo,
18848 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18849 if (ArgFlags.isNest()) {
18850 report_fatal_error(
18851 reason: "Attribute 'nest' is not supported in GHC calling convention");
18852 }
18853
18854 static const MCPhysReg GPRList[] = {
18855 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18856 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18857
18858 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18859 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18860 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18861 if (unsigned Reg = State.AllocateReg(GPRList)) {
18862 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18863 return false;
18864 }
18865 }
18866
18867 const RISCVSubtarget &Subtarget =
18868 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18869
18870 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18871 // Pass in STG registers: F1, ..., F6
18872 // fs0 ... fs5
18873 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18874 RISCV::F18_F, RISCV::F19_F,
18875 RISCV::F20_F, RISCV::F21_F};
18876 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18877 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18878 return false;
18879 }
18880 }
18881
18882 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18883 // Pass in STG registers: D1, ..., D6
18884 // fs6 ... fs11
18885 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18886 RISCV::F24_D, RISCV::F25_D,
18887 RISCV::F26_D, RISCV::F27_D};
18888 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18889 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18890 return false;
18891 }
18892 }
18893
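  // With Zfinx/Zdinx the floating-point state lives in the GPRs, so fall back
  // to the same GPR list used for the integer STG registers.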
18894 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18895 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18896 Subtarget.is64Bit())) {
18897 if (unsigned Reg = State.AllocateReg(GPRList)) {
18898 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
18899 return false;
18900 }
18901 }
18902
18903 report_fatal_error(reason: "No registers left in GHC calling convention");
18904 return true;
18905}
18906
18907// Transform physical registers into virtual registers.
18908SDValue RISCVTargetLowering::LowerFormalArguments(
18909 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18910 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18911 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18912
18913 MachineFunction &MF = DAG.getMachineFunction();
18914
18915 switch (CallConv) {
18916 default:
18917 report_fatal_error(reason: "Unsupported calling convention");
18918 case CallingConv::C:
18919 case CallingConv::Fast:
18920 case CallingConv::SPIR_KERNEL:
18921 case CallingConv::GRAAL:
18922 case CallingConv::RISCV_VectorCall:
18923 break;
18924 case CallingConv::GHC:
18925 if (Subtarget.hasStdExtE())
18926 report_fatal_error(reason: "GHC calling convention is not supported on RVE!");
18927 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18928 report_fatal_error(reason: "GHC calling convention requires the (Zfinx/F) and "
18929 "(Zdinx/D) instruction set extensions");
18930 }
18931
18932 const Function &Func = MF.getFunction();
18933 if (Func.hasFnAttribute(Kind: "interrupt")) {
18934 if (!Func.arg_empty())
18935 report_fatal_error(
18936 reason: "Functions with the interrupt attribute cannot have arguments!");
18937
18938 StringRef Kind =
18939 MF.getFunction().getFnAttribute(Kind: "interrupt").getValueAsString();
18940
18941 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18942 report_fatal_error(
18943 reason: "Function interrupt attribute argument not supported!");
18944 }
18945
18946 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
18947 MVT XLenVT = Subtarget.getXLenVT();
18948 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18949  // Used with varargs to accumulate store chains.
18950 std::vector<SDValue> OutChains;
18951
18952 // Assign locations to all of the incoming arguments.
18953 SmallVector<CCValAssign, 16> ArgLocs;
18954 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18955
18956 if (CallConv == CallingConv::GHC)
18957 CCInfo.AnalyzeFormalArguments(Ins, Fn: RISCV::CC_RISCV_GHC);
18958 else
18959 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
18960 Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
18961 : RISCV::CC_RISCV);
18962
18963 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
18964 CCValAssign &VA = ArgLocs[i];
18965 SDValue ArgValue;
18966 // Passing f64 on RV32D with a soft float ABI must be handled as a special
18967 // case.
18968 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18969 assert(VA.needsCustom());
18970 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL);
18971 } else if (VA.isRegLoc())
18972 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this);
18973 else
18974 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
18975
18976 if (VA.getLocInfo() == CCValAssign::Indirect) {
18977 // If the original argument was split and passed by reference (e.g. i128
18978 // on RV32), we need to load all parts of it here (using the same
18979 // address). Vectors may be partly split to registers and partly to the
18980 // stack, in which case the base address is partly offset and subsequent
18981      // accesses are relative to that.
18982 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
18983 PtrInfo: MachinePointerInfo()));
18984 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
18985 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
18986 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18987 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
18988 CCValAssign &PartVA = ArgLocs[i + 1];
18989 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
18990 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
18991 if (PartVA.getValVT().isScalableVector())
18992 Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset);
18993 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
18994 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
18995 PtrInfo: MachinePointerInfo()));
18996 ++i;
18997 ++InsIdx;
18998 }
18999 continue;
19000 }
19001 InVals.push_back(Elt: ArgValue);
19002 }
19003
19004 if (any_of(Range&: ArgLocs,
19005 P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19006 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19007
19008 if (IsVarArg) {
19009 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(ABI: Subtarget.getTargetABI());
19010 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
19011 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19012 MachineFrameInfo &MFI = MF.getFrameInfo();
19013 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19014 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19015
19016 // Size of the vararg save area. For now, the varargs save area is either
19017 // zero or large enough to hold a0-a7.
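    // For example, with the lp64 ABI (argument registers a0-a7), if the named
    // arguments consumed a0-a2 then Idx == 3 and the save area covers a3-a7:
    // 5 * XLenInBytes == 40 bytes.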
19018 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19019 int FI;
19020
19021 // If all registers are allocated, then all varargs must be passed on the
19022 // stack and we don't need to save any argregs.
19023 if (VarArgsSaveSize == 0) {
19024 int VaArgOffset = CCInfo.getStackSize();
19025 FI = MFI.CreateFixedObject(Size: XLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
19026 } else {
19027 int VaArgOffset = -VarArgsSaveSize;
19028 FI = MFI.CreateFixedObject(Size: VarArgsSaveSize, SPOffset: VaArgOffset, IsImmutable: true);
19029
19030      // If saving an odd number of registers, create an extra stack slot to
19031      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19032      // offsets to even-numbered registers remain 2*XLEN-aligned.
19033 if (Idx % 2) {
19034 MFI.CreateFixedObject(
19035 Size: XLenInBytes, SPOffset: VaArgOffset - static_cast<int>(XLenInBytes), IsImmutable: true);
19036 VarArgsSaveSize += XLenInBytes;
19037 }
19038
19039 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
19040
19041 // Copy the integer registers that may have been used for passing varargs
19042 // to the vararg save area.
19043 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19044 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
19045 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
19046 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: XLenVT);
19047 SDValue Store = DAG.getStore(
19048 Chain, dl: DL, Val: ArgValue, Ptr: FIN,
19049 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset: (I - Idx) * XLenInBytes));
19050 OutChains.push_back(x: Store);
19051 FIN =
19052 DAG.getMemBasePlusOffset(Base: FIN, Offset: TypeSize::getFixed(ExactSize: XLenInBytes), DL);
19053 }
19054 }
19055
19056    // Record the frame index of the first variable argument,
19057    // which is needed by VASTART.
19058 RVFI->setVarArgsFrameIndex(FI);
19059 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19060 }
19061
19062  // Group all the stores into one token factor node so that the sizes of
19063  // Ins and InVals stay matched. This only happens for vararg functions.
19064 if (!OutChains.empty()) {
19065 OutChains.push_back(x: Chain);
19066 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19067 }
19068
19069 return Chain;
19070}
19071
19072/// isEligibleForTailCallOptimization - Check whether the call is eligible
19073/// for tail call optimization.
19074/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19075bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19076 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19077 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19078
19079 auto CalleeCC = CLI.CallConv;
19080 auto &Outs = CLI.Outs;
19081 auto &Caller = MF.getFunction();
19082 auto CallerCC = Caller.getCallingConv();
19083
19084 // Exception-handling functions need a special set of instructions to
19085 // indicate a return to the hardware. Tail-calling another function would
19086 // probably break this.
19087 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19088 // should be expanded as new function attributes are introduced.
19089 if (Caller.hasFnAttribute(Kind: "interrupt"))
19090 return false;
19091
19092 // Do not tail call opt if the stack is used to pass parameters.
19093 if (CCInfo.getStackSize() != 0)
19094 return false;
19095
19096  // Do not tail call opt if any parameters need to be passed indirectly.
19097  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19098  // passed indirectly. So the address of the value will be passed in a
19099  // register, or if not available, then the address is put on the stack.
19100  // Passing indirectly often requires allocating stack space to hold the
19101  // value, so the CCInfo.getStackSize() != 0 check above is not enough and we
19102  // also need to check whether any CCValAssign in ArgLocs is passed
19103  // CCValAssign::Indirect.
19104 for (auto &VA : ArgLocs)
19105 if (VA.getLocInfo() == CCValAssign::Indirect)
19106 return false;
19107
19108 // Do not tail call opt if either caller or callee uses struct return
19109 // semantics.
19110 auto IsCallerStructRet = Caller.hasStructRetAttr();
19111 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19112 if (IsCallerStructRet || IsCalleeStructRet)
19113 return false;
19114
19115 // The callee has to preserve all registers the caller needs to preserve.
19116 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19117 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19118 if (CalleeCC != CallerCC) {
19119 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19120 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19121 return false;
19122 }
19123
19124 // Byval parameters hand the function a pointer directly into the stack area
19125 // we want to reuse during a tail call. Working around this *is* possible
19126 // but less efficient and uglier in LowerCall.
19127 for (auto &Arg : Outs)
19128 if (Arg.Flags.isByVal())
19129 return false;
19130
19131 return true;
19132}
19133
19134static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19135 return DAG.getDataLayout().getPrefTypeAlign(
19136 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
19137}
19138
19139// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19140// and output parameter nodes.
19141SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19142 SmallVectorImpl<SDValue> &InVals) const {
19143 SelectionDAG &DAG = CLI.DAG;
19144 SDLoc &DL = CLI.DL;
19145 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19146 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19147 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19148 SDValue Chain = CLI.Chain;
19149 SDValue Callee = CLI.Callee;
19150 bool &IsTailCall = CLI.IsTailCall;
19151 CallingConv::ID CallConv = CLI.CallConv;
19152 bool IsVarArg = CLI.IsVarArg;
19153 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
19154 MVT XLenVT = Subtarget.getXLenVT();
19155
19156 MachineFunction &MF = DAG.getMachineFunction();
19157
19158 // Analyze the operands of the call, assigning locations to each operand.
19159 SmallVector<CCValAssign, 16> ArgLocs;
19160 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19161
19162 if (CallConv == CallingConv::GHC) {
19163 if (Subtarget.hasStdExtE())
19164 report_fatal_error(reason: "GHC calling convention is not supported on RVE!");
19165 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: RISCV::CC_RISCV_GHC);
19166 } else
19167 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI,
19168 Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19169 : RISCV::CC_RISCV);
19170
19171 // Check if it's really possible to do a tail call.
19172 if (IsTailCall)
19173 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
19174
19175 if (IsTailCall)
19176 ++NumTailCalls;
19177 else if (CLI.CB && CLI.CB->isMustTailCall())
19178 report_fatal_error(reason: "failed to perform tail call elimination on a call "
19179 "site marked musttail");
19180
19181 // Get a count of how many bytes are to be pushed on the stack.
19182 unsigned NumBytes = ArgCCInfo.getStackSize();
19183
19184 // Create local copies for byval args
19185 SmallVector<SDValue, 8> ByValArgs;
19186 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19187 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19188 if (!Flags.isByVal())
19189 continue;
19190
19191 SDValue Arg = OutVals[i];
19192 unsigned Size = Flags.getByValSize();
19193 Align Alignment = Flags.getNonZeroByValAlign();
19194
19195 int FI =
19196 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
19197 SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
19198 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: XLenVT);
19199
19200 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment,
19201 /*IsVolatile=*/isVol: false,
19202 /*AlwaysInline=*/false, isTailCall: IsTailCall,
19203 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
19204 ByValArgs.push_back(Elt: FIPtr);
19205 }
19206
19207 if (!IsTailCall)
19208 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
19209
19210 // Copy argument values to their designated locations.
19211 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19212 SmallVector<SDValue, 8> MemOpChains;
19213 SDValue StackPtr;
19214 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19215 ++i, ++OutIdx) {
19216 CCValAssign &VA = ArgLocs[i];
19217 SDValue ArgValue = OutVals[OutIdx];
19218 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19219
19220 // Handle passing f64 on RV32D with a soft float ABI as a special case.
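    // SplitF64 yields the two i32 halves: the low half always goes in the GPR
    // assigned by VA, and the high half goes either in the next assigned GPR
    // or, if no register was available, onto the stack.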
19221 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19222 assert(VA.isRegLoc() && "Expected register VA assignment");
19223 assert(VA.needsCustom());
19224 SDValue SplitF64 = DAG.getNode(
19225 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19226 SDValue Lo = SplitF64.getValue(R: 0);
19227 SDValue Hi = SplitF64.getValue(R: 1);
19228
19229 Register RegLo = VA.getLocReg();
19230 RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));
19231
19232 // Get the CCValAssign for the Hi part.
19233 CCValAssign &HiVA = ArgLocs[++i];
19234
19235 if (HiVA.isMemLoc()) {
19236 // Second half of f64 is passed on the stack.
19237 if (!StackPtr.getNode())
19238 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19239 SDValue Address =
19240 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
19241 N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
19242 // Emit the store.
19243 MemOpChains.push_back(
19244 Elt: DAG.getStore(Chain, dl: DL, Val: Hi, Ptr: Address, PtrInfo: MachinePointerInfo()));
19245 } else {
19246 // Second half of f64 is passed in another GPR.
19247 Register RegHigh = HiVA.getLocReg();
19248 RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
19249 }
19250 continue;
19251 }
19252
19253 // Promote the value if needed.
19254 // For now, only handle fully promoted and indirect arguments.
19255 if (VA.getLocInfo() == CCValAssign::Indirect) {
19256 // Store the argument in a stack slot and pass its address.
19257 Align StackAlign =
19258 std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG),
19259 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
19260 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19261 // If the original argument was split (e.g. i128), we need
19262 // to store the required parts of it here (and pass just one address).
19263 // Vectors may be partly split to registers and partly to the stack, in
19264 // which case the base address is partly offset and subsequent stores are
19265 // relative to that.
19266 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19267 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19268 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19269      // Calculate the total size to store. The only way to know what will
19270      // actually be stored is to walk the remaining parts and collect the
19271      // info as we go.
19272 SmallVector<std::pair<SDValue, SDValue>> Parts;
19273 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19274 SDValue PartValue = OutVals[OutIdx + 1];
19275 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19276 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
19277 EVT PartVT = PartValue.getValueType();
19278 if (PartVT.isScalableVector())
19279 Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset);
19280 StoredSize += PartVT.getStoreSize();
19281 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
19282 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
19283 ++i;
19284 ++OutIdx;
19285 }
19286 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
19287 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
19288 MemOpChains.push_back(
19289 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
19290 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
19291 for (const auto &Part : Parts) {
19292 SDValue PartValue = Part.first;
19293 SDValue PartOffset = Part.second;
19294 SDValue Address =
19295 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
19296 MemOpChains.push_back(
19297 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
19298 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
19299 }
19300 ArgValue = SpillSlot;
19301 } else {
19302 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL, Subtarget);
19303 }
19304
19305 // Use local copy if it is a byval arg.
19306 if (Flags.isByVal())
19307 ArgValue = ByValArgs[j++];
19308
19309 if (VA.isRegLoc()) {
19310 // Queue up the argument copies and emit them at the end.
19311 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
19312 } else {
19313 assert(VA.isMemLoc() && "Argument not register or memory");
19314 assert(!IsTailCall && "Tail call not allowed if stack is used "
19315 "for passing parameters");
19316
19317 // Work out the address of the stack slot.
19318 if (!StackPtr.getNode())
19319 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19320 SDValue Address =
19321 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
19322 N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));
19323
19324 // Emit the store.
19325 MemOpChains.push_back(
19326 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
19327 }
19328 }
19329
19330 // Join the stores, which are independent of one another.
19331 if (!MemOpChains.empty())
19332 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19333
19334 SDValue Glue;
19335
19336 // Build a sequence of copy-to-reg nodes, chained and glued together.
19337 for (auto &Reg : RegsToPass) {
19338 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
19339 Glue = Chain.getValue(R: 1);
19340 }
19341
19342  // Validate that none of the argument registers have been marked as
19343  // reserved; if any have, report an error. Do the same for the return address
19344  // register if this is not a tail call.
19345 validateCCReservedRegs(Regs: RegsToPass, MF);
19346 if (!IsTailCall &&
19347 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
19348 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
19349 MF.getFunction(),
19350 "Return address register required, but has been reserved."});
19351
19352  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19353  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19354  // split it, and the direct call can then be matched by PseudoCALL.
19355 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
19356 const GlobalValue *GV = S->getGlobal();
19357 Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: RISCVII::MO_CALL);
19358 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
19359 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: RISCVII::MO_CALL);
19360 }
19361
19362 // The first call operand is the chain and the second is the target address.
19363 SmallVector<SDValue, 8> Ops;
19364 Ops.push_back(Elt: Chain);
19365 Ops.push_back(Elt: Callee);
19366
19367 // Add argument registers to the end of the list so that they are
19368 // known live into the call.
19369 for (auto &Reg : RegsToPass)
19370 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
19371
19372 if (!IsTailCall) {
19373 // Add a register mask operand representing the call-preserved registers.
19374 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19375 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19376 assert(Mask && "Missing call preserved mask for calling convention");
19377 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
19378 }
19379
19380 // Glue the call to the argument copies, if any.
19381 if (Glue.getNode())
19382 Ops.push_back(Elt: Glue);
19383
19384 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19385 "Unexpected CFI type for a direct call");
19386
19387 // Emit the call.
19388 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19389
19390 if (IsTailCall) {
19391 MF.getFrameInfo().setHasTailCall();
19392 SDValue Ret = DAG.getNode(Opcode: RISCVISD::TAIL, DL, VTList: NodeTys, Ops);
19393 if (CLI.CFIType)
19394 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19395 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
19396 return Ret;
19397 }
19398
19399 Chain = DAG.getNode(Opcode: RISCVISD::CALL, DL, VTList: NodeTys, Ops);
19400 if (CLI.CFIType)
19401 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19402 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
19403 Glue = Chain.getValue(R: 1);
19404
19405 // Mark the end of the call, which is glued to the call itself.
19406 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
19407 Glue = Chain.getValue(R: 1);
19408
19409 // Assign locations to each value returned by this call.
19410 SmallVector<CCValAssign, 16> RVLocs;
19411 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19412 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: RISCV::CC_RISCV);
19413
19414 // Copy all of the result registers out of their specified physreg.
19415 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19416 auto &VA = RVLocs[i];
19417 // Copy the value out
19418 SDValue RetValue =
19419 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
19420 // Glue the RetValue to the end of the call sequence
19421 Chain = RetValue.getValue(R: 1);
19422 Glue = RetValue.getValue(R: 2);
19423
19424 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19425 assert(VA.needsCustom());
19426 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19427 MVT::i32, Glue);
19428 Chain = RetValue2.getValue(R: 1);
19429 Glue = RetValue2.getValue(R: 2);
19430 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19431 RetValue2);
19432 }
19433
19434 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL, Subtarget);
19435
19436 InVals.push_back(Elt: RetValue);
19437 }
19438
19439 return Chain;
19440}
19441
19442bool RISCVTargetLowering::CanLowerReturn(
19443 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19444 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19445 SmallVector<CCValAssign, 16> RVLocs;
19446 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19447
19448 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19449
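  // CC_RISCV returns true when it fails to assign a location, so lowering the
  // return is possible only if every outgoing value is assigned (with
  // IsRet=true) without error.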
19450 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19451 MVT VT = Outs[i].VT;
19452 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19453 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19454 if (RISCV::CC_RISCV(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: VT, LocVT: VT, LocInfo: CCValAssign::Full,
19455 ArgFlags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19456 OrigTy: nullptr, TLI: *this, RVVDispatcher&: Dispatcher))
19457 return false;
19458 }
19459 return true;
19460}
19461
19462SDValue
19463RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19464 bool IsVarArg,
19465 const SmallVectorImpl<ISD::OutputArg> &Outs,
19466 const SmallVectorImpl<SDValue> &OutVals,
19467 const SDLoc &DL, SelectionDAG &DAG) const {
19468 MachineFunction &MF = DAG.getMachineFunction();
19469 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19470
19471 // Stores the assignment of the return value to a location.
19472 SmallVector<CCValAssign, 16> RVLocs;
19473
19474 // Info about the registers and stack slot.
19475 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19476 *DAG.getContext());
19477
19478 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19479 CLI: nullptr, Fn: RISCV::CC_RISCV);
19480
19481 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19482 report_fatal_error(reason: "GHC functions return void only");
19483
19484 SDValue Glue;
19485 SmallVector<SDValue, 4> RetOps(1, Chain);
19486
19487 // Copy the result values into the output registers.
19488 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19489 SDValue Val = OutVals[OutIdx];
19490 CCValAssign &VA = RVLocs[i];
19491 assert(VA.isRegLoc() && "Can only return in registers!");
19492
19493 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19494 // Handle returning f64 on RV32D with a soft float ABI.
19495 assert(VA.isRegLoc() && "Expected return via registers");
19496 assert(VA.needsCustom());
19497 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19498 DAG.getVTList(MVT::i32, MVT::i32), Val);
19499 SDValue Lo = SplitF64.getValue(R: 0);
19500 SDValue Hi = SplitF64.getValue(R: 1);
19501 Register RegLo = VA.getLocReg();
19502 Register RegHi = RVLocs[++i].getLocReg();
19503
19504 if (STI.isRegisterReservedByUser(i: RegLo) ||
19505 STI.isRegisterReservedByUser(i: RegHi))
19506 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
19507 MF.getFunction(),
19508 "Return value register required, but has been reserved."});
19509
19510 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
19511 Glue = Chain.getValue(R: 1);
19512 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19513 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
19514 Glue = Chain.getValue(R: 1);
19515 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19516 } else {
19517 // Handle a 'normal' return.
19518 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19519 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
19520
19521 if (STI.isRegisterReservedByUser(i: VA.getLocReg()))
19522 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
19523 MF.getFunction(),
19524 "Return value register required, but has been reserved."});
19525
19526 // Guarantee that all emitted copies are stuck together.
19527 Glue = Chain.getValue(R: 1);
19528 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
19529 }
19530 }
19531
19532 RetOps[0] = Chain; // Update chain.
19533
19534 // Add the glue node if we have it.
19535 if (Glue.getNode()) {
19536 RetOps.push_back(Elt: Glue);
19537 }
19538
19539 if (any_of(Range&: RVLocs,
19540 P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19541 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19542
19543 unsigned RetOpc = RISCVISD::RET_GLUE;
19544 // Interrupt service routines use different return instructions.
19545 const Function &Func = DAG.getMachineFunction().getFunction();
19546 if (Func.hasFnAttribute(Kind: "interrupt")) {
19547 if (!Func.getReturnType()->isVoidTy())
19548 report_fatal_error(
19549 reason: "Functions with the interrupt attribute must have void return type!");
19550
19551 MachineFunction &MF = DAG.getMachineFunction();
19552 StringRef Kind =
19553 MF.getFunction().getFnAttribute(Kind: "interrupt").getValueAsString();
19554
19555 if (Kind == "supervisor")
19556 RetOpc = RISCVISD::SRET_GLUE;
19557 else
19558 RetOpc = RISCVISD::MRET_GLUE;
19559 }
19560
19561 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19562}
19563
19564void RISCVTargetLowering::validateCCReservedRegs(
19565 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19566 MachineFunction &MF) const {
19567 const Function &F = MF.getFunction();
19568 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19569
19570 if (llvm::any_of(Range: Regs, P: [&STI](auto Reg) {
19571 return STI.isRegisterReservedByUser(i: Reg.first);
19572 }))
19573 F.getContext().diagnose(DI: DiagnosticInfoUnsupported{
19574 F, "Argument register required, but has been reserved."});
19575}
19576
19577// Check if the result of the node is only used as a return value, as
19578// otherwise we can't perform a tail-call.
19579bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19580 if (N->getNumValues() != 1)
19581 return false;
19582 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
19583 return false;
19584
19585 SDNode *Copy = *N->use_begin();
19586
19587 if (Copy->getOpcode() == ISD::BITCAST) {
19588 return isUsedByReturnOnly(N: Copy, Chain);
19589 }
19590
19591 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19592 // with soft float ABIs.
19593 if (Copy->getOpcode() != ISD::CopyToReg) {
19594 return false;
19595 }
19596
19597 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19598 // isn't safe to perform a tail call.
19599 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19600 return false;
19601
19602 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19603 bool HasRet = false;
19604 for (SDNode *Node : Copy->uses()) {
19605 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19606 return false;
19607 HasRet = true;
19608 }
19609 if (!HasRet)
19610 return false;
19611
19612 Chain = Copy->getOperand(Num: 0);
19613 return true;
19614}
19615
19616bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19617 return CI->isTailCall();
19618}
19619
19620const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19621#define NODE_NAME_CASE(NODE) \
19622 case RISCVISD::NODE: \
19623 return "RISCVISD::" #NODE;
19624 // clang-format off
19625 switch ((RISCVISD::NodeType)Opcode) {
19626 case RISCVISD::FIRST_NUMBER:
19627 break;
19628 NODE_NAME_CASE(RET_GLUE)
19629 NODE_NAME_CASE(SRET_GLUE)
19630 NODE_NAME_CASE(MRET_GLUE)
19631 NODE_NAME_CASE(CALL)
19632 NODE_NAME_CASE(SELECT_CC)
19633 NODE_NAME_CASE(BR_CC)
19634 NODE_NAME_CASE(BuildPairF64)
19635 NODE_NAME_CASE(SplitF64)
19636 NODE_NAME_CASE(TAIL)
19637 NODE_NAME_CASE(ADD_LO)
19638 NODE_NAME_CASE(HI)
19639 NODE_NAME_CASE(LLA)
19640 NODE_NAME_CASE(ADD_TPREL)
19641 NODE_NAME_CASE(MULHSU)
19642 NODE_NAME_CASE(SHL_ADD)
19643 NODE_NAME_CASE(SLLW)
19644 NODE_NAME_CASE(SRAW)
19645 NODE_NAME_CASE(SRLW)
19646 NODE_NAME_CASE(DIVW)
19647 NODE_NAME_CASE(DIVUW)
19648 NODE_NAME_CASE(REMUW)
19649 NODE_NAME_CASE(ROLW)
19650 NODE_NAME_CASE(RORW)
19651 NODE_NAME_CASE(CLZW)
19652 NODE_NAME_CASE(CTZW)
19653 NODE_NAME_CASE(ABSW)
19654 NODE_NAME_CASE(FMV_H_X)
19655 NODE_NAME_CASE(FMV_X_ANYEXTH)
19656 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19657 NODE_NAME_CASE(FMV_W_X_RV64)
19658 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19659 NODE_NAME_CASE(FCVT_X)
19660 NODE_NAME_CASE(FCVT_XU)
19661 NODE_NAME_CASE(FCVT_W_RV64)
19662 NODE_NAME_CASE(FCVT_WU_RV64)
19663 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19664 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19665 NODE_NAME_CASE(FP_ROUND_BF16)
19666 NODE_NAME_CASE(FP_EXTEND_BF16)
19667 NODE_NAME_CASE(FROUND)
19668 NODE_NAME_CASE(FCLASS)
19669 NODE_NAME_CASE(FMAX)
19670 NODE_NAME_CASE(FMIN)
19671 NODE_NAME_CASE(READ_COUNTER_WIDE)
19672 NODE_NAME_CASE(BREV8)
19673 NODE_NAME_CASE(ORC_B)
19674 NODE_NAME_CASE(ZIP)
19675 NODE_NAME_CASE(UNZIP)
19676 NODE_NAME_CASE(CLMUL)
19677 NODE_NAME_CASE(CLMULH)
19678 NODE_NAME_CASE(CLMULR)
19679 NODE_NAME_CASE(MOPR)
19680 NODE_NAME_CASE(MOPRR)
19681 NODE_NAME_CASE(SHA256SIG0)
19682 NODE_NAME_CASE(SHA256SIG1)
19683 NODE_NAME_CASE(SHA256SUM0)
19684 NODE_NAME_CASE(SHA256SUM1)
19685 NODE_NAME_CASE(SM4KS)
19686 NODE_NAME_CASE(SM4ED)
19687 NODE_NAME_CASE(SM3P0)
19688 NODE_NAME_CASE(SM3P1)
19689 NODE_NAME_CASE(TH_LWD)
19690 NODE_NAME_CASE(TH_LWUD)
19691 NODE_NAME_CASE(TH_LDD)
19692 NODE_NAME_CASE(TH_SWD)
19693 NODE_NAME_CASE(TH_SDD)
19694 NODE_NAME_CASE(VMV_V_V_VL)
19695 NODE_NAME_CASE(VMV_V_X_VL)
19696 NODE_NAME_CASE(VFMV_V_F_VL)
19697 NODE_NAME_CASE(VMV_X_S)
19698 NODE_NAME_CASE(VMV_S_X_VL)
19699 NODE_NAME_CASE(VFMV_S_F_VL)
19700 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19701 NODE_NAME_CASE(READ_VLENB)
19702 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19703 NODE_NAME_CASE(VSLIDEUP_VL)
19704 NODE_NAME_CASE(VSLIDE1UP_VL)
19705 NODE_NAME_CASE(VSLIDEDOWN_VL)
19706 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19707 NODE_NAME_CASE(VFSLIDE1UP_VL)
19708 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19709 NODE_NAME_CASE(VID_VL)
19710 NODE_NAME_CASE(VFNCVT_ROD_VL)
19711 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19712 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19713 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19714 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19715 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19716 NODE_NAME_CASE(VECREDUCE_AND_VL)
19717 NODE_NAME_CASE(VECREDUCE_OR_VL)
19718 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19719 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19720 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19721 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19722 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19723 NODE_NAME_CASE(ADD_VL)
19724 NODE_NAME_CASE(AND_VL)
19725 NODE_NAME_CASE(MUL_VL)
19726 NODE_NAME_CASE(OR_VL)
19727 NODE_NAME_CASE(SDIV_VL)
19728 NODE_NAME_CASE(SHL_VL)
19729 NODE_NAME_CASE(SREM_VL)
19730 NODE_NAME_CASE(SRA_VL)
19731 NODE_NAME_CASE(SRL_VL)
19732 NODE_NAME_CASE(ROTL_VL)
19733 NODE_NAME_CASE(ROTR_VL)
19734 NODE_NAME_CASE(SUB_VL)
19735 NODE_NAME_CASE(UDIV_VL)
19736 NODE_NAME_CASE(UREM_VL)
19737 NODE_NAME_CASE(XOR_VL)
19738 NODE_NAME_CASE(AVGFLOORU_VL)
19739 NODE_NAME_CASE(AVGCEILU_VL)
19740 NODE_NAME_CASE(SADDSAT_VL)
19741 NODE_NAME_CASE(UADDSAT_VL)
19742 NODE_NAME_CASE(SSUBSAT_VL)
19743 NODE_NAME_CASE(USUBSAT_VL)
19744 NODE_NAME_CASE(FADD_VL)
19745 NODE_NAME_CASE(FSUB_VL)
19746 NODE_NAME_CASE(FMUL_VL)
19747 NODE_NAME_CASE(FDIV_VL)
19748 NODE_NAME_CASE(FNEG_VL)
19749 NODE_NAME_CASE(FABS_VL)
19750 NODE_NAME_CASE(FSQRT_VL)
19751 NODE_NAME_CASE(FCLASS_VL)
19752 NODE_NAME_CASE(VFMADD_VL)
19753 NODE_NAME_CASE(VFNMADD_VL)
19754 NODE_NAME_CASE(VFMSUB_VL)
19755 NODE_NAME_CASE(VFNMSUB_VL)
19756 NODE_NAME_CASE(VFWMADD_VL)
19757 NODE_NAME_CASE(VFWNMADD_VL)
19758 NODE_NAME_CASE(VFWMSUB_VL)
19759 NODE_NAME_CASE(VFWNMSUB_VL)
19760 NODE_NAME_CASE(FCOPYSIGN_VL)
19761 NODE_NAME_CASE(SMIN_VL)
19762 NODE_NAME_CASE(SMAX_VL)
19763 NODE_NAME_CASE(UMIN_VL)
19764 NODE_NAME_CASE(UMAX_VL)
19765 NODE_NAME_CASE(BITREVERSE_VL)
19766 NODE_NAME_CASE(BSWAP_VL)
19767 NODE_NAME_CASE(CTLZ_VL)
19768 NODE_NAME_CASE(CTTZ_VL)
19769 NODE_NAME_CASE(CTPOP_VL)
19770 NODE_NAME_CASE(VFMIN_VL)
19771 NODE_NAME_CASE(VFMAX_VL)
19772 NODE_NAME_CASE(MULHS_VL)
19773 NODE_NAME_CASE(MULHU_VL)
19774 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19775 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19776 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19777 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19778 NODE_NAME_CASE(VFCVT_X_F_VL)
19779 NODE_NAME_CASE(VFCVT_XU_F_VL)
19780 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19781 NODE_NAME_CASE(SINT_TO_FP_VL)
19782 NODE_NAME_CASE(UINT_TO_FP_VL)
19783 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19784 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19785 NODE_NAME_CASE(FP_EXTEND_VL)
19786 NODE_NAME_CASE(FP_ROUND_VL)
19787 NODE_NAME_CASE(STRICT_FADD_VL)
19788 NODE_NAME_CASE(STRICT_FSUB_VL)
19789 NODE_NAME_CASE(STRICT_FMUL_VL)
19790 NODE_NAME_CASE(STRICT_FDIV_VL)
19791 NODE_NAME_CASE(STRICT_FSQRT_VL)
19792 NODE_NAME_CASE(STRICT_VFMADD_VL)
19793 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19794 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19795 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19796 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19797 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19798 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19799 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19800 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19801 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19802 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19803 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19804 NODE_NAME_CASE(STRICT_FSETCC_VL)
19805 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19806 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19807 NODE_NAME_CASE(VWMUL_VL)
19808 NODE_NAME_CASE(VWMULU_VL)
19809 NODE_NAME_CASE(VWMULSU_VL)
19810 NODE_NAME_CASE(VWADD_VL)
19811 NODE_NAME_CASE(VWADDU_VL)
19812 NODE_NAME_CASE(VWSUB_VL)
19813 NODE_NAME_CASE(VWSUBU_VL)
19814 NODE_NAME_CASE(VWADD_W_VL)
19815 NODE_NAME_CASE(VWADDU_W_VL)
19816 NODE_NAME_CASE(VWSUB_W_VL)
19817 NODE_NAME_CASE(VWSUBU_W_VL)
19818 NODE_NAME_CASE(VWSLL_VL)
19819 NODE_NAME_CASE(VFWMUL_VL)
19820 NODE_NAME_CASE(VFWADD_VL)
19821 NODE_NAME_CASE(VFWSUB_VL)
19822 NODE_NAME_CASE(VFWADD_W_VL)
19823 NODE_NAME_CASE(VFWSUB_W_VL)
19824 NODE_NAME_CASE(VWMACC_VL)
19825 NODE_NAME_CASE(VWMACCU_VL)
19826 NODE_NAME_CASE(VWMACCSU_VL)
19827 NODE_NAME_CASE(VNSRL_VL)
19828 NODE_NAME_CASE(SETCC_VL)
19829 NODE_NAME_CASE(VMERGE_VL)
19830 NODE_NAME_CASE(VMAND_VL)
19831 NODE_NAME_CASE(VMOR_VL)
19832 NODE_NAME_CASE(VMXOR_VL)
19833 NODE_NAME_CASE(VMCLR_VL)
19834 NODE_NAME_CASE(VMSET_VL)
19835 NODE_NAME_CASE(VRGATHER_VX_VL)
19836 NODE_NAME_CASE(VRGATHER_VV_VL)
19837 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19838 NODE_NAME_CASE(VSEXT_VL)
19839 NODE_NAME_CASE(VZEXT_VL)
19840 NODE_NAME_CASE(VCPOP_VL)
19841 NODE_NAME_CASE(VFIRST_VL)
19842 NODE_NAME_CASE(READ_CSR)
19843 NODE_NAME_CASE(WRITE_CSR)
19844 NODE_NAME_CASE(SWAP_CSR)
19845 NODE_NAME_CASE(CZERO_EQZ)
19846 NODE_NAME_CASE(CZERO_NEZ)
19847 NODE_NAME_CASE(SF_VC_XV_SE)
19848 NODE_NAME_CASE(SF_VC_IV_SE)
19849 NODE_NAME_CASE(SF_VC_VV_SE)
19850 NODE_NAME_CASE(SF_VC_FV_SE)
19851 NODE_NAME_CASE(SF_VC_XVV_SE)
19852 NODE_NAME_CASE(SF_VC_IVV_SE)
19853 NODE_NAME_CASE(SF_VC_VVV_SE)
19854 NODE_NAME_CASE(SF_VC_FVV_SE)
19855 NODE_NAME_CASE(SF_VC_XVW_SE)
19856 NODE_NAME_CASE(SF_VC_IVW_SE)
19857 NODE_NAME_CASE(SF_VC_VVW_SE)
19858 NODE_NAME_CASE(SF_VC_FVW_SE)
19859 NODE_NAME_CASE(SF_VC_V_X_SE)
19860 NODE_NAME_CASE(SF_VC_V_I_SE)
19861 NODE_NAME_CASE(SF_VC_V_XV_SE)
19862 NODE_NAME_CASE(SF_VC_V_IV_SE)
19863 NODE_NAME_CASE(SF_VC_V_VV_SE)
19864 NODE_NAME_CASE(SF_VC_V_FV_SE)
19865 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19866 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19867 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19868 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19869 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19870 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19871 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19872 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19873 }
19874 // clang-format on
19875 return nullptr;
19876#undef NODE_NAME_CASE
19877}
19878
19879/// getConstraintType - Given a constraint letter, return the type of
19880/// constraint it is for this target.
19881RISCVTargetLowering::ConstraintType
19882RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
19883 if (Constraint.size() == 1) {
19884 switch (Constraint[0]) {
19885 default:
19886 break;
19887 case 'f':
19888 return C_RegisterClass;
19889 case 'I':
19890 case 'J':
19891 case 'K':
19892 return C_Immediate;
19893 case 'A':
19894 return C_Memory;
19895 case 's':
19896 case 'S': // A symbolic address
19897 return C_Other;
19898 }
19899 } else {
19900 if (Constraint == "vr" || Constraint == "vm")
19901 return C_RegisterClass;
19902 }
19903 return TargetLowering::getConstraintType(Constraint);
19904}
19905
19906std::pair<unsigned, const TargetRegisterClass *>
19907RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
19908 StringRef Constraint,
19909 MVT VT) const {
19910 // First, see if this is a constraint that directly corresponds to a RISC-V
19911 // register class.
19912 if (Constraint.size() == 1) {
19913 switch (Constraint[0]) {
19914 case 'r':
19915 // TODO: Support fixed vectors up to XLen for P extension?
19916 if (VT.isVector())
19917 break;
19918 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19919 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19920 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19921 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19922 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19923 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19924 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19925 case 'f':
19926 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19927 return std::make_pair(0U, &RISCV::FPR16RegClass);
19928 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19929 return std::make_pair(0U, &RISCV::FPR32RegClass);
19930 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19931 return std::make_pair(0U, &RISCV::FPR64RegClass);
19932 break;
19933 default:
19934 break;
19935 }
19936 } else if (Constraint == "vr") {
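    // Return the smallest LMUL register class (VR, then VRM2/VRM4/VRM8) for
    // which the requested type is legal.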
19937 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19938 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19939 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19940 return std::make_pair(0U, RC);
19941 }
19942 } else if (Constraint == "vm") {
19943 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19944 return std::make_pair(0U, &RISCV::VMV0RegClass);
19945 }
19946
19947 // Clang will correctly decode the usage of register name aliases into their
19948 // official names. However, other frontends like `rustc` do not. This allows
19949 // users of these frontends to use the ABI names for registers in LLVM-style
19950 // register constraints.
19951 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19952 .Case("{zero}", RISCV::X0)
19953 .Case("{ra}", RISCV::X1)
19954 .Case("{sp}", RISCV::X2)
19955 .Case("{gp}", RISCV::X3)
19956 .Case("{tp}", RISCV::X4)
19957 .Case("{t0}", RISCV::X5)
19958 .Case("{t1}", RISCV::X6)
19959 .Case("{t2}", RISCV::X7)
19960 .Cases("{s0}", "{fp}", RISCV::X8)
19961 .Case("{s1}", RISCV::X9)
19962 .Case("{a0}", RISCV::X10)
19963 .Case("{a1}", RISCV::X11)
19964 .Case("{a2}", RISCV::X12)
19965 .Case("{a3}", RISCV::X13)
19966 .Case("{a4}", RISCV::X14)
19967 .Case("{a5}", RISCV::X15)
19968 .Case("{a6}", RISCV::X16)
19969 .Case("{a7}", RISCV::X17)
19970 .Case("{s2}", RISCV::X18)
19971 .Case("{s3}", RISCV::X19)
19972 .Case("{s4}", RISCV::X20)
19973 .Case("{s5}", RISCV::X21)
19974 .Case("{s6}", RISCV::X22)
19975 .Case("{s7}", RISCV::X23)
19976 .Case("{s8}", RISCV::X24)
19977 .Case("{s9}", RISCV::X25)
19978 .Case("{s10}", RISCV::X26)
19979 .Case("{s11}", RISCV::X27)
19980 .Case("{t3}", RISCV::X28)
19981 .Case("{t4}", RISCV::X29)
19982 .Case("{t5}", RISCV::X30)
19983 .Case("{t6}", RISCV::X31)
19984 .Default(RISCV::NoRegister);
19985 if (XRegFromAlias != RISCV::NoRegister)
19986 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
19987
19988  // TargetLowering::getRegForInlineAsmConstraint chooses registers for InlineAsm
19989  // constraints by the name of the TableGen record rather than the AsmName, and
19990  // we also want to match these names to the widest floating-point register type
19991  // available, so manually select floating-point registers here.
19992  //
19993  // The second name in each .Cases entry is the ABI name of the register, so
19994  // that frontends can also use the ABI names in register constraint lists.
19995 if (Subtarget.hasStdExtF()) {
19996 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
19997 .Cases("{f0}", "{ft0}", RISCV::F0_F)
19998 .Cases("{f1}", "{ft1}", RISCV::F1_F)
19999 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20000 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20001 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20002 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20003 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20004 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20005 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20006 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20007 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20008 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20009 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20010 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20011 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20012 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20013 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20014 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20015 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20016 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20017 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20018 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20019 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20020 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20021 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20022 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20023 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20024 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20025 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20026 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20027 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20028 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20029 .Default(RISCV::NoRegister);
20030 if (FReg != RISCV::NoRegister) {
20031 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
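      // The F/D/H views of an FPR share the same register number, so convert
      // the FPR32 name to the matching FPR64 or FPR16 name with simple index
      // arithmetic (e.g. {fa0} becomes F10_D when a 64-bit value is needed).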
20032 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20033 unsigned RegNo = FReg - RISCV::F0_F;
20034 unsigned DReg = RISCV::F0_D + RegNo;
20035 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20036 }
20037 if (VT == MVT::f32 || VT == MVT::Other)
20038 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20039 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20040 unsigned RegNo = FReg - RISCV::F0_F;
20041 unsigned HReg = RISCV::F0_H + RegNo;
20042 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20043 }
20044 }
20045 }
20046
20047 if (Subtarget.hasVInstructions()) {
20048 Register VReg = StringSwitch<Register>(Constraint.lower())
20049 .Case("{v0}", RISCV::V0)
20050 .Case("{v1}", RISCV::V1)
20051 .Case("{v2}", RISCV::V2)
20052 .Case("{v3}", RISCV::V3)
20053 .Case("{v4}", RISCV::V4)
20054 .Case("{v5}", RISCV::V5)
20055 .Case("{v6}", RISCV::V6)
20056 .Case("{v7}", RISCV::V7)
20057 .Case("{v8}", RISCV::V8)
20058 .Case("{v9}", RISCV::V9)
20059 .Case("{v10}", RISCV::V10)
20060 .Case("{v11}", RISCV::V11)
20061 .Case("{v12}", RISCV::V12)
20062 .Case("{v13}", RISCV::V13)
20063 .Case("{v14}", RISCV::V14)
20064 .Case("{v15}", RISCV::V15)
20065 .Case("{v16}", RISCV::V16)
20066 .Case("{v17}", RISCV::V17)
20067 .Case("{v18}", RISCV::V18)
20068 .Case("{v19}", RISCV::V19)
20069 .Case("{v20}", RISCV::V20)
20070 .Case("{v21}", RISCV::V21)
20071 .Case("{v22}", RISCV::V22)
20072 .Case("{v23}", RISCV::V23)
20073 .Case("{v24}", RISCV::V24)
20074 .Case("{v25}", RISCV::V25)
20075 .Case("{v26}", RISCV::V26)
20076 .Case("{v27}", RISCV::V27)
20077 .Case("{v28}", RISCV::V28)
20078 .Case("{v29}", RISCV::V29)
20079 .Case("{v30}", RISCV::V30)
20080 .Case("{v31}", RISCV::V31)
20081 .Default(RISCV::NoRegister);
20082 if (VReg != RISCV::NoRegister) {
20083 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20084 return std::make_pair(VReg, &RISCV::VMRegClass);
20085 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20086 return std::make_pair(VReg, &RISCV::VRRegClass);
20087 for (const auto *RC :
20088 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20089 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20090 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20091 return std::make_pair(VReg, RC);
20092 }
20093 }
20094 }
20095 }
20096
20097 std::pair<Register, const TargetRegisterClass *> Res =
20098 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20099
20100 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20101 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20102 // Subtarget into account.
20103 if (Res.second == &RISCV::GPRF16RegClass ||
20104 Res.second == &RISCV::GPRF32RegClass ||
20105 Res.second == &RISCV::GPRPairRegClass)
20106 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20107
20108 return Res;
20109}
20110
20111InlineAsm::ConstraintCode
20112RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20113 // Currently only support length 1 constraints.
20114 if (ConstraintCode.size() == 1) {
20115 switch (ConstraintCode[0]) {
20116 case 'A':
20117 return InlineAsm::ConstraintCode::A;
20118 default:
20119 break;
20120 }
20121 }
20122
20123 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20124}
20125
20126void RISCVTargetLowering::LowerAsmOperandForConstraint(
20127 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20128 SelectionDAG &DAG) const {
20129 // Currently only support length 1 constraints.
20130 if (Constraint.size() == 1) {
20131 switch (Constraint[0]) {
20132 case 'I':
20133 // Validate & create a 12-bit signed immediate operand.
20134 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
20135 uint64_t CVal = C->getSExtValue();
20136 if (isInt<12>(x: CVal))
20137 Ops.push_back(
20138 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
20139 }
20140 return;
20141 case 'J':
20142 // Validate & create an integer zero operand.
20143 if (isNullConstant(V: Op))
20144 Ops.push_back(
20145 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
20146 return;
20147 case 'K':
20148 // Validate & create a 5-bit unsigned immediate operand.
20149 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
20150 uint64_t CVal = C->getZExtValue();
20151 if (isUInt<5>(x: CVal))
20152 Ops.push_back(
20153 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
20154 }
20155 return;
20156 case 'S':
20157 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint: "s", Ops, DAG);
20158 return;
20159 default:
20160 break;
20161 }
20162 }
20163 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20164}
20165
20166Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20167 Instruction *Inst,
20168 AtomicOrdering Ord) const {
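  // Ztso already guarantees TSO ordering in hardware, so only a seq_cst load
  // still needs an explicit leading fence.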
20169 if (Subtarget.hasStdExtZtso()) {
20170 if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20171 return Builder.CreateFence(Ordering: Ord);
20172 return nullptr;
20173 }
20174
20175 if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20176 return Builder.CreateFence(Ordering: Ord);
20177 if (isa<StoreInst>(Val: Inst) && isReleaseOrStronger(AO: Ord))
20178 return Builder.CreateFence(Ordering: AtomicOrdering::Release);
20179 return nullptr;
20180}
20181
20182Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20183 Instruction *Inst,
20184 AtomicOrdering Ord) const {
20185 if (Subtarget.hasStdExtZtso()) {
20186 if (isa<StoreInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20187 return Builder.CreateFence(Ordering: Ord);
20188 return nullptr;
20189 }
20190
20191 if (isa<LoadInst>(Val: Inst) && isAcquireOrStronger(AO: Ord))
20192 return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
20193 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Val: Inst) &&
20194 Ord == AtomicOrdering::SequentiallyConsistent)
20195 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
20196 return nullptr;
20197}
20198
20199TargetLowering::AtomicExpansionKind
20200RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20201 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20202 // point operations can't be used in an lr/sc sequence without breaking the
20203 // forward-progress guarantee.
20204 if (AI->isFloatingPointOperation() ||
20205 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20206 AI->getOperation() == AtomicRMWInst::UDecWrap)
20207 return AtomicExpansionKind::CmpXChg;
20208
20209  // Don't expand forced atomics; we want to have __sync libcalls instead.
20210 if (Subtarget.hasForcedAtomics())
20211 return AtomicExpansionKind::None;
20212
20213 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
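  // There is no AMO instruction for Nand, so prefer a compare-exchange loop
  // when Zacas is available (Zabha additionally covers the 8/16-bit widths);
  // otherwise a narrow Nand still needs the masked LR/SC intrinsic.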
20214 if (AI->getOperation() == AtomicRMWInst::Nand) {
20215 if (Subtarget.hasStdExtZacas() &&
20216 (Size >= 32 || Subtarget.hasStdExtZabha()))
20217 return AtomicExpansionKind::CmpXChg;
20218 if (Size < 32)
20219 return AtomicExpansionKind::MaskedIntrinsic;
20220 }
20221
20222 if (Size < 32 && !Subtarget.hasStdExtZabha())
20223 return AtomicExpansionKind::MaskedIntrinsic;
20224
20225 return AtomicExpansionKind::None;
20226}
20227
20228static Intrinsic::ID
20229getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20230 if (XLen == 32) {
20231 switch (BinOp) {
20232 default:
20233 llvm_unreachable("Unexpected AtomicRMW BinOp");
20234 case AtomicRMWInst::Xchg:
20235 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20236 case AtomicRMWInst::Add:
20237 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20238 case AtomicRMWInst::Sub:
20239 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20240 case AtomicRMWInst::Nand:
20241 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20242 case AtomicRMWInst::Max:
20243 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20244 case AtomicRMWInst::Min:
20245 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20246 case AtomicRMWInst::UMax:
20247 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20248 case AtomicRMWInst::UMin:
20249 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20250 }
20251 }
20252
20253 if (XLen == 64) {
20254 switch (BinOp) {
20255 default:
20256 llvm_unreachable("Unexpected AtomicRMW BinOp");
20257 case AtomicRMWInst::Xchg:
20258 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20259 case AtomicRMWInst::Add:
20260 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20261 case AtomicRMWInst::Sub:
20262 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20263 case AtomicRMWInst::Nand:
20264 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20265 case AtomicRMWInst::Max:
20266 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20267 case AtomicRMWInst::Min:
20268 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20269 case AtomicRMWInst::UMax:
20270 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20271 case AtomicRMWInst::UMin:
20272 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20273 }
20274 }
20275
20276  llvm_unreachable("Unexpected XLen");
20277}
20278
20279Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20280 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20281 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20282 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20283 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20284 // mask, as this produces better code than the LR/SC loop emitted by
20285 // int_riscv_masked_atomicrmw_xchg.
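  // For example, an 8-bit "atomicrmw xchg ptr, 0" becomes an AtomicRMWInst::And
  // of the aligned word with ~Mask, clearing just the addressed byte, and an
  // exchange with -1 becomes an Or with Mask, setting it.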
20286 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20287 isa<ConstantInt>(Val: AI->getValOperand())) {
20288 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
20289 if (CVal->isZero())
20290 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
20291 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
20292 Align: AI->getAlign(), Ordering: Ord);
20293 if (CVal->isMinusOne())
20294 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
20295 Align: AI->getAlign(), Ordering: Ord);
20296 }
20297
20298 unsigned XLen = Subtarget.getXLen();
20299 Value *Ordering =
20300 Builder.getIntN(N: XLen, C: static_cast<uint64_t>(AI->getOrdering()));
20301 Type *Tys[] = {AlignedAddr->getType()};
20302 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20303 M: AI->getModule(),
20304 id: getIntrinsicForMaskedAtomicRMWBinOp(XLen, BinOp: AI->getOperation()), Tys);
20305
20306 if (XLen == 64) {
20307 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
20308 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
20309 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
20310 }
20311
20312 Value *Result;
20313
20314 // Must pass the shift amount needed to sign extend the loaded value prior
20315 // to performing a signed comparison for min/max. ShiftAmt is the number of
20316 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20317 // is the number of bits to left+right shift the value in order to
20318 // sign-extend.
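  // For example, on RV32 with an i8 field at bit offset 8 (ValWidth = 8,
  // ShiftAmt = 8), SextShamt is (32 - 8) - 8 = 16: shifting the loaded word
  // left by 16 moves the field's sign bit to bit 31, and an arithmetic shift
  // right by 16 then yields a sign-extended value for the comparison.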
20319 if (AI->getOperation() == AtomicRMWInst::Min ||
20320 AI->getOperation() == AtomicRMWInst::Max) {
20321 const DataLayout &DL = AI->getModule()->getDataLayout();
20322 unsigned ValWidth =
20323 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
20324 Value *SextShamt =
20325 Builder.CreateSub(LHS: Builder.getIntN(N: XLen, C: XLen - ValWidth), RHS: ShiftAmt);
20326 Result = Builder.CreateCall(Callee: LrwOpScwLoop,
20327 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20328 } else {
20329 Result =
20330 Builder.CreateCall(Callee: LrwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
20331 }
20332
20333 if (XLen == 64)
20334 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
20335 return Result;
20336}
20337
20338TargetLowering::AtomicExpansionKind
20339RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20340 AtomicCmpXchgInst *CI) const {
20341 // Don't expand forced atomics, we want to have __sync libcalls instead.
20342 if (Subtarget.hasForcedAtomics())
20343 return AtomicExpansionKind::None;
20344
20345 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20346 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20347 (Size == 8 || Size == 16))
20348 return AtomicExpansionKind::MaskedIntrinsic;
20349 return AtomicExpansionKind::None;
20350}
20351
20352Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20353 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20354 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20355 unsigned XLen = Subtarget.getXLen();
20356 Value *Ordering = Builder.getIntN(N: XLen, C: static_cast<uint64_t>(Ord));
20357 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20358 if (XLen == 64) {
20359 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
20360 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
20361 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
20362 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20363 }
20364 Type *Tys[] = {AlignedAddr->getType()};
20365 Function *MaskedCmpXchg =
20366 Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys);
20367 Value *Result = Builder.CreateCall(
20368 Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20369 if (XLen == 64)
20370 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
20371 return Result;
20372}
20373
20374bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20375 EVT DataVT) const {
20376 // We have indexed loads for all supported EEW types. Indices are always
20377 // zero extended.
20378 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20379 isTypeLegal(Extend.getValueType()) &&
20380 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20381 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20382}
20383
20384bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20385 EVT VT) const {
20386 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20387 return false;
20388
20389 switch (FPVT.getSimpleVT().SimpleTy) {
20390 case MVT::f16:
20391 return Subtarget.hasStdExtZfhmin();
20392 case MVT::f32:
20393 return Subtarget.hasStdExtF();
20394 case MVT::f64:
20395 return Subtarget.hasStdExtD();
20396 default:
20397 return false;
20398 }
20399}
20400
20401unsigned RISCVTargetLowering::getJumpTableEncoding() const {
  // If we are using the small code model, we can reduce the size of each jump
  // table entry to 4 bytes.
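  // On RV64 this halves each entry relative to the pointer-sized (8-byte)
  // entries used otherwise.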
20404 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20405 getTargetMachine().getCodeModel() == CodeModel::Small) {
20406 return MachineJumpTableInfo::EK_Custom32;
20407 }
20408 return TargetLowering::getJumpTableEncoding();
20409}
20410
20411const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20412 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20413 unsigned uid, MCContext &Ctx) const {
20414 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20415 getTargetMachine().getCodeModel() == CodeModel::Small);
20416 return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
20417}
20418
20419bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20420 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20421 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20422 // a power of two as well.
20423 // FIXME: This doesn't work for zve32, but that's already broken
20424 // elsewhere for the same reason.
20425 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20426 static_assert(RISCV::RVVBitsPerBlock == 64,
20427 "RVVBitsPerBlock changed, audit needed");
20428 return true;
20429}
20430
20431bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20432 SDValue &Offset,
20433 ISD::MemIndexedMode &AM,
20434 SelectionDAG &DAG) const {
20435 // Target does not support indexed loads.
20436 if (!Subtarget.hasVendorXTHeadMemIdx())
20437 return false;
20438
20439 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20440 return false;
20441
20442 Base = Op->getOperand(Num: 0);
20443 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: 1))) {
20444 int64_t RHSC = RHS->getSExtValue();
20445 if (Op->getOpcode() == ISD::SUB)
20446 RHSC = -(uint64_t)RHSC;
20447
20448 // The constants that can be encoded in the THeadMemIdx instructions
20449 // are of the form (sign_extend(imm5) << imm2).
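    // For example, 120 (= 15 << 3) and -128 (= -16 << 3) are encodable, while
    // 121 is not, since no shift of a signed 5-bit immediate produces it.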
20450 bool isLegalIndexedOffset = false;
20451 for (unsigned i = 0; i < 4; i++)
20452 if (isInt<5>(x: RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20453 isLegalIndexedOffset = true;
20454 break;
20455 }
20456
20457 if (!isLegalIndexedOffset)
20458 return false;
20459
20460 Offset = Op->getOperand(Num: 1);
20461 return true;
20462 }
20463
20464 return false;
20465}
20466
20467bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20468 SDValue &Offset,
20469 ISD::MemIndexedMode &AM,
20470 SelectionDAG &DAG) const {
20471 EVT VT;
20472 SDValue Ptr;
20473 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
20474 VT = LD->getMemoryVT();
20475 Ptr = LD->getBasePtr();
20476 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
20477 VT = ST->getMemoryVT();
20478 Ptr = ST->getBasePtr();
20479 } else
20480 return false;
20481
20482 if (!getIndexedAddressParts(Op: Ptr.getNode(), Base, Offset, AM, DAG))
20483 return false;
20484
20485 AM = ISD::PRE_INC;
20486 return true;
20487}
20488
20489bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
20490 SDValue &Base,
20491 SDValue &Offset,
20492 ISD::MemIndexedMode &AM,
20493 SelectionDAG &DAG) const {
20494 EVT VT;
20495 SDValue Ptr;
20496 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
20497 VT = LD->getMemoryVT();
20498 Ptr = LD->getBasePtr();
20499 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
20500 VT = ST->getMemoryVT();
20501 Ptr = ST->getBasePtr();
20502 } else
20503 return false;
20504
20505 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20506 return false;
  // Post-indexing updates the base, so it's not a valid transform
  // if the base is not the same as the load/store's pointer.
20509 if (Ptr != Base)
20510 return false;
20511
20512 AM = ISD::POST_INC;
20513 return true;
20514}
20515
20516bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20517 EVT VT) const {
20518 EVT SVT = VT.getScalarType();
20519
20520 if (!SVT.isSimple())
20521 return false;
20522
20523 switch (SVT.getSimpleVT().SimpleTy) {
20524 case MVT::f16:
20525 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20526 : Subtarget.hasStdExtZfhOrZhinx();
20527 case MVT::f32:
20528 return Subtarget.hasStdExtFOrZfinx();
20529 case MVT::f64:
20530 return Subtarget.hasStdExtDOrZdinx();
20531 default:
20532 break;
20533 }
20534
20535 return false;
20536}
20537
20538ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20539 // Zacas will use amocas.w which does not require extension.
20540 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20541}
20542
20543Register RISCVTargetLowering::getExceptionPointerRegister(
20544 const Constant *PersonalityFn) const {
20545 return RISCV::X10;
20546}
20547
20548Register RISCVTargetLowering::getExceptionSelectorRegister(
20549 const Constant *PersonalityFn) const {
20550 return RISCV::X11;
20551}
20552
20553bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if a LibCall argument
  // or return value is a float narrower than XLEN on a soft FP ABI.
20556 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20557 Type.getSizeInBits() < Subtarget.getXLen()))
20558 return false;
20559
20560 return true;
20561}
20562
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
20564 if (Subtarget.is64Bit() && Type == MVT::i32)
20565 return true;
20566
20567 return IsSigned;
20568}
20569
20570bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20571 SDValue C) const {
20572 // Check integral scalar types.
20573 const bool HasExtMOrZmmul =
20574 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20575 if (!VT.isScalarInteger())
20576 return false;
20577
  // Omit the optimization if the subtarget has the M extension (or Zmmul) and
  // the data size exceeds XLen.
20580 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20581 return false;
20582
20583 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
    // Break the MUL into a SLLI and an ADD/SUB.
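    // For example, x * 9 becomes (x << 3) + x and x * 7 becomes (x << 3) - x.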
20585 const APInt &Imm = ConstNode->getAPIntValue();
20586 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20587 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20588 return true;
20589
20590 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
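    // For example, x * 2050 (= 2 + 2048) can be selected as
    // (SH1ADD x, (SLLI x, 11)), avoiding materialization of 2050.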
20591 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(N: 12) &&
20592 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20593 (Imm - 8).isPowerOf2()))
20594 return true;
20595
    // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm would
    // otherwise need a LUI/ADDI pair to materialize.
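    // For example, x * 6144 (= 3 << 11) becomes ((x << 1) + x) << 11, which
    // avoids materializing 6144 with a LUI/ADDI pair.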
20598 if (!Imm.isSignedIntN(N: 12) && Imm.countr_zero() < 12 &&
20599 ConstNode->hasOneUse()) {
20600 APInt ImmS = Imm.ashr(ShiftAmt: Imm.countr_zero());
20601 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20602 (1 - ImmS).isPowerOf2())
20603 return true;
20604 }
20605 }
20606
20607 return false;
20608}
20609
20610bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20611 SDValue ConstNode) const {
20612 // Let the DAGCombiner decide for vectors.
20613 EVT VT = AddNode.getValueType();
20614 if (VT.isVector())
20615 return true;
20616
20617 // Let the DAGCombiner decide for larger types.
20618 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20619 return true;
20620
  // It is not profitable if c1 is simm12 while c1*c2 is not, since c1 can be
  // folded into an addi but c1*c2 would have to be materialized.
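  // For example, with c1 = 16 and c2 = 512, c1 fits in an addi immediate but
  // c1 * c2 = 8192 does not.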
20622 ConstantSDNode *C1Node = cast<ConstantSDNode>(Val: AddNode.getOperand(i: 1));
20623 ConstantSDNode *C2Node = cast<ConstantSDNode>(Val&: ConstNode);
20624 const APInt &C1 = C1Node->getAPIntValue();
20625 const APInt &C2 = C2Node->getAPIntValue();
20626 if (C1.isSignedIntN(N: 12) && !(C1 * C2).isSignedIntN(N: 12))
20627 return false;
20628
20629 // Default to true and let the DAGCombiner decide.
20630 return true;
20631}
20632
20633bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20634 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20635 unsigned *Fast) const {
20636 if (!VT.isVector()) {
20637 if (Fast)
20638 *Fast = Subtarget.enableUnalignedScalarMem();
20639 return Subtarget.enableUnalignedScalarMem();
20640 }
20641
  // All vector implementations must support element alignment.
20643 EVT ElemVT = VT.getVectorElementType();
20644 if (Alignment >= ElemVT.getStoreSize()) {
20645 if (Fast)
20646 *Fast = 1;
20647 return true;
20648 }
20649
  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed and of considering them fast.
20654 if (Fast)
20655 *Fast = Subtarget.enableUnalignedVectorMem();
20656 return Subtarget.enableUnalignedVectorMem();
20657}
20658
EVT RISCVTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
20662 if (!Subtarget.hasVInstructions())
20663 return MVT::Other;
20664
20665 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20666 return MVT::Other;
20667
20668 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20669 // has an expansion threshold, and we want the number of hardware memory
20670 // operations to correspond roughly to that threshold. LMUL>1 operations
20671 // are typically expanded linearly internally, and thus correspond to more
20672 // than one actual memory operation. Note that store merging and load
20673 // combining will typically form larger LMUL operations from the LMUL1
20674 // operations emitted here, and that's okay because combining isn't
20675 // introducing new memory operations; it's just merging existing ones.
20676 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20677 if (Op.size() < MinVLenInBytes)
20678 // TODO: Figure out short memops. For the moment, do the default thing
20679 // which ends up using scalar sequences.
20680 return MVT::Other;
20681
20682 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20683 // a large scalar constant and instead use vmv.v.x/i to do the
20684 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20685 // maximize the chance we can encode the size in the vsetvli.
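  // For example, a memset of the byte 0x2a with ELEN=64 would otherwise have
  // to materialize 0x2a2a2a2a2a2a2a2a in a scalar register before
  // broadcasting; with i8, a single vmv.v.x of 0x2a suffices.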
20686 MVT ELenVT = MVT::getIntegerVT(BitWidth: Subtarget.getELen());
20687 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20688
20689 // Do we have sufficient alignment for our preferred VT? If not, revert
20690 // to largest size allowed by our alignment criteria.
20691 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20692 Align RequiredAlign(PreferredVT.getStoreSize());
20693 if (Op.isFixedDstAlign())
20694 RequiredAlign = std::min(a: RequiredAlign, b: Op.getDstAlign());
20695 if (Op.isMemcpy())
20696 RequiredAlign = std::min(a: RequiredAlign, b: Op.getSrcAlign());
20697 PreferredVT = MVT::getIntegerVT(BitWidth: RequiredAlign.value() * 8);
20698 }
20699 return MVT::getVectorVT(VT: PreferredVT, NumElements: MinVLenInBytes/PreferredVT.getStoreSize());
20700}
20701
20702bool RISCVTargetLowering::splitValueIntoRegisterParts(
20703 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20704 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20705 bool IsABIRegCopy = CC.has_value();
20706 EVT ValueVT = Val.getValueType();
20707 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20708 PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // NaN, and cast to f32.
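    // For example, the f16 value 1.0 (0x3C00) is passed as the f32 bit
    // pattern 0xFFFF3C00, a NaN whose low half holds the original bits.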
20711 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20712 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20713 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20714 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20715 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20716 Parts[0] = Val;
20717 return true;
20718 }
20719
20720 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20721 LLVMContext &Context = *DAG.getContext();
20722 EVT ValueEltVT = ValueVT.getVectorElementType();
20723 EVT PartEltVT = PartVT.getVectorElementType();
20724 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20725 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20726 if (PartVTBitSize % ValueVTBitSize == 0) {
20727 assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, widen the value in its own
      // element type and then bitcast to PartVT.
      // For example, to copy <vscale x 1 x i8> into <vscale x 4 x i16>, we
      // widen it to <vscale x 8 x i8> with an INSERT_SUBVECTOR and then
      // bitcast the result to <vscale x 4 x i16>.
20734 if (ValueEltVT != PartEltVT) {
20735 if (PartVTBitSize > ValueVTBitSize) {
20736 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of elements should not be zero.");
20738 EVT SameEltTypeVT =
20739 EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true);
20740 Val = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SameEltTypeVT,
20741 N1: DAG.getUNDEF(VT: SameEltTypeVT), N2: Val,
20742 N3: DAG.getVectorIdxConstant(Val: 0, DL));
20743 }
20744 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
20745 } else {
20746 Val =
20747 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: PartVT, N1: DAG.getUNDEF(VT: PartVT),
20748 N2: Val, N3: DAG.getVectorIdxConstant(Val: 0, DL));
20749 }
20750 Parts[0] = Val;
20751 return true;
20752 }
20753 }
20754 return false;
20755}
20756
20757SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20758 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20759 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20760 bool IsABIRegCopy = CC.has_value();
20761 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20762 PartVT == MVT::f32) {
20763 SDValue Val = Parts[0];
20764
20765 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20766 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20767 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20768 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
20769 return Val;
20770 }
20771
20772 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20773 LLVMContext &Context = *DAG.getContext();
20774 SDValue Val = Parts[0];
20775 EVT ValueEltVT = ValueVT.getVectorElementType();
20776 EVT PartEltVT = PartVT.getVectorElementType();
20777 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20778 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20779 if (PartVTBitSize % ValueVTBitSize == 0) {
20780 assert(PartVTBitSize >= ValueVTBitSize);
20781 EVT SameEltTypeVT = ValueVT;
      // If the element types are different, first bitcast the part to a
      // vector with the same element type as ValueVT.
      // For example, to extract <vscale x 1 x i8> from a <vscale x 4 x i16>
      // part, we bitcast the part to <vscale x 8 x i8> first and then
      // extract the <vscale x 1 x i8> subvector.
20788 if (ValueEltVT != PartEltVT) {
20789 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
20791 SameEltTypeVT =
20792 EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true);
20793 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: SameEltTypeVT, Operand: Val);
20794 }
20795 Val = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ValueVT, N1: Val,
20796 N2: DAG.getVectorIdxConstant(Val: 0, DL));
20797 return Val;
20798 }
20799 }
20800 return SDValue();
20801}
20802
20803bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20804 // When aggressively optimizing for code size, we prefer to use a div
20805 // instruction, as it is usually smaller than the alternative sequence.
20806 // TODO: Add vector division?
20807 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20808 return OptSize && !VT.isVector();
20809}
20810
20811bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing a splat of zero_extend or sign_extend can prevent matching a
  // widening instruction in some cases.
20814 unsigned Opc = N->getOpcode();
20815 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20816 return false;
20817 return true;
20818}
20819
20820static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20821 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20822 Function *ThreadPointerFunc =
20823 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20824 return IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(),
20825 Ptr: IRB.CreateCall(Callee: ThreadPointerFunc), Idx0: Offset);
20826}
20827
20828Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20829 // Fuchsia provides a fixed TLS slot for the stack cookie.
20830 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20831 if (Subtarget.isTargetFuchsia())
20832 return useTpOffset(IRB, Offset: -0x10);
20833
20834 return TargetLowering::getIRStackGuard(IRB);
20835}
20836
20837bool RISCVTargetLowering::isLegalInterleavedAccessType(
20838 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20839 const DataLayout &DL) const {
20840 EVT VT = getValueType(DL, Ty: VTy);
20841 // Don't lower vlseg/vsseg for vector types that can't be split.
20842 if (!isTypeLegal(VT))
20843 return false;
20844
20845 if (!isLegalElementTypeForRVV(ScalarTy: VT.getScalarType()) ||
20846 !allowsMemoryAccessForAlignment(Context&: VTy->getContext(), DL, VT, AddrSpace,
20847 Alignment))
20848 return false;
20849
20850 MVT ContainerVT = VT.getSimpleVT();
20851
20852 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
20853 if (!Subtarget.useRVVForFixedLengthVectors())
20854 return false;
20855 // Sometimes the interleaved access pass picks up splats as interleaves of
20856 // one element. Don't lower these.
20857 if (FVTy->getNumElements() < 2)
20858 return false;
20859
20860 ContainerVT = getContainerForFixedLengthVector(VT: VT.getSimpleVT());
20861 }
20862
20863 // Need to make sure that EMUL * NFIELDS ≤ 8
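  // For example, a factor-4 segment access at LMUL=2 uses 8 vector registers
  // and is allowed, while factor 4 at LMUL=4 would need 16 and is rejected.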
20864 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMUL: getLMUL(VT: ContainerVT));
20865 if (Fractional)
20866 return true;
20867 return Factor * LMUL <= 8;
20868}
20869
20870bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20871 Align Alignment) const {
20872 if (!Subtarget.hasVInstructions())
20873 return false;
20874
20875 // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() &&
      !Subtarget.useRVVForFixedLengthVectors())
20877 return false;
20878
20879 EVT ScalarType = DataType.getScalarType();
20880 if (!isLegalElementTypeForRVV(ScalarTy: ScalarType))
20881 return false;
20882
20883 if (!Subtarget.enableUnalignedVectorMem() &&
20884 Alignment < ScalarType.getStoreSize())
20885 return false;
20886
20887 return true;
20888}
20889
20890static const Intrinsic::ID FixedVlsegIntrIds[] = {
20891 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20892 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20893 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20894 Intrinsic::riscv_seg8_load};
20895
20896/// Lower an interleaved load into a vlsegN intrinsic.
20897///
20898/// E.g. Lower an interleaved load (Factor = 2):
20899/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20900/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20901/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20902///
20903/// Into:
20904/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20905/// %ptr, i64 4)
20906/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
20907/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
20908bool RISCVTargetLowering::lowerInterleavedLoad(
20909 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20910 ArrayRef<unsigned> Indices, unsigned Factor) const {
20911 IRBuilder<> Builder(LI);
20912
20913 auto *VTy = cast<FixedVectorType>(Val: Shuffles[0]->getType());
20914 if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: LI->getAlign(),
20915 AddrSpace: LI->getPointerAddressSpace(),
20916 DL: LI->getModule()->getDataLayout()))
20917 return false;
20918
20919 auto *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen());
20920
20921 Function *VlsegNFunc =
20922 Intrinsic::getDeclaration(M: LI->getModule(), id: FixedVlsegIntrIds[Factor - 2],
20923 Tys: {VTy, LI->getPointerOperandType(), XLenTy});
20924
20925 Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements());
20926
20927 CallInst *VlsegN =
20928 Builder.CreateCall(Callee: VlsegNFunc, Args: {LI->getPointerOperand(), VL});
20929
20930 for (unsigned i = 0; i < Shuffles.size(); i++) {
20931 Value *SubVec = Builder.CreateExtractValue(Agg: VlsegN, Idxs: Indices[i]);
20932 Shuffles[i]->replaceAllUsesWith(V: SubVec);
20933 }
20934
20935 return true;
20936}
20937
20938static const Intrinsic::ID FixedVssegIntrIds[] = {
20939 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20940 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20941 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20942 Intrinsic::riscv_seg8_store};
20943
20944/// Lower an interleaved store into a vssegN intrinsic.
20945///
20946/// E.g. Lower an interleaved store (Factor = 3):
20947/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20948/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20949/// store <12 x i32> %i.vec, <12 x i32>* %ptr
20950///
20951/// Into:
20952/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20953/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20954/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
20955/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
20956/// %ptr, i32 4)
20957///
20958/// Note that the new shufflevectors will be removed and we'll only generate one
20959/// vsseg3 instruction in CodeGen.
20960bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
20961 ShuffleVectorInst *SVI,
20962 unsigned Factor) const {
20963 IRBuilder<> Builder(SI);
20964 auto *ShuffleVTy = cast<FixedVectorType>(Val: SVI->getType());
20965 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
20966 auto *VTy = FixedVectorType::get(ElementType: ShuffleVTy->getElementType(),
20967 NumElts: ShuffleVTy->getNumElements() / Factor);
20968 if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: SI->getAlign(),
20969 AddrSpace: SI->getPointerAddressSpace(),
20970 DL: SI->getModule()->getDataLayout()))
20971 return false;
20972
20973 auto *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen());
20974
20975 Function *VssegNFunc =
20976 Intrinsic::getDeclaration(M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2],
20977 Tys: {VTy, SI->getPointerOperandType(), XLenTy});
20978
20979 auto Mask = SVI->getShuffleMask();
20980 SmallVector<Value *, 10> Ops;
20981
20982 for (unsigned i = 0; i < Factor; i++) {
20983 Value *Shuffle = Builder.CreateShuffleVector(
20984 V1: SVI->getOperand(i_nocapture: 0), V2: SVI->getOperand(i_nocapture: 1),
20985 Mask: createSequentialMask(Start: Mask[i], NumInts: VTy->getNumElements(), NumUndefs: 0));
20986 Ops.push_back(Elt: Shuffle);
20987 }
  // This VL should be OK (it should be executable in one vsseg instruction,
  // potentially under a larger LMUL) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
20991 Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements());
20992 Ops.append(IL: {SI->getPointerOperand(), VL});
20993
20994 Builder.CreateCall(Callee: VssegNFunc, Args: Ops);
20995
20996 return true;
20997}
20998
20999bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21000 LoadInst *LI) const {
21001 assert(LI->isSimple());
21002 IRBuilder<> Builder(LI);
21003
21004 // Only deinterleave2 supported at present.
21005 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
21006 return false;
21007
21008 unsigned Factor = 2;
21009
21010 VectorType *VTy = cast<VectorType>(Val: DI->getOperand(i_nocapture: 0)->getType());
21011 VectorType *ResVTy = cast<VectorType>(Val: DI->getType()->getContainedType(i: 0));
21012
21013 if (!isLegalInterleavedAccessType(VTy: ResVTy, Factor, Alignment: LI->getAlign(),
21014 AddrSpace: LI->getPointerAddressSpace(),
21015 DL: LI->getModule()->getDataLayout()))
21016 return false;
21017
21018 Function *VlsegNFunc;
21019 Value *VL;
21020 Type *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen());
21021 SmallVector<Value *, 10> Ops;
21022
21023 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
21024 VlsegNFunc = Intrinsic::getDeclaration(
21025 M: LI->getModule(), id: FixedVlsegIntrIds[Factor - 2],
21026 Tys: {ResVTy, LI->getPointerOperandType(), XLenTy});
21027 VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements());
21028 } else {
21029 static const Intrinsic::ID IntrIds[] = {
21030 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21031 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21032 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21033 Intrinsic::riscv_vlseg8};
21034
21035 VlsegNFunc = Intrinsic::getDeclaration(M: LI->getModule(), id: IntrIds[Factor - 2],
21036 Tys: {ResVTy, XLenTy});
21037 VL = Constant::getAllOnesValue(Ty: XLenTy);
21038 Ops.append(NumInputs: Factor, Elt: PoisonValue::get(T: ResVTy));
21039 }
21040
21041 Ops.append(IL: {LI->getPointerOperand(), VL});
21042
21043 Value *Vlseg = Builder.CreateCall(Callee: VlsegNFunc, Args: Ops);
21044 DI->replaceAllUsesWith(V: Vlseg);
21045
21046 return true;
21047}
21048
21049bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21050 StoreInst *SI) const {
21051 assert(SI->isSimple());
21052 IRBuilder<> Builder(SI);
21053
21054 // Only interleave2 supported at present.
21055 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
21056 return false;
21057
21058 unsigned Factor = 2;
21059
21060 VectorType *VTy = cast<VectorType>(Val: II->getType());
21061 VectorType *InVTy = cast<VectorType>(Val: II->getOperand(i_nocapture: 0)->getType());
21062
21063 if (!isLegalInterleavedAccessType(VTy: InVTy, Factor, Alignment: SI->getAlign(),
21064 AddrSpace: SI->getPointerAddressSpace(),
21065 DL: SI->getModule()->getDataLayout()))
21066 return false;
21067
21068 Function *VssegNFunc;
21069 Value *VL;
21070 Type *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen());
21071
21072 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
21073 VssegNFunc = Intrinsic::getDeclaration(
21074 M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2],
21075 Tys: {InVTy, SI->getPointerOperandType(), XLenTy});
21076 VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements());
21077 } else {
21078 static const Intrinsic::ID IntrIds[] = {
21079 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21080 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21081 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21082 Intrinsic::riscv_vsseg8};
21083
21084 VssegNFunc = Intrinsic::getDeclaration(M: SI->getModule(), id: IntrIds[Factor - 2],
21085 Tys: {InVTy, XLenTy});
21086 VL = Constant::getAllOnesValue(Ty: XLenTy);
21087 }
21088
21089 Builder.CreateCall(Callee: VssegNFunc, Args: {II->getOperand(i_nocapture: 0), II->getOperand(i_nocapture: 1),
21090 SI->getPointerOperand(), VL});
21091
21092 return true;
21093}
21094
21095MachineInstr *
21096RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21097 MachineBasicBlock::instr_iterator &MBBI,
21098 const TargetInstrInfo *TII) const {
21099 assert(MBBI->isCall() && MBBI->getCFIType() &&
21100 "Invalid call instruction for a KCFI check");
21101 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21102 MBBI->getOpcode()));
21103
21104 MachineOperand &Target = MBBI->getOperand(i: 0);
21105 Target.setIsRenamable(false);
21106
21107 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21108 .addReg(Target.getReg())
21109 .addImm(MBBI->getCFIType())
21110 .getInstr();
21111}
21112
21113#define GET_REGISTER_MATCHER
21114#include "RISCVGenAsmMatcher.inc"
21115
21116Register
21117RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21118 const MachineFunction &MF) const {
21119 Register Reg = MatchRegisterAltName(RegName);
21120 if (Reg == RISCV::NoRegister)
21121 Reg = MatchRegisterName(RegName);
21122 if (Reg == RISCV::NoRegister)
21123 report_fatal_error(
21124 reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21125 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21126 if (!ReservedRegs.test(Idx: Reg) && !Subtarget.isRegisterReservedByUser(i: Reg))
21127 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
21128 StringRef(RegName) + "\"."));
21129 return Reg;
21130}
21131
21132MachineMemOperand::Flags
21133RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21134 const MDNode *NontemporalInfo = I.getMetadata(KindID: LLVMContext::MD_nontemporal);
21135
21136 if (NontemporalInfo == nullptr)
21137 return MachineMemOperand::MONone;
21138
  // 1 -> default value, works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
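  // For example, level 2 becomes 0 after the adjustment below, so neither
  // hint bit is set, while level 5 becomes 3 and sets both MONontemporalBit0
  // and MONontemporalBit1.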
21144 int NontemporalLevel = 5;
21145 const MDNode *RISCVNontemporalInfo =
21146 I.getMetadata(Kind: "riscv-nontemporal-domain");
21147 if (RISCVNontemporalInfo != nullptr)
21148 NontemporalLevel =
21149 cast<ConstantInt>(
21150 Val: cast<ConstantAsMetadata>(Val: RISCVNontemporalInfo->getOperand(I: 0))
21151 ->getValue())
21152 ->getZExtValue();
21153
21154 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21155 "RISC-V target doesn't support this non-temporal domain.");
21156
21157 NontemporalLevel -= 2;
21158 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21159 if (NontemporalLevel & 0b1)
21160 Flags |= MONontemporalBit0;
21161 if (NontemporalLevel & 0b10)
21162 Flags |= MONontemporalBit1;
21163
21164 return Flags;
21165}
21166
21167MachineMemOperand::Flags
21168RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21169
21170 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21171 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21172 TargetFlags |= (NodeFlags & MONontemporalBit0);
21173 TargetFlags |= (NodeFlags & MONontemporalBit1);
21174 return TargetFlags;
21175}
21176
21177bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21178 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21179 return getTargetMMOFlags(Node: NodeX) == getTargetMMOFlags(Node: NodeY);
21180}
21181
21182bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21183 if (VT.isScalableVector())
21184 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21185 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21186 return true;
21187 return Subtarget.hasStdExtZbb() &&
21188 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21189}
21190
21191unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21192 ISD::CondCode Cond) const {
21193 return isCtpopFast(VT) ? 0 : 1;
21194}
21195
21196bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21197
21198 // GISel support is in progress or complete for these opcodes.
21199 unsigned Op = Inst.getOpcode();
21200 if (Op == Instruction::Add || Op == Instruction::Sub ||
21201 Op == Instruction::And || Op == Instruction::Or ||
21202 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21203 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21204 return false;
21205
21206 if (Inst.getType()->isScalableTy())
21207 return true;
21208
21209 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21210 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21211 !isa<ReturnInst>(Val: &Inst))
21212 return true;
21213
21214 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: &Inst)) {
21215 if (AI->getAllocatedType()->isScalableTy())
21216 return true;
21217 }
21218
21219 return false;
21220}
21221
21222SDValue
21223RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21224 SelectionDAG &DAG,
21225 SmallVectorImpl<SDNode *> &Created) const {
21226 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21227 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
21228 return SDValue(N, 0); // Lower SDIV as SDIV
21229
21230 // Only perform this transform if short forward branch opt is supported.
21231 if (!Subtarget.hasShortForwardBranchOpt())
21232 return SDValue();
21233 EVT VT = N->getValueType(ResNo: 0);
21234 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21235 return SDValue();
21236
21237 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
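  // For example, a divisor of 2048 needs an addi of 2047, which fits in
  // simm12, while a divisor of 4096 would need 4095 and is rejected.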
21238 if (Divisor.sgt(RHS: 2048) || Divisor.slt(RHS: -2048))
21239 return SDValue();
21240 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21241}
21242
21243bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21244 EVT VT, const APInt &AndMask) const {
21245 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21246 return !Subtarget.hasStdExtZbs() && AndMask.ugt(RHS: 1024);
21247 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21248}
21249
21250unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21251 return Subtarget.getMinimumJumpTableEntries();
21252}
21253
// Handle a single argument, such as a return value.
21255template <typename Arg>
21256void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
  // This lambda determines whether the argument list consists of homogeneous
  // scalable vector types.
21259 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21260 // First, extract the first element in the argument type.
21261 auto It = ArgList.begin();
21262 MVT FirstArgRegType = It->VT;
21263
    // Return false if the list is empty or the first type needs to be split.
21265 if (It == ArgList.end() || It->Flags.isSplit())
21266 return false;
21267
21268 ++It;
21269
    // Return false if this argument contains only one element, or if the
    // first type is not a scalable vector.
21272 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21273 return false;
21274
21275 // Second, check if the following elements in this argument type are all the
21276 // same.
21277 for (; It != ArgList.end(); ++It)
21278 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21279 return false;
21280
21281 return true;
21282 };
21283
21284 if (isHomogeneousScalableVectorType(ArgList)) {
21285 // Handle as tuple type
21286 RVVArgInfos.push_back(Elt: {(unsigned)ArgList.size(), ArgList[0].VT, false});
21287 } else {
21288 // Handle as normal vector type
21289 bool FirstVMaskAssigned = false;
21290 for (const auto &OutArg : ArgList) {
21291 MVT RegisterVT = OutArg.VT;
21292
21293 // Skip non-RVV register type
21294 if (!RegisterVT.isVector())
21295 continue;
21296
21297 if (RegisterVT.isFixedLengthVector())
21298 RegisterVT = TLI->getContainerForFixedLengthVector(VT: RegisterVT);
21299
21300 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21301 RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: true});
21302 FirstVMaskAssigned = true;
21303 continue;
21304 }
21305
21306 RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: false});
21307 }
21308 }
21309}
21310
21311// Handle multiple args.
21312template <>
21313void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21314 const DataLayout &DL = MF->getDataLayout();
21315 const Function &F = MF->getFunction();
21316 LLVMContext &Context = F.getContext();
21317
21318 bool FirstVMaskAssigned = false;
21319 for (Type *Ty : TypeList) {
21320 StructType *STy = dyn_cast<StructType>(Val: Ty);
21321 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21322 Type *ElemTy = STy->getTypeAtIndex(N: 0U);
21323 EVT VT = TLI->getValueType(DL, Ty: ElemTy);
21324 MVT RegisterVT =
21325 TLI->getRegisterTypeForCallingConv(Context, CC: F.getCallingConv(), VT);
21326 unsigned NumRegs =
21327 TLI->getNumRegistersForCallingConv(Context, CC: F.getCallingConv(), VT);
21328
21329 RVVArgInfos.push_back(
21330 Elt: {.NF: NumRegs * STy->getNumElements(), .VT: RegisterVT, .FirstVMask: false});
21331 } else {
21332 SmallVector<EVT, 4> ValueVTs;
21333 ComputeValueVTs(TLI: *TLI, DL, Ty, ValueVTs);
21334
21335 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21336 ++Value) {
21337 EVT VT = ValueVTs[Value];
21338 MVT RegisterVT =
21339 TLI->getRegisterTypeForCallingConv(Context, CC: F.getCallingConv(), VT);
21340 unsigned NumRegs =
21341 TLI->getNumRegistersForCallingConv(Context, CC: F.getCallingConv(), VT);
21342
21343 // Skip non-RVV register type
21344 if (!RegisterVT.isVector())
21345 continue;
21346
21347 if (RegisterVT.isFixedLengthVector())
21348 RegisterVT = TLI->getContainerForFixedLengthVector(VT: RegisterVT);
21349
21350 if (!FirstVMaskAssigned &&
21351 RegisterVT.getVectorElementType() == MVT::i1) {
21352 RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: true});
21353 FirstVMaskAssigned = true;
21354 --NumRegs;
21355 }
21356
21357 RVVArgInfos.insert(I: RVVArgInfos.end(), NumToInsert: NumRegs, Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: false});
21358 }
21359 }
21360 }
21361}
21362
21363void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21364 unsigned StartReg) {
21365 assert((StartReg % LMul) == 0 &&
21366 "Start register number should be multiple of lmul");
21367 const MCPhysReg *VRArrays;
21368 switch (LMul) {
21369 default:
21370 report_fatal_error(reason: "Invalid lmul");
21371 case 1:
21372 VRArrays = ArgVRs;
21373 break;
21374 case 2:
21375 VRArrays = ArgVRM2s;
21376 break;
21377 case 4:
21378 VRArrays = ArgVRM4s;
21379 break;
21380 case 8:
21381 VRArrays = ArgVRM8s;
21382 break;
21383 }
21384
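  // Illustrative example (assuming ArgVRM2s lists v8m2, v10m2, ... in order):
  // with LMul = 2 and StartReg = 12, (StartReg - 8) / LMul = 2 selects v12m2
  // as the first register of the group.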
21385 for (unsigned i = 0; i < NF; ++i)
21386 if (StartReg)
21387 AllocatedPhysRegs.push_back(Elt: VRArrays[(StartReg - 8) / LMul + i]);
21388 else
21389 AllocatedPhysRegs.push_back(Elt: MCPhysReg());
21390}
21391
/// This function determines whether each RVV argument is passed by register.
/// If the argument can be assigned to a VR, give it a specific register;
/// otherwise, assign it 0, which is an invalid MCPhysReg.
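/// For example, an LMUL=2 argument occupies two consecutive vector registers
/// starting at an even-numbered register; if no suitably aligned group of
/// argument vector registers is free, it is assigned the invalid register 0.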
21395void RVVArgDispatcher::compute() {
21396 uint32_t AssignedMap = 0;
21397 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21398 // Allocate first vector mask argument to V0.
21399 if (ArgInfo.FirstVMask) {
21400 AllocatedPhysRegs.push_back(RISCV::V0);
21401 return;
21402 }
21403
21404 unsigned RegsNeeded = divideCeil(
21405 Numerator: ArgInfo.VT.getSizeInBits().getKnownMinValue(), Denominator: RISCV::RVVBitsPerBlock);
21406 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21407 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21408 StartReg += RegsNeeded) {
21409 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21410 if ((AssignedMap & Map) == 0) {
21411 allocatePhysReg(NF: ArgInfo.NF, LMul: RegsNeeded, StartReg: StartReg + 8);
21412 AssignedMap |= Map;
21413 return;
21414 }
21415 }
21416
21417 allocatePhysReg(NF: ArgInfo.NF, LMul: RegsNeeded, StartReg: 0);
21418 };
21419
21420 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21421 allocate(RVVArgInfos[i]);
21422}
21423
21424MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21425 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21426 return AllocatedPhysRegs[CurIdx++];
21427}
21428
21429namespace llvm::RISCVVIntrinsicsTable {
21430
21431#define GET_RISCVVIntrinsicsTable_IMPL
21432#include "RISCVGenSearchableTables.inc"
21433
21434} // namespace llvm::RISCVVIntrinsicsTable
21435
