//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

static cl::opt<bool>
    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();
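  // XLenVT is i32 on RV32 and i64 on RV64: the natural width of a GPR.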

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
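  // The Z*inx extensions hold FP values in the integer register file instead
  // of a separate FP one; on RV32, Zdinx uses an even/odd GPR pair for f64.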
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
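      // For example, with Zve32x (ELen == 32) MinElts is 64 / 32 == 2, so
      // nxv1i8, nxv1i16 and nxv1i32 get no register class: each would need a
      // smaller fractional LMUL than the target supports.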
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
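      // Size is the known minimum size, i.e. the type's size when VLEN equals
      // RVVBitsPerBlock, so its ratio to RVVBitsPerBlock is the LMUL: up to
      // one block fits in a single vector register, two blocks need an LMUL=2
      // register group, and so on.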
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16Minimal())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

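  // slt/sltu natively compute only the "less than" orderings, so the
  // remaining XLenVT comparisons are expanded into swapped or inverted
  // forms; GT/UGT are made Custom, presumably so constant operands can be
  // adjusted to keep using slti/sltiu.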
  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SETCC, MVT::i32, Promote);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::VAARG, MVT::i32, Promote);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    if (!RV64LegalI32) {
      setOperationAction(ISD::LOAD, MVT::i32, Custom);
      setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                         MVT::i32, Custom);
      setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                         MVT::i32, Custom);
      if (!Subtarget.hasStdExtZbb())
        setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::SSUBO, MVT::i32, Custom);
      if (Subtarget.hasStdExtZbb()) {
        setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
        setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Custom);
      }
    }
    setOperationAction(ISD::SADDO, MVT::i32, Custom);
  } else {
    setLibcallName(
        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
        nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }
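  // Clearing a libcall name forces expansion instead of a call: on RV32 the
  // 128-bit shift/multiply helpers and the 64-bit overflow-checking multiply
  // can't be assumed to be present in the runtime library.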

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::MUL, MVT::i32, Promote);
  } else if (Subtarget.is64Bit()) {
    setOperationAction(ISD::MUL, MVT::i128, Custom);
    if (!RV64LegalI32)
      setOperationAction(ISD::MUL, MVT::i32, Custom);
    else
      setOperationAction(ISD::SMULO, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                         Promote);
  } else if (Subtarget.is64Bit()) {
    if (!RV64LegalI32)
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                         {MVT::i8, MVT::i16, MVT::i32}, Custom);
  }

  if (RV64LegalI32 && Subtarget.is64Bit()) {
    setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
    setOperationAction(
        {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
        Expand);
  }

  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (!RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
  } else if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
  } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
  }

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
                         ? Legal
                         : Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BSWAP, MVT::i32,
                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                        Subtarget.hasVendorXTHeadBb())
                           ? Promote
                           : Expand);

  if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
  } else {
    // Zbkb can use rev8+brev8 to implement bitreverse.
    setOperationAction(ISD::BITREVERSE, XLenVT,
                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
                         Promote);

    if (Subtarget.is64Bit()) {
      if (RV64LegalI32)
        setOperationAction(ISD::CTTZ, MVT::i32, Legal);
      else
        setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
    }
  } else if (!Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
  }

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
      Subtarget.hasVendorXCVbitmanip()) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32-bit case is efficient on 64-bit targets.
    if (Subtarget.is64Bit()) {
      if (RV64LegalI32) {
        setOperationAction(ISD::CTLZ, MVT::i32,
                           Subtarget.hasStdExtZbb() ? Legal : Promote);
        if (!Subtarget.hasStdExtZbb())
          setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
      } else
        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  }

  if (!RV64LegalI32 && Subtarget.is64Bit() &&
      !Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  // We can use PseudoCCSUB to implement ABS.
  if (Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, XLenVT, Legal);

  if (!Subtarget.hasVendorXTHeadCondMov()) {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::SELECT, MVT::i32, Promote);
  }

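  // Operation/condition-code sets shared by the scalar FP types. Which of
  // f16/f32/f64 receive them, and with what action, depends on the FP
  // extensions checked below.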
  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
      ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
      ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
      ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
      ISD::FREM};

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN};

  if (Subtarget.hasStdExtZfhminOrZhinxmin())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  static const unsigned ZfhminZfbfminPromoteOps[] = {
      ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
      ISD::FSUB, ISD::FMUL, ISD::FMA,
      ISD::FDIV, ISD::FSQRT, ISD::FABS,
      ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
      ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
      ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
      ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN, ISD::SELECT};

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
    setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
    setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
    setOperationAction(ISD::FREM, MVT::bf16, Promote);
    // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
    // DAGCombiner::visitFP_ROUND probably needs improvements first.
    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
  }

  if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::SELECT, MVT::f16, Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
    } else {
      setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                          ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
                         MVT::f16, Legal);
      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
      // DAGCombiner::visitFP_ROUND probably needs improvements first.
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
                        ISD::FLOG10},
                       MVT::f16, Promote);

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
                        ISD::STRICT_FTRUNC},
                       MVT::f16, Promote);

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);

    if (!Subtarget.hasStdExtZfa())
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa())
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    else
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (!Subtarget.is64Bit())
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
    } else {
      if (Subtarget.is64Bit())
        setOperationAction(FPRndMode, MVT::f64, Custom);

      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  }

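  // On RV64, custom-legalize the 32-bit FP-to-integer conversions so they
  // can use the word-sized fcvt.w[u] forms and yield results sign-extended
  // to 64 bits, matching how i32 values are kept in registers.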
  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                       MVT::i32, Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);

    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                         MVT::i32, Legal);

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);
  setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtZicbop()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
  }

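  // Zabha provides byte/halfword AMOs and Zacas provides compare-and-swap,
  // so together they support sub-word cmpxchg natively and the minimum width
  // can drop to 8 bits; otherwise AtomicExpand widens sub-word operations to
  // 32 bits.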
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
      setMinCmpXchgSizeInBits(8);
    else
      setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::VSCALE, MVT::i32, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                         MVT::i32, Custom);
    else
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                         MVT::i64, Custom);

    setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                       MVT::Other, Custom);

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
        ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
        ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
        ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
        ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
        ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
        ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
        ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
        ISD::VP_USUBSAT};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
        ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
        ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
        ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
        ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
        ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
        ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
        ISD::EXPERIMENTAL_VP_SPLICE};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                         MVT::i64, Custom);

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
                         MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
          Expand);

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

      setOperationAction(
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          Custom);

      setOperationAction(
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
          Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);

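      // There is no direct way to slide mask elements, so VECTOR_SPLICE is
      // promoted to an i8 vector with the same element count and spliced
      // there instead.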
      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
    }

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                         Legal);

      setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
                         VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);
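      // The unsigned averaging ops map onto vaaddu (under the appropriate
      // vxrm rounding mode) and the saturating ops onto vsadd[u]/vssub[u],
      // so they can be Legal.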
      setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
                          ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
                         VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      // Splice
      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      if (Subtarget.hasStdExtZvkb()) {
        setOperationAction(ISD::BSWAP, VT, Legal);
        setOperationAction(ISD::VP_BSWAP, VT, Custom);
      } else {
        setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
      }

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction(ISD::BITREVERSE, VT, Legal);
        setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Custom);
      } else {
        setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Expand);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
        // fits in the range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
                             VT, Custom);
        }
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
    };

    // TODO: support more ops.
    static const unsigned ZvfhminPromoteOps[] = {
        ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
        ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
        ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
        ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
        ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
        ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
        ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};

    // TODO: support more vp ops.
    static const unsigned ZvfhminPromoteVPOps[] = {
        ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
        ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
        ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
        ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
        ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
        ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
        ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
        ISD::VP_FMAXIMUM};

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                           Custom);
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                           Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                            ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                           VT, Custom);
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                           VT, Custom);
        if (Subtarget.hasStdExtZfhminOrZhinxmin())
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
        // load/store
        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        // Custom split nxv32f16, since nxv32f32 is not legal.
        if (VT == MVT::nxv32f16) {
          setOperationAction(ZvfhminPromoteOps, VT, Custom);
          setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
          continue;
        }
        // Add more promote ops.
        MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
        setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
      }
    }

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      }
    }

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      }
    }

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                           OtherVT, Expand);
        }

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
                           Custom);

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(
            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {
                ISD::SINT_TO_FP,
                ISD::UINT_TO_FP,
                ISD::FP_TO_SINT,
                ISD::FP_TO_UINT,
                ISD::STRICT_SINT_TO_FP,
                ISD::STRICT_UINT_TO_FP,
                ISD::STRICT_FP_TO_SINT,
                ISD::STRICT_FP_TO_UINT,
            },
            VT, Custom);
        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                           Custom);

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        // Operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
                              ISD::OR, ISD::XOR},
                             VT, Custom);

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},
                             VT, Custom);

          setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
          setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
          continue;
        }

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
        }

        setOperationAction(
            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
                           VT, Custom);

        setOperationAction(
            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);

        setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
                            ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
                           VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(
            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes' operands.
        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
                            ISD::VECREDUCE_UMIN},
                           VT, Custom);

        setOperationAction(IntegerVPOps, VT, Custom);

        if (Subtarget.hasStdExtZvkb())
          setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);

        if (Subtarget.hasStdExtZvbb()) {
          setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
                             VT, Custom);
        } else {
          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of
          // VT fits in the range of f32.
1245 | EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
1246 | if (isTypeLegal(FloatVT)) |
1247 | setOperationAction( |
1248 | {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, |
1249 | Custom); |
1250 | } |
1251 | } |
1252 | |
1253 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { |
1254 | // There are no extending loads or truncating stores. |
1255 | for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) { |
1256 | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
1257 | setTruncStoreAction(VT, InnerVT, Expand); |
1258 | } |
1259 | |
1260 | if (!useRVVForFixedLengthVectorVT(VT)) |
1261 | continue; |
1262 | |
1263 | // By default everything must be expanded. |
1264 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
1265 | setOperationAction(Op, VT, Expand); |
1266 | |
1267 | // Custom lower fixed vector undefs to scalable vector undefs to avoid |
1268 | // expansion to a build_vector of 0s. |
1269 | setOperationAction(ISD::UNDEF, VT, Custom); |
1270 | |
1271 | if (VT.getVectorElementType() == MVT::f16 && |
1272 | !Subtarget.hasVInstructionsF16()) { |
1273 | setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); |
1274 | setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, |
1275 | Custom); |
1276 | setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); |
1277 | setOperationAction( |
1278 | {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, |
1279 | Custom); |
1280 | setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, |
1281 | ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, |
1282 | VT, Custom); |
1283 | setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, |
1284 | ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, |
1285 | VT, Custom); |
1286 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); |
1287 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1288 | MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
        // Don't promote f16 vector operations to f32 if the f32 vector type
        // is not legal.
1291 | // TODO: could split the f16 vector into two vectors and do promotion. |
1292 | if (!isTypeLegal(F32VecVT)) |
1293 | continue; |
1294 | setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); |
1295 | setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); |
1296 | continue; |
1297 | } |
1298 | |
1299 | // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
1300 | setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, |
1301 | Custom); |
1302 | |
1303 | setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, |
1304 | ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT, |
1305 | ISD::EXTRACT_VECTOR_ELT}, |
1306 | VT, Custom); |
1307 | |
1308 | setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, |
1309 | ISD::MGATHER, ISD::MSCATTER}, |
1310 | VT, Custom); |
1311 | |
1312 | setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, |
1313 | ISD::EXPERIMENTAL_VP_STRIDED_LOAD, |
1314 | ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, |
1315 | ISD::VP_SCATTER}, |
1316 | VT, Custom); |
1317 | |
1318 | setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, |
1319 | ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, |
1320 | ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, |
1321 | ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, |
1322 | VT, Custom); |
1323 | |
1324 | setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); |
1325 | |
1326 | setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, |
1327 | ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, |
1328 | VT, Custom); |
1329 | |
1330 | setCondCodeAction(VFPCCToExpand, VT, Expand); |
1331 | |
1332 | setOperationAction(ISD::SETCC, VT, Custom); |
1333 | setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); |
1334 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1335 | |
1336 | setOperationAction(ISD::BITCAST, VT, Custom); |
1337 | |
1338 | setOperationAction(FloatingPointVecReduceOps, VT, Custom); |
1339 | |
1340 | setOperationAction(FloatingPointVPOps, VT, Custom); |
1341 | |
1342 | setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT, |
1343 | Custom); |
1344 | setOperationAction( |
1345 | {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, |
1346 | ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, |
1347 | ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, |
1348 | ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, |
1349 | ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, |
1350 | VT, Custom); |
1351 | } |
1352 | |
1353 | // Custom-legalize bitcasts from fixed-length vectors to scalar types. |
1354 | setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, |
1355 | Custom); |
1356 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) |
1357 | setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
1358 | if (Subtarget.hasStdExtFOrZfinx()) |
1359 | setOperationAction(ISD::BITCAST, MVT::f32, Custom); |
1360 | if (Subtarget.hasStdExtDOrZdinx()) |
1361 | setOperationAction(ISD::BITCAST, MVT::f64, Custom); |
1362 | } |
1363 | } |
1364 | |
1365 | if (Subtarget.hasStdExtA()) { |
1366 | setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT: XLenVT, Action: Expand); |
1367 | if (RV64LegalI32 && Subtarget.is64Bit()) |
1368 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); |
1369 | } |
1370 | |
1371 | if (Subtarget.hasForcedAtomics()) { |
1372 | // Force __sync libcalls to be emitted for atomic rmw/cas operations. |
1373 | setOperationAction( |
1374 | Ops: {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, |
1375 | ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, |
1376 | ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, |
1377 | ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, |
1378 | VT: XLenVT, Action: LibCall); |
1379 | } |
1380 | |
1381 | if (Subtarget.hasVendorXTHeadMemIdx()) { |
1382 | for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) { |
1383 | setIndexedLoadAction(im, MVT::i8, Legal); |
1384 | setIndexedStoreAction(im, MVT::i8, Legal); |
1385 | setIndexedLoadAction(im, MVT::i16, Legal); |
1386 | setIndexedStoreAction(im, MVT::i16, Legal); |
1387 | setIndexedLoadAction(im, MVT::i32, Legal); |
1388 | setIndexedStoreAction(im, MVT::i32, Legal); |
1389 | |
1390 | if (Subtarget.is64Bit()) { |
1391 | setIndexedLoadAction(im, MVT::i64, Legal); |
1392 | setIndexedStoreAction(im, MVT::i64, Legal); |
1393 | } |
1394 | } |
1395 | } |
1396 | |
1397 | // Function alignments. |
1398 | const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4); |
1399 | setMinFunctionAlignment(FunctionAlignment); |
1400 | // Set preferred alignments. |
1401 | setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); |
1402 | setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); |
1403 | |
1404 | setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, |
1405 | ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, |
1406 | ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); |
1407 | if (Subtarget.is64Bit()) |
1408 | setTargetDAGCombine(ISD::SRA); |
1409 | |
1410 | if (Subtarget.hasStdExtFOrZfinx()) |
1411 | setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM}); |
1412 | |
1413 | if (Subtarget.hasStdExtZbb()) |
1414 | setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}); |
1415 | |
1416 | if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) |
1417 | setTargetDAGCombine(ISD::TRUNCATE); |
1418 | |
1419 | if (Subtarget.hasStdExtZbkb()) |
1420 | setTargetDAGCombine(ISD::BITREVERSE); |
1421 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) |
1422 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
1423 | if (Subtarget.hasStdExtFOrZfinx()) |
1424 | setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, |
1425 | ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}); |
1426 | if (Subtarget.hasVInstructions()) |
1427 | setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, |
1428 | ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, |
1429 | ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, |
1430 | ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, |
1431 | ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL, |
1432 | ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, |
1433 | ISD::INSERT_VECTOR_ELT, ISD::ABS}); |
1434 | if (Subtarget.hasVendorXTHeadMemPair()) |
1435 | setTargetDAGCombine({ISD::LOAD, ISD::STORE}); |
1436 | if (Subtarget.useRVVForFixedLengthVectors()) |
1437 | setTargetDAGCombine(ISD::BITCAST); |
1438 | |
1439 | setLibcallName(Call: RTLIB::FPEXT_F16_F32, Name: "__extendhfsf2" ); |
1440 | setLibcallName(Call: RTLIB::FPROUND_F32_F16, Name: "__truncsfhf2" ); |
1441 | |
1442 | // Disable strict node mutation. |
1443 | IsStrictFPEnabled = true; |
1444 | } |
1445 | |
1446 | EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, |
1447 | LLVMContext &Context, |
1448 | EVT VT) const { |
1449 | if (!VT.isVector()) |
1450 | return getPointerTy(DL); |
1451 | if (Subtarget.hasVInstructions() && |
1452 | (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) |
1453 | return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); |
1454 | return VT.changeVectorElementTypeToInteger(); |
1455 | } |
1456 | |
1457 | MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { |
1458 | return Subtarget.getXLenVT(); |
1459 | } |
1460 | |
1461 | // Return false if we can lower get_vector_length to a vsetvli intrinsic. |
1462 | bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, |
1463 | unsigned VF, |
1464 | bool IsScalable) const { |
1465 | if (!Subtarget.hasVInstructions()) |
1466 | return true; |
1467 | |
1468 | if (!IsScalable) |
1469 | return true; |
1470 | |
1471 | if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT()) |
1472 | return true; |
1473 | |
  // Don't allow VF=1 if those types aren't legal.
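  // For example, with ELEN == 32 the narrowest legal container is nxv2i32
  // (RVVBitsPerBlock / ELEN == 2 elements), so VF == 1 must be expanded.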
1475 | if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen()) |
1476 | return true; |
1477 | |
1478 | // VLEN=32 support is incomplete. |
1479 | if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) |
1480 | return true; |
1481 | |
1482 | // The maximum VF is for the smallest element width with LMUL=8. |
1483 | // VF must be a power of 2. |
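  // For example, with RVVBitsPerBlock == 64: SEW == 8 at LMUL == 8 gives
  // MaxVF = (64 / 8) * 8 = 64.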
1484 | unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8; |
1485 | return VF > MaxVF || !isPowerOf2_32(Value: VF); |
1486 | } |
1487 | |
1488 | bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const { |
1489 | return !Subtarget.hasVInstructions() || |
1490 | VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT); |
1491 | } |
1492 | |
1493 | bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
1494 | const CallInst &I, |
1495 | MachineFunction &MF, |
1496 | unsigned Intrinsic) const { |
1497 | auto &DL = I.getModule()->getDataLayout(); |
1498 | |
1499 | auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore, |
1500 | bool IsUnitStrided, bool UsePtrVal = false) { |
1501 | Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; |
1502 | // We can't use ptrVal if the intrinsic can access memory before the |
1503 | // pointer. This means we can't use it for strided or indexed intrinsics. |
1504 | if (UsePtrVal) |
1505 | Info.ptrVal = I.getArgOperand(i: PtrOp); |
1506 | else |
1507 | Info.fallbackAddressSpace = |
1508 | I.getArgOperand(i: PtrOp)->getType()->getPointerAddressSpace(); |
1509 | Type *MemTy; |
1510 | if (IsStore) { |
1511 | // Store value is the first operand. |
1512 | MemTy = I.getArgOperand(i: 0)->getType(); |
1513 | } else { |
      // Use the return type. If it's a segment load, the return type is a
      // struct.
1515 | MemTy = I.getType(); |
1516 | if (MemTy->isStructTy()) |
1517 | MemTy = MemTy->getStructElementType(N: 0); |
1518 | } |
1519 | if (!IsUnitStrided) |
1520 | MemTy = MemTy->getScalarType(); |
1521 | |
1522 | Info.memVT = getValueType(DL, Ty: MemTy); |
1523 | Info.align = Align(DL.getTypeSizeInBits(Ty: MemTy->getScalarType()) / 8); |
1524 | Info.size = MemoryLocation::UnknownSize; |
1525 | Info.flags |= |
1526 | IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; |
1527 | return true; |
1528 | }; |
1529 | |
1530 | if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
1531 | Info.flags |= MachineMemOperand::MONonTemporal; |
1532 | |
1533 | Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I); |
1534 | switch (Intrinsic) { |
1535 | default: |
1536 | return false; |
1537 | case Intrinsic::riscv_masked_atomicrmw_xchg_i32: |
1538 | case Intrinsic::riscv_masked_atomicrmw_add_i32: |
1539 | case Intrinsic::riscv_masked_atomicrmw_sub_i32: |
1540 | case Intrinsic::riscv_masked_atomicrmw_nand_i32: |
1541 | case Intrinsic::riscv_masked_atomicrmw_max_i32: |
1542 | case Intrinsic::riscv_masked_atomicrmw_min_i32: |
1543 | case Intrinsic::riscv_masked_atomicrmw_umax_i32: |
1544 | case Intrinsic::riscv_masked_atomicrmw_umin_i32: |
1545 | case Intrinsic::riscv_masked_cmpxchg_i32: |
1546 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
1547 | Info.memVT = MVT::i32; |
1548 | Info.ptrVal = I.getArgOperand(i: 0); |
1549 | Info.offset = 0; |
1550 | Info.align = Align(4); |
1551 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
1552 | MachineMemOperand::MOVolatile; |
1553 | return true; |
1554 | case Intrinsic::riscv_masked_strided_load: |
1555 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false, |
1556 | /*IsUnitStrided*/ false); |
1557 | case Intrinsic::riscv_masked_strided_store: |
1558 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true, |
1559 | /*IsUnitStrided*/ false); |
1560 | case Intrinsic::riscv_seg2_load: |
1561 | case Intrinsic::riscv_seg3_load: |
1562 | case Intrinsic::riscv_seg4_load: |
1563 | case Intrinsic::riscv_seg5_load: |
1564 | case Intrinsic::riscv_seg6_load: |
1565 | case Intrinsic::riscv_seg7_load: |
1566 | case Intrinsic::riscv_seg8_load: |
1567 | return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, |
1568 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1569 | case Intrinsic::riscv_seg2_store: |
1570 | case Intrinsic::riscv_seg3_store: |
1571 | case Intrinsic::riscv_seg4_store: |
1572 | case Intrinsic::riscv_seg5_store: |
1573 | case Intrinsic::riscv_seg6_store: |
1574 | case Intrinsic::riscv_seg7_store: |
1575 | case Intrinsic::riscv_seg8_store: |
1576 | // Operands are (vec, ..., vec, ptr, vl) |
1577 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1578 | /*IsStore*/ true, |
1579 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1580 | case Intrinsic::riscv_vle: |
1581 | case Intrinsic::riscv_vle_mask: |
1582 | case Intrinsic::riscv_vleff: |
1583 | case Intrinsic::riscv_vleff_mask: |
1584 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1585 | /*IsStore*/ false, |
1586 | /*IsUnitStrided*/ true, |
1587 | /*UsePtrVal*/ true); |
1588 | case Intrinsic::riscv_vse: |
1589 | case Intrinsic::riscv_vse_mask: |
1590 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1591 | /*IsStore*/ true, |
1592 | /*IsUnitStrided*/ true, |
1593 | /*UsePtrVal*/ true); |
1594 | case Intrinsic::riscv_vlse: |
1595 | case Intrinsic::riscv_vlse_mask: |
1596 | case Intrinsic::riscv_vloxei: |
1597 | case Intrinsic::riscv_vloxei_mask: |
1598 | case Intrinsic::riscv_vluxei: |
1599 | case Intrinsic::riscv_vluxei_mask: |
1600 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1601 | /*IsStore*/ false, |
1602 | /*IsUnitStrided*/ false); |
1603 | case Intrinsic::riscv_vsse: |
1604 | case Intrinsic::riscv_vsse_mask: |
1605 | case Intrinsic::riscv_vsoxei: |
1606 | case Intrinsic::riscv_vsoxei_mask: |
1607 | case Intrinsic::riscv_vsuxei: |
1608 | case Intrinsic::riscv_vsuxei_mask: |
1609 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1610 | /*IsStore*/ true, |
1611 | /*IsUnitStrided*/ false); |
1612 | case Intrinsic::riscv_vlseg2: |
1613 | case Intrinsic::riscv_vlseg3: |
1614 | case Intrinsic::riscv_vlseg4: |
1615 | case Intrinsic::riscv_vlseg5: |
1616 | case Intrinsic::riscv_vlseg6: |
1617 | case Intrinsic::riscv_vlseg7: |
1618 | case Intrinsic::riscv_vlseg8: |
1619 | case Intrinsic::riscv_vlseg2ff: |
1620 | case Intrinsic::riscv_vlseg3ff: |
1621 | case Intrinsic::riscv_vlseg4ff: |
1622 | case Intrinsic::riscv_vlseg5ff: |
1623 | case Intrinsic::riscv_vlseg6ff: |
1624 | case Intrinsic::riscv_vlseg7ff: |
1625 | case Intrinsic::riscv_vlseg8ff: |
1626 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1627 | /*IsStore*/ false, |
1628 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1629 | case Intrinsic::riscv_vlseg2_mask: |
1630 | case Intrinsic::riscv_vlseg3_mask: |
1631 | case Intrinsic::riscv_vlseg4_mask: |
1632 | case Intrinsic::riscv_vlseg5_mask: |
1633 | case Intrinsic::riscv_vlseg6_mask: |
1634 | case Intrinsic::riscv_vlseg7_mask: |
1635 | case Intrinsic::riscv_vlseg8_mask: |
1636 | case Intrinsic::riscv_vlseg2ff_mask: |
1637 | case Intrinsic::riscv_vlseg3ff_mask: |
1638 | case Intrinsic::riscv_vlseg4ff_mask: |
1639 | case Intrinsic::riscv_vlseg5ff_mask: |
1640 | case Intrinsic::riscv_vlseg6ff_mask: |
1641 | case Intrinsic::riscv_vlseg7ff_mask: |
1642 | case Intrinsic::riscv_vlseg8ff_mask: |
1643 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, |
1644 | /*IsStore*/ false, |
1645 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1646 | case Intrinsic::riscv_vlsseg2: |
1647 | case Intrinsic::riscv_vlsseg3: |
1648 | case Intrinsic::riscv_vlsseg4: |
1649 | case Intrinsic::riscv_vlsseg5: |
1650 | case Intrinsic::riscv_vlsseg6: |
1651 | case Intrinsic::riscv_vlsseg7: |
1652 | case Intrinsic::riscv_vlsseg8: |
1653 | case Intrinsic::riscv_vloxseg2: |
1654 | case Intrinsic::riscv_vloxseg3: |
1655 | case Intrinsic::riscv_vloxseg4: |
1656 | case Intrinsic::riscv_vloxseg5: |
1657 | case Intrinsic::riscv_vloxseg6: |
1658 | case Intrinsic::riscv_vloxseg7: |
1659 | case Intrinsic::riscv_vloxseg8: |
1660 | case Intrinsic::riscv_vluxseg2: |
1661 | case Intrinsic::riscv_vluxseg3: |
1662 | case Intrinsic::riscv_vluxseg4: |
1663 | case Intrinsic::riscv_vluxseg5: |
1664 | case Intrinsic::riscv_vluxseg6: |
1665 | case Intrinsic::riscv_vluxseg7: |
1666 | case Intrinsic::riscv_vluxseg8: |
1667 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1668 | /*IsStore*/ false, |
1669 | /*IsUnitStrided*/ false); |
1670 | case Intrinsic::riscv_vlsseg2_mask: |
1671 | case Intrinsic::riscv_vlsseg3_mask: |
1672 | case Intrinsic::riscv_vlsseg4_mask: |
1673 | case Intrinsic::riscv_vlsseg5_mask: |
1674 | case Intrinsic::riscv_vlsseg6_mask: |
1675 | case Intrinsic::riscv_vlsseg7_mask: |
1676 | case Intrinsic::riscv_vlsseg8_mask: |
1677 | case Intrinsic::riscv_vloxseg2_mask: |
1678 | case Intrinsic::riscv_vloxseg3_mask: |
1679 | case Intrinsic::riscv_vloxseg4_mask: |
1680 | case Intrinsic::riscv_vloxseg5_mask: |
1681 | case Intrinsic::riscv_vloxseg6_mask: |
1682 | case Intrinsic::riscv_vloxseg7_mask: |
1683 | case Intrinsic::riscv_vloxseg8_mask: |
1684 | case Intrinsic::riscv_vluxseg2_mask: |
1685 | case Intrinsic::riscv_vluxseg3_mask: |
1686 | case Intrinsic::riscv_vluxseg4_mask: |
1687 | case Intrinsic::riscv_vluxseg5_mask: |
1688 | case Intrinsic::riscv_vluxseg6_mask: |
1689 | case Intrinsic::riscv_vluxseg7_mask: |
1690 | case Intrinsic::riscv_vluxseg8_mask: |
1691 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5, |
1692 | /*IsStore*/ false, |
1693 | /*IsUnitStrided*/ false); |
1694 | case Intrinsic::riscv_vsseg2: |
1695 | case Intrinsic::riscv_vsseg3: |
1696 | case Intrinsic::riscv_vsseg4: |
1697 | case Intrinsic::riscv_vsseg5: |
1698 | case Intrinsic::riscv_vsseg6: |
1699 | case Intrinsic::riscv_vsseg7: |
1700 | case Intrinsic::riscv_vsseg8: |
1701 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1702 | /*IsStore*/ true, |
1703 | /*IsUnitStrided*/ false); |
1704 | case Intrinsic::riscv_vsseg2_mask: |
1705 | case Intrinsic::riscv_vsseg3_mask: |
1706 | case Intrinsic::riscv_vsseg4_mask: |
1707 | case Intrinsic::riscv_vsseg5_mask: |
1708 | case Intrinsic::riscv_vsseg6_mask: |
1709 | case Intrinsic::riscv_vsseg7_mask: |
1710 | case Intrinsic::riscv_vsseg8_mask: |
1711 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1712 | /*IsStore*/ true, |
1713 | /*IsUnitStrided*/ false); |
1714 | case Intrinsic::riscv_vssseg2: |
1715 | case Intrinsic::riscv_vssseg3: |
1716 | case Intrinsic::riscv_vssseg4: |
1717 | case Intrinsic::riscv_vssseg5: |
1718 | case Intrinsic::riscv_vssseg6: |
1719 | case Intrinsic::riscv_vssseg7: |
1720 | case Intrinsic::riscv_vssseg8: |
1721 | case Intrinsic::riscv_vsoxseg2: |
1722 | case Intrinsic::riscv_vsoxseg3: |
1723 | case Intrinsic::riscv_vsoxseg4: |
1724 | case Intrinsic::riscv_vsoxseg5: |
1725 | case Intrinsic::riscv_vsoxseg6: |
1726 | case Intrinsic::riscv_vsoxseg7: |
1727 | case Intrinsic::riscv_vsoxseg8: |
1728 | case Intrinsic::riscv_vsuxseg2: |
1729 | case Intrinsic::riscv_vsuxseg3: |
1730 | case Intrinsic::riscv_vsuxseg4: |
1731 | case Intrinsic::riscv_vsuxseg5: |
1732 | case Intrinsic::riscv_vsuxseg6: |
1733 | case Intrinsic::riscv_vsuxseg7: |
1734 | case Intrinsic::riscv_vsuxseg8: |
1735 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1736 | /*IsStore*/ true, |
1737 | /*IsUnitStrided*/ false); |
1738 | case Intrinsic::riscv_vssseg2_mask: |
1739 | case Intrinsic::riscv_vssseg3_mask: |
1740 | case Intrinsic::riscv_vssseg4_mask: |
1741 | case Intrinsic::riscv_vssseg5_mask: |
1742 | case Intrinsic::riscv_vssseg6_mask: |
1743 | case Intrinsic::riscv_vssseg7_mask: |
1744 | case Intrinsic::riscv_vssseg8_mask: |
1745 | case Intrinsic::riscv_vsoxseg2_mask: |
1746 | case Intrinsic::riscv_vsoxseg3_mask: |
1747 | case Intrinsic::riscv_vsoxseg4_mask: |
1748 | case Intrinsic::riscv_vsoxseg5_mask: |
1749 | case Intrinsic::riscv_vsoxseg6_mask: |
1750 | case Intrinsic::riscv_vsoxseg7_mask: |
1751 | case Intrinsic::riscv_vsoxseg8_mask: |
1752 | case Intrinsic::riscv_vsuxseg2_mask: |
1753 | case Intrinsic::riscv_vsuxseg3_mask: |
1754 | case Intrinsic::riscv_vsuxseg4_mask: |
1755 | case Intrinsic::riscv_vsuxseg5_mask: |
1756 | case Intrinsic::riscv_vsuxseg6_mask: |
1757 | case Intrinsic::riscv_vsuxseg7_mask: |
1758 | case Intrinsic::riscv_vsuxseg8_mask: |
1759 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, |
1760 | /*IsStore*/ true, |
1761 | /*IsUnitStrided*/ false); |
1762 | } |
1763 | } |
1764 | |
1765 | bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
1766 | const AddrMode &AM, Type *Ty, |
1767 | unsigned AS, |
1768 | Instruction *I) const { |
1769 | // No global is ever allowed as a base. |
1770 | if (AM.BaseGV) |
1771 | return false; |
1772 | |
1773 | // RVV instructions only support register addressing. |
1774 | if (Subtarget.hasVInstructions() && isa<VectorType>(Val: Ty)) |
1775 | return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs; |
1776 | |
1777 | // Require a 12-bit signed offset. |
1778 | if (!isInt<12>(x: AM.BaseOffs)) |
1779 | return false; |
1780 | |
1781 | switch (AM.Scale) { |
1782 | case 0: // "r+i" or just "i", depending on HasBaseReg. |
1783 | break; |
1784 | case 1: |
1785 | if (!AM.HasBaseReg) // allow "r+i". |
1786 | break; |
1787 | return false; // disallow "r+r" or "r+r+i". |
1788 | default: |
1789 | return false; |
1790 | } |
1791 | |
1792 | return true; |
1793 | } |
1794 | |
1795 | bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
1796 | return isInt<12>(x: Imm); |
1797 | } |
1798 | |
1799 | bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
1800 | return isInt<12>(x: Imm); |
1801 | } |
1802 | |
1803 | // On RV32, 64-bit integers are split into their high and low parts and held |
1804 | // in two different registers, so the trunc is free since the low register can |
1805 | // just be used. |
1806 | // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of |
1807 | // isTruncateFree? |
1808 | bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { |
1809 | if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) |
1810 | return false; |
1811 | unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); |
1812 | unsigned DestBits = DstTy->getPrimitiveSizeInBits(); |
1813 | return (SrcBits == 64 && DestBits == 32); |
1814 | } |
1815 | |
1816 | bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { |
1817 | // We consider i64->i32 free on RV64 since we have good selection of W |
1818 | // instructions that make promoting operations back to i64 free in many cases. |
1819 | if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || |
1820 | !DstVT.isInteger()) |
1821 | return false; |
1822 | unsigned SrcBits = SrcVT.getSizeInBits(); |
1823 | unsigned DestBits = DstVT.getSizeInBits(); |
1824 | return (SrcBits == 64 && DestBits == 32); |
1825 | } |
1826 | |
1827 | bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
1828 | // Zexts are free if they can be combined with a load. |
1829 | // Don't advertise i32->i64 zextload as being free for RV64. It interacts |
1830 | // poorly with type legalization of compares preferring sext. |
1831 | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
1832 | EVT MemVT = LD->getMemoryVT(); |
1833 | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
1834 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
1835 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
1836 | return true; |
1837 | } |
1838 | |
1839 | return TargetLowering::isZExtFree(Val, VT2); |
1840 | } |
1841 | |
1842 | bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { |
1843 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
1844 | } |
1845 | |
1846 | bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
1847 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32); |
1848 | } |
1849 | |
1850 | bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { |
1851 | return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip(); |
1852 | } |
1853 | |
1854 | bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { |
1855 | return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || |
1856 | Subtarget.hasVendorXCVbitmanip(); |
1857 | } |
1858 | |
1859 | bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( |
1860 | const Instruction &AndI) const { |
1861 | // We expect to be able to match a bit extraction instruction if the Zbs |
1862 | // extension is supported and the mask is a power of two. However, we |
1863 | // conservatively return false if the mask would fit in an ANDI instruction, |
1864 | // on the basis that it's possible the sinking+duplication of the AND in |
1865 | // CodeGenPrepare triggered by this hook wouldn't decrease the instruction |
1866 | // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). |
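  // Conversely, a mask such as (1 << 12) cannot be encoded in ANDI anyway,
  // so sinking it to form a bit extract is expected to be profitable.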
1867 | if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) |
1868 | return false; |
1869 | ConstantInt *Mask = dyn_cast<ConstantInt>(Val: AndI.getOperand(i: 1)); |
1870 | if (!Mask) |
1871 | return false; |
1872 | return !Mask->getValue().isSignedIntN(N: 12) && Mask->getValue().isPowerOf2(); |
1873 | } |
1874 | |
1875 | bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { |
1876 | EVT VT = Y.getValueType(); |
1877 | |
1878 | // FIXME: Support vectors once we have tests. |
1879 | if (VT.isVector()) |
1880 | return false; |
1881 | |
1882 | return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && |
1883 | !isa<ConstantSDNode>(Val: Y); |
1884 | } |
1885 | |
1886 | bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { |
1887 | // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test. |
1888 | if (Subtarget.hasStdExtZbs()) |
1889 | return X.getValueType().isScalarInteger(); |
1890 | auto *C = dyn_cast<ConstantSDNode>(Val&: Y); |
  // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1892 | if (Subtarget.hasVendorXTHeadBs()) |
1893 | return C != nullptr; |
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position;
  // (1 << Y) must fit in ANDI's 12-bit signed immediate, hence Y <= 10.
1895 | return C && C->getAPIntValue().ule(RHS: 10); |
1896 | } |
1897 | |
1898 | bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, |
1899 | EVT VT) const { |
1900 | // Only enable for rvv. |
1901 | if (!VT.isVector() || !Subtarget.hasVInstructions()) |
1902 | return false; |
1903 | |
1904 | if (VT.isFixedLengthVector() && !isTypeLegal(VT)) |
1905 | return false; |
1906 | |
1907 | return true; |
1908 | } |
1909 | |
1910 | bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
1911 | Type *Ty) const { |
1912 | assert(Ty->isIntegerTy()); |
1913 | |
1914 | unsigned BitSize = Ty->getIntegerBitWidth(); |
1915 | if (BitSize > Subtarget.getXLen()) |
1916 | return false; |
1917 | |
1918 | // Fast path, assume 32-bit immediates are cheap. |
1919 | int64_t Val = Imm.getSExtValue(); |
1920 | if (isInt<32>(x: Val)) |
1921 | return true; |
1922 | |
  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
1926 | // TODO: Can the caller pass down the alignment? |
1927 | if (!Subtarget.enableUnalignedScalarMem()) |
1928 | return true; |
1929 | |
1930 | // Prefer to keep the load if it would require many instructions. |
1931 | // This uses the same threshold we use for constant pools but doesn't |
1932 | // check useConstantPoolForLargeInts. |
1933 | // TODO: Should we keep the load only when we're definitely going to emit a |
1934 | // constant pool? |
1935 | |
1936 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget); |
1937 | return Seq.size() <= Subtarget.getMaxBuildIntsCost(); |
1938 | } |
1939 | |
1940 | bool RISCVTargetLowering:: |
1941 | shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
1942 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
1943 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
1944 | SelectionDAG &DAG) const { |
1945 | // One interesting pattern that we'd want to form is 'bit extract': |
1946 | // ((1 >> Y) & 1) ==/!= 0 |
1947 | // But we also need to be careful not to try to reverse that fold. |
1948 | |
1949 | // Is this '((1 >> Y) & 1)'? |
1950 | if (XC && OldShiftOpcode == ISD::SRL && XC->isOne()) |
1951 | return false; // Keep the 'bit extract' pattern. |
1952 | |
1953 | // Will this be '((1 >> Y) & 1)' after the transform? |
1954 | if (NewShiftOpcode == ISD::SRL && CC->isOne()) |
1955 | return true; // Do form the 'bit extract' pattern. |
1956 | |
1957 | // If 'X' is a constant, and we transform, then we will immediately |
1958 | // try to undo the fold, thus causing endless combine loop. |
1959 | // So only do the transform if X is not a constant. This matches the default |
1960 | // implementation of this function. |
1961 | return !XC; |
1962 | } |
1963 | |
1964 | bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { |
1965 | switch (Opcode) { |
1966 | case Instruction::Add: |
1967 | case Instruction::Sub: |
1968 | case Instruction::Mul: |
1969 | case Instruction::And: |
1970 | case Instruction::Or: |
1971 | case Instruction::Xor: |
1972 | case Instruction::FAdd: |
1973 | case Instruction::FSub: |
1974 | case Instruction::FMul: |
1975 | case Instruction::FDiv: |
1976 | case Instruction::ICmp: |
1977 | case Instruction::FCmp: |
1978 | return true; |
1979 | case Instruction::Shl: |
1980 | case Instruction::LShr: |
1981 | case Instruction::AShr: |
1982 | case Instruction::UDiv: |
1983 | case Instruction::SDiv: |
1984 | case Instruction::URem: |
1985 | case Instruction::SRem: |
1986 | return Operand == 1; |
1987 | default: |
1988 | return false; |
1989 | } |
1990 | } |
1991 | |
1992 | |
1993 | bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { |
1994 | if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) |
1995 | return false; |
1996 | |
1997 | if (canSplatOperand(Opcode: I->getOpcode(), Operand)) |
1998 | return true; |
1999 | |
2000 | auto *II = dyn_cast<IntrinsicInst>(Val: I); |
2001 | if (!II) |
2002 | return false; |
2003 | |
2004 | switch (II->getIntrinsicID()) { |
2005 | case Intrinsic::fma: |
2006 | case Intrinsic::vp_fma: |
2007 | return Operand == 0 || Operand == 1; |
2008 | case Intrinsic::vp_shl: |
2009 | case Intrinsic::vp_lshr: |
2010 | case Intrinsic::vp_ashr: |
2011 | case Intrinsic::vp_udiv: |
2012 | case Intrinsic::vp_sdiv: |
2013 | case Intrinsic::vp_urem: |
2014 | case Intrinsic::vp_srem: |
2015 | case Intrinsic::ssub_sat: |
2016 | case Intrinsic::vp_ssub_sat: |
2017 | case Intrinsic::usub_sat: |
2018 | case Intrinsic::vp_usub_sat: |
2019 | return Operand == 1; |
2020 | // These intrinsics are commutative. |
2021 | case Intrinsic::vp_add: |
2022 | case Intrinsic::vp_mul: |
2023 | case Intrinsic::vp_and: |
2024 | case Intrinsic::vp_or: |
2025 | case Intrinsic::vp_xor: |
2026 | case Intrinsic::vp_fadd: |
2027 | case Intrinsic::vp_fmul: |
2028 | case Intrinsic::vp_icmp: |
2029 | case Intrinsic::vp_fcmp: |
2030 | case Intrinsic::smin: |
2031 | case Intrinsic::vp_smin: |
2032 | case Intrinsic::umin: |
2033 | case Intrinsic::vp_umin: |
2034 | case Intrinsic::smax: |
2035 | case Intrinsic::vp_smax: |
2036 | case Intrinsic::umax: |
2037 | case Intrinsic::vp_umax: |
2038 | case Intrinsic::sadd_sat: |
2039 | case Intrinsic::vp_sadd_sat: |
2040 | case Intrinsic::uadd_sat: |
2041 | case Intrinsic::vp_uadd_sat: |
2042 | // These intrinsics have 'vr' versions. |
2043 | case Intrinsic::vp_sub: |
2044 | case Intrinsic::vp_fsub: |
2045 | case Intrinsic::vp_fdiv: |
2046 | return Operand == 0 || Operand == 1; |
2047 | default: |
2048 | return false; |
2049 | } |
2050 | } |
2051 | |
2052 | /// Check if sinking \p I's operands to I's basic block is profitable, because |
2053 | /// the operands can be folded into a target instruction, e.g. |
2054 | /// splats of scalars can fold into vector instructions. |
2055 | bool RISCVTargetLowering::shouldSinkOperands( |
2056 | Instruction *I, SmallVectorImpl<Use *> &Ops) const { |
2057 | using namespace llvm::PatternMatch; |
2058 | |
2059 | if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) |
2060 | return false; |
2061 | |
  // Don't sink splat operands if the target prefers not to. Some targets
  // require S2V transfer buffers and we can run out of them copying the same
  // value repeatedly.
2065 | // FIXME: It could still be worth doing if it would improve vector register |
2066 | // pressure and prevent a vector spill. |
2067 | if (!Subtarget.sinkSplatOperands()) |
2068 | return false; |
2069 | |
2070 | for (auto OpIdx : enumerate(First: I->operands())) { |
2071 | if (!canSplatOperand(I, Operand: OpIdx.index())) |
2072 | continue; |
2073 | |
2074 | Instruction *Op = dyn_cast<Instruction>(Val: OpIdx.value().get()); |
    // Make sure we are not already sinking this operand.
2076 | if (!Op || any_of(Range&: Ops, P: [&](Use *U) { return U->get() == Op; })) |
2077 | continue; |
2078 | |
2079 | // We are looking for a splat that can be sunk. |
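    // In IR, that is the canonical splat idiom, e.g.:
    //   %ins   = insertelement <4 x i32> poison, i32 %x, i32 0
    //   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
    //            <4 x i32> zeroinitializer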
2080 | if (!match(V: Op, P: m_Shuffle(v1: m_InsertElt(Val: m_Undef(), Elt: m_Value(), Idx: m_ZeroInt()), |
2081 | v2: m_Undef(), mask: m_ZeroMask()))) |
2082 | continue; |
2083 | |
2084 | // Don't sink i1 splats. |
2085 | if (cast<VectorType>(Val: Op->getType())->getElementType()->isIntegerTy(Bitwidth: 1)) |
2086 | continue; |
2087 | |
    // All uses of the shuffle should be sunk to avoid duplicating it across
    // gpr and vector registers.
2090 | for (Use &U : Op->uses()) { |
2091 | Instruction *Insn = cast<Instruction>(Val: U.getUser()); |
2092 | if (!canSplatOperand(I: Insn, Operand: U.getOperandNo())) |
2093 | return false; |
2094 | } |
2095 | |
2096 | Ops.push_back(Elt: &Op->getOperandUse(i: 0)); |
2097 | Ops.push_back(Elt: &OpIdx.value()); |
2098 | } |
2099 | return true; |
2100 | } |
2101 | |
2102 | bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { |
2103 | unsigned Opc = VecOp.getOpcode(); |
2104 | |
2105 | // Assume target opcodes can't be scalarized. |
2106 | // TODO - do we have any exceptions? |
2107 | if (Opc >= ISD::BUILTIN_OP_END) |
2108 | return false; |
2109 | |
2110 | // If the vector op is not supported, try to convert to scalar. |
2111 | EVT VecVT = VecOp.getValueType(); |
2112 | if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT)) |
2113 | return true; |
2114 | |
2115 | // If the vector op is supported, but the scalar op is not, the transform may |
2116 | // not be worthwhile. |
  // Permit a vector binary operation to be converted to a scalar binary
  // operation which is custom lowered with an illegal type.
2119 | EVT ScalarVT = VecVT.getScalarType(); |
2120 | return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT) || |
2121 | isOperationCustom(Op: Opc, VT: ScalarVT); |
2122 | } |
2123 | |
2124 | bool RISCVTargetLowering::isOffsetFoldingLegal( |
2125 | const GlobalAddressSDNode *GA) const { |
2126 | // In order to maximise the opportunity for common subexpression elimination, |
2127 | // keep a separate ADD node for the global address offset instead of folding |
2128 | // it in the global address node. Later peephole optimisations may choose to |
2129 | // fold it back in when profitable. |
2130 | return false; |
2131 | } |
2132 | |
// Return one of the following:
// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
// positive counterpart, which will be materialized from the first returned
// element. The second returned element indicates that an FNEG should follow.
2139 | // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm. |
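// For example, -2.0 has no FLI encoding, but +2.0 does, so we would return
// {Index-of-2.0, true}: materialize +2.0 with fli, then negate it with fneg.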
2140 | std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, |
2141 | EVT VT) const { |
2142 | if (!Subtarget.hasStdExtZfa()) |
2143 | return std::make_pair(x: -1, y: false); |
2144 | |
2145 | bool IsSupportedVT = false; |
2146 | if (VT == MVT::f16) { |
2147 | IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh(); |
2148 | } else if (VT == MVT::f32) { |
2149 | IsSupportedVT = true; |
2150 | } else if (VT == MVT::f64) { |
2151 | assert(Subtarget.hasStdExtD() && "Expect D extension" ); |
2152 | IsSupportedVT = true; |
2153 | } |
2154 | |
2155 | if (!IsSupportedVT) |
2156 | return std::make_pair(x: -1, y: false); |
2157 | |
2158 | int Index = RISCVLoadFPImm::getLoadFPImm(FPImm: Imm); |
2159 | if (Index < 0 && Imm.isNegative()) |
2160 | // Try the combination of its positive counterpart + FNEG. |
2161 | return std::make_pair(x: RISCVLoadFPImm::getLoadFPImm(FPImm: -Imm), y: true); |
2162 | else |
2163 | return std::make_pair(x&: Index, y: false); |
2164 | } |
2165 | |
2166 | bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
2167 | bool ForCodeSize) const { |
2168 | bool IsLegalVT = false; |
2169 | if (VT == MVT::f16) |
2170 | IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin(); |
2171 | else if (VT == MVT::f32) |
2172 | IsLegalVT = Subtarget.hasStdExtFOrZfinx(); |
2173 | else if (VT == MVT::f64) |
2174 | IsLegalVT = Subtarget.hasStdExtDOrZdinx(); |
2175 | else if (VT == MVT::bf16) |
2176 | IsLegalVT = Subtarget.hasStdExtZfbfmin(); |
2177 | |
2178 | if (!IsLegalVT) |
2179 | return false; |
2180 | |
2181 | if (getLegalZfaFPImm(Imm, VT).first >= 0) |
2182 | return true; |
2183 | |
2184 | // Cannot create a 64 bit floating-point immediate value for rv32. |
2185 | if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { |
2186 | // td can handle +0.0 or -0.0 already. |
2187 | // -0.0 can be created by fmv + fneg. |
2188 | return Imm.isZero(); |
2189 | } |
2190 | |
2191 | // Special case: fmv + fneg |
2192 | if (Imm.isNegZero()) |
2193 | return true; |
2194 | |
2195 | // Building an integer and then converting requires a fmv at the end of |
2196 | // the integer sequence. |
2197 | const int Cost = |
2198 | 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(), |
2199 | Subtarget); |
2200 | return Cost <= FPImmCost; |
2201 | } |
2202 | |
2203 | // TODO: This is very conservative. |
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                  unsigned Index) const {
2206 | if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT)) |
2207 | return false; |
2208 | |
2209 | // Only support extracting a fixed from a fixed vector for now. |
2210 | if (ResVT.isScalableVector() || SrcVT.isScalableVector()) |
2211 | return false; |
2212 | |
2213 | EVT EltVT = ResVT.getVectorElementType(); |
2214 | assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node" ); |
2215 | |
2216 | // The smallest type we can slide is i8. |
2217 | // TODO: We can extract index 0 from a mask vector without a slide. |
2218 | if (EltVT == MVT::i1) |
2219 | return false; |
2220 | |
2221 | unsigned ResElts = ResVT.getVectorNumElements(); |
2222 | unsigned SrcElts = SrcVT.getVectorNumElements(); |
2223 | |
2224 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2225 | unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits(); |
2226 | |
2227 | // If we're extracting only data from the first VLEN bits of the source |
2228 | // then we can always do this with an m1 vslidedown.vx. Restricting the |
2229 | // Index ensures we can use a vslidedown.vi. |
2230 | // TODO: We can generalize this when the exact VLEN is known. |
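  // For example, with VLEN >= 128 (MinVLMAX == 4 for i32), extracting v2i32
  // at Index 2 from v8i32 reads only the first four elements, so it passes.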
2231 | if (Index + ResElts <= MinVLMAX && Index < 31) |
2232 | return true; |
2233 | |
  // Conservatively only handle extracting half of a vector.
  // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
  // a cheap extract. However, this case is important in practice for
  // shuffled extracts of longer vectors. How should we resolve this?
2238 | if ((ResElts * 2) != SrcElts) |
2239 | return false; |
2240 | |
2241 | // Slide can support arbitrary index, but we only treat vslidedown.vi as |
2242 | // cheap. |
2243 | if (Index >= 32) |
2244 | return false; |
2245 | |
2246 | // TODO: We can do arbitrary slidedowns, but for now only support extracting |
2247 | // the upper half of a vector until we have more test coverage. |
2248 | return Index == 0 || Index == ResElts; |
2249 | } |
2250 | |
2251 | MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
2252 | CallingConv::ID CC, |
2253 | EVT VT) const { |
2254 | // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. |
2255 | // We might still end up using a GPR but that will be decided based on ABI. |
2256 | if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && |
2257 | !Subtarget.hasStdExtZfhminOrZhinxmin()) |
2258 | return MVT::f32; |
2259 | |
2260 | MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
2261 | |
2262 | if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32) |
2263 | return MVT::i64; |
2264 | |
2265 | return PartVT; |
2266 | } |
2267 | |
2268 | unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
2269 | CallingConv::ID CC, |
2270 | EVT VT) const { |
2271 | // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. |
2272 | // We might still end up using a GPR but that will be decided based on ABI. |
2273 | if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && |
2274 | !Subtarget.hasStdExtZfhminOrZhinxmin()) |
2275 | return 1; |
2276 | |
2277 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
2278 | } |
2279 | |
2280 | unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv( |
2281 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
2282 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
2283 | unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv( |
2284 | Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); |
2285 | |
2286 | if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32) |
2287 | IntermediateVT = MVT::i64; |
2288 | |
2289 | if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32) |
2290 | RegisterVT = MVT::i64; |
2291 | |
2292 | return NumRegs; |
2293 | } |
2294 | |
2295 | // Changes the condition code and swaps operands if necessary, so the SetCC |
2296 | // operation matches one of the comparisons supported directly by branches |
2297 | // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare |
2298 | // with 1/-1. |
2299 | static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, |
2300 | ISD::CondCode &CC, SelectionDAG &DAG) { |
2301 | // If this is a single bit test that can't be handled by ANDI, shift the |
2302 | // bit to be tested to the MSB and perform a signed compare with 0. |
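  // For example, on RV32 "(X & 0x8000) == 0" becomes "(X << 16) >= 0"
  // (signed): the shift moves bit 15 into the sign bit.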
2303 | if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) && |
2304 | LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && |
2305 | isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) { |
2306 | uint64_t Mask = LHS.getConstantOperandVal(i: 1); |
2307 | if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) { |
2308 | unsigned ShAmt = 0; |
2309 | if (isPowerOf2_64(Value: Mask)) { |
2310 | CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; |
2311 | ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask); |
2312 | } else { |
2313 | ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask); |
2314 | } |
2315 | |
2316 | LHS = LHS.getOperand(i: 0); |
2317 | if (ShAmt != 0) |
2318 | LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS, |
2319 | N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType())); |
2320 | return; |
2321 | } |
2322 | } |
2323 | |
2324 | if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) { |
2325 | int64_t C = RHSC->getSExtValue(); |
2326 | switch (CC) { |
2327 | default: break; |
2328 | case ISD::SETGT: |
2329 | // Convert X > -1 to X >= 0. |
2330 | if (C == -1) { |
2331 | RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType()); |
2332 | CC = ISD::SETGE; |
2333 | return; |
2334 | } |
2335 | break; |
2336 | case ISD::SETLT: |
2337 | // Convert X < 1 to 0 >= X. |
2338 | if (C == 1) { |
2339 | RHS = LHS; |
2340 | LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType()); |
2341 | CC = ISD::SETGE; |
2342 | return; |
2343 | } |
2344 | break; |
2345 | } |
2346 | } |
2347 | |
2348 | switch (CC) { |
2349 | default: |
2350 | break; |
2351 | case ISD::SETGT: |
2352 | case ISD::SETLE: |
2353 | case ISD::SETUGT: |
2354 | case ISD::SETULE: |
2355 | CC = ISD::getSetCCSwappedOperands(Operation: CC); |
2356 | std::swap(a&: LHS, b&: RHS); |
2357 | break; |
2358 | } |
2359 | } |
2360 | |
2361 | RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { |
2362 | assert(VT.isScalableVector() && "Expecting a scalable vector type" ); |
2363 | unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); |
2364 | if (VT.getVectorElementType() == MVT::i1) |
2365 | KnownSize *= 8; |
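  // For example, with RVVBitsPerBlock == 64, nxv2i32 (64 bits minimum) maps
  // to LMUL_1 below, and nxv8i16 (128 bits) maps to LMUL_2.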
2366 | |
2367 | switch (KnownSize) { |
2368 | default: |
2369 | llvm_unreachable("Invalid LMUL." ); |
2370 | case 8: |
2371 | return RISCVII::VLMUL::LMUL_F8; |
2372 | case 16: |
2373 | return RISCVII::VLMUL::LMUL_F4; |
2374 | case 32: |
2375 | return RISCVII::VLMUL::LMUL_F2; |
2376 | case 64: |
2377 | return RISCVII::VLMUL::LMUL_1; |
2378 | case 128: |
2379 | return RISCVII::VLMUL::LMUL_2; |
2380 | case 256: |
2381 | return RISCVII::VLMUL::LMUL_4; |
2382 | case 512: |
2383 | return RISCVII::VLMUL::LMUL_8; |
2384 | } |
2385 | } |
2386 | |
2387 | unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { |
2388 | switch (LMul) { |
2389 | default: |
2390 | llvm_unreachable("Invalid LMUL." ); |
2391 | case RISCVII::VLMUL::LMUL_F8: |
2392 | case RISCVII::VLMUL::LMUL_F4: |
2393 | case RISCVII::VLMUL::LMUL_F2: |
2394 | case RISCVII::VLMUL::LMUL_1: |
2395 | return RISCV::VRRegClassID; |
2396 | case RISCVII::VLMUL::LMUL_2: |
2397 | return RISCV::VRM2RegClassID; |
2398 | case RISCVII::VLMUL::LMUL_4: |
2399 | return RISCV::VRM4RegClassID; |
2400 | case RISCVII::VLMUL::LMUL_8: |
2401 | return RISCV::VRM8RegClassID; |
2402 | } |
2403 | } |
2404 | |
2405 | unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { |
2406 | RISCVII::VLMUL LMUL = getLMUL(VT); |
2407 | if (LMUL == RISCVII::VLMUL::LMUL_F8 || |
2408 | LMUL == RISCVII::VLMUL::LMUL_F4 || |
2409 | LMUL == RISCVII::VLMUL::LMUL_F2 || |
2410 | LMUL == RISCVII::VLMUL::LMUL_1) { |
2411 | static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, |
2412 | "Unexpected subreg numbering" ); |
2413 | return RISCV::sub_vrm1_0 + Index; |
2414 | } |
2415 | if (LMUL == RISCVII::VLMUL::LMUL_2) { |
2416 | static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, |
2417 | "Unexpected subreg numbering" ); |
2418 | return RISCV::sub_vrm2_0 + Index; |
2419 | } |
2420 | if (LMUL == RISCVII::VLMUL::LMUL_4) { |
2421 | static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, |
2422 | "Unexpected subreg numbering" ); |
2423 | return RISCV::sub_vrm4_0 + Index; |
2424 | } |
2425 | llvm_unreachable("Invalid vector type." ); |
2426 | } |
2427 | |
2428 | unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { |
2429 | if (VT.getVectorElementType() == MVT::i1) |
2430 | return RISCV::VRRegClassID; |
2431 | return getRegClassIDForLMUL(LMul: getLMUL(VT)); |
2432 | } |
2433 | |
2434 | // Attempt to decompose a subvector insert/extract between VecVT and |
2435 | // SubVecVT via subregister indices. Returns the subregister index that |
2436 | // can perform the subvector insert/extract with the given element index, as |
2437 | // well as the index corresponding to any leftover subvectors that must be |
2438 | // further inserted/extracted within the register class for SubVecVT. |
2439 | std::pair<unsigned, unsigned> |
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
2443 | static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && |
2444 | RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && |
2445 | RISCV::VRM2RegClassID > RISCV::VRRegClassID), |
2446 | "Register classes not ordered" ); |
2447 | unsigned VecRegClassID = getRegClassIDForVecVT(VT: VecVT); |
2448 | unsigned SubRegClassID = getRegClassIDForVecVT(VT: SubVecVT); |
2449 | // Try to compose a subregister index that takes us from the incoming |
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
2452 | // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 |
2453 | // Note that this is not guaranteed to find a subregister index, such as |
2454 | // when we are extracting from one VR type to another. |
2455 | unsigned SubRegIdx = RISCV::NoSubRegister; |
2456 | for (const unsigned RCID : |
2457 | {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) |
2458 | if (VecRegClassID > RCID && SubRegClassID <= RCID) { |
2459 | VecVT = VecVT.getHalfNumVectorElementsVT(); |
2460 | bool IsHi = |
2461 | InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); |
2462 | SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, |
2463 | getSubregIndexByMVT(VecVT, IsHi)); |
2464 | if (IsHi) |
2465 | InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); |
2466 | } |
2467 | return {SubRegIdx, InsertExtractIdx}; |
2468 | } |
2469 | |
2470 | // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar |
2471 | // stores for those types. |
2472 | bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { |
2473 | return !Subtarget.useRVVForFixedLengthVectors() || |
2474 | (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); |
2475 | } |
2476 | |
2477 | bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { |
2478 | if (!ScalarTy.isSimple()) |
2479 | return false; |
2480 | switch (ScalarTy.getSimpleVT().SimpleTy) { |
2481 | case MVT::iPTR: |
2482 | return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; |
2483 | case MVT::i8: |
2484 | case MVT::i16: |
2485 | case MVT::i32: |
2486 | return true; |
2487 | case MVT::i64: |
2488 | return Subtarget.hasVInstructionsI64(); |
2489 | case MVT::f16: |
2490 | return Subtarget.hasVInstructionsF16(); |
2491 | case MVT::f32: |
2492 | return Subtarget.hasVInstructionsF32(); |
2493 | case MVT::f64: |
2494 | return Subtarget.hasVInstructionsF64(); |
2495 | default: |
2496 | return false; |
2497 | } |
2498 | } |
2499 | |
2500 | |
2501 | unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { |
2502 | return NumRepeatedDivisors; |
2503 | } |
2504 | |
2505 | static SDValue getVLOperand(SDValue Op) { |
2506 | assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2507 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && |
2508 | "Unexpected opcode" ); |
2509 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
2510 | unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0); |
2511 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
2512 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); |
2513 | if (!II) |
2514 | return SDValue(); |
2515 | return Op.getOperand(i: II->VLOperand + 1 + HasChain); |
2516 | } |
2517 | |
2518 | static bool useRVVForFixedLengthVectorVT(MVT VT, |
2519 | const RISCVSubtarget &Subtarget) { |
2520 | assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!" ); |
2521 | if (!Subtarget.useRVVForFixedLengthVectors()) |
2522 | return false; |
2523 | |
2524 | // We only support a set of vector types with a consistent maximum fixed size |
2525 | // across all supported vector element types to avoid legalization issues. |
2526 | // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest |
2527 | // fixed-length vector type we support is 1024 bytes. |
2528 | if (VT.getFixedSizeInBits() > 1024 * 8) |
2529 | return false; |
2530 | |
2531 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2532 | |
2533 | MVT EltVT = VT.getVectorElementType(); |
2534 | |
2535 | // Don't use RVV for vectors we cannot scalarize if required. |
2536 | switch (EltVT.SimpleTy) { |
2537 | // i1 is supported but has different rules. |
2538 | default: |
2539 | return false; |
2540 | case MVT::i1: |
2541 | // Masks can only use a single register. |
2542 | if (VT.getVectorNumElements() > MinVLen) |
2543 | return false; |
2544 | MinVLen /= 8; |
2545 | break; |
2546 | case MVT::i8: |
2547 | case MVT::i16: |
2548 | case MVT::i32: |
2549 | break; |
2550 | case MVT::i64: |
2551 | if (!Subtarget.hasVInstructionsI64()) |
2552 | return false; |
2553 | break; |
2554 | case MVT::f16: |
2555 | if (!Subtarget.hasVInstructionsF16Minimal()) |
2556 | return false; |
2557 | break; |
2558 | case MVT::f32: |
2559 | if (!Subtarget.hasVInstructionsF32()) |
2560 | return false; |
2561 | break; |
2562 | case MVT::f64: |
2563 | if (!Subtarget.hasVInstructionsF64()) |
2564 | return false; |
2565 | break; |
2566 | } |
2567 | |
2568 | // Reject elements larger than ELEN. |
2569 | if (EltVT.getSizeInBits() > Subtarget.getELen()) |
2570 | return false; |
2571 | |
2572 | unsigned LMul = divideCeil(Numerator: VT.getSizeInBits(), Denominator: MinVLen); |
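  // For example, v16i32 (512 bits) with MinVLen == 128 gives LMul == 4.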
2573 | // Don't use RVV for types that don't fit. |
2574 | if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) |
2575 | return false; |
2576 | |
2577 | // TODO: Perhaps an artificial restriction, but worth having whilst getting |
2578 | // the base fixed length RVV support in place. |
2579 | if (!VT.isPow2VectorType()) |
2580 | return false; |
2581 | |
2582 | return true; |
2583 | } |
2584 | |
2585 | bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { |
2586 | return ::useRVVForFixedLengthVectorVT(VT, Subtarget); |
2587 | } |
2588 | |
2589 | // Return the largest legal scalable vector type that matches VT's element type. |
2590 | static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, |
2591 | const RISCVSubtarget &Subtarget) { |
  // This may be called before legal types are set up.
2593 | assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || |
2594 | useRVVForFixedLengthVectorVT(VT, Subtarget)) && |
2595 | "Expected legal fixed length vector!" ); |
2596 | |
2597 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2598 | unsigned MaxELen = Subtarget.getELen(); |
2599 | |
2600 | MVT EltVT = VT.getVectorElementType(); |
2601 | switch (EltVT.SimpleTy) { |
2602 | default: |
2603 | llvm_unreachable("unexpected element type for RVV container" ); |
2604 | case MVT::i1: |
2605 | case MVT::i8: |
2606 | case MVT::i16: |
2607 | case MVT::i32: |
2608 | case MVT::i64: |
2609 | case MVT::f16: |
2610 | case MVT::f32: |
2611 | case MVT::f64: { |
2612 | // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for |
2613 | // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within |
2614 | // each fractional LMUL we support SEW between 8 and LMUL*ELEN. |
2615 | unsigned NumElts = |
2616 | (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; |
2617 | NumElts = std::max(a: NumElts, b: RISCV::RVVBitsPerBlock / MaxELen); |
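    // For example, v4i32 with MinVLen == 128: NumElts = (4 * 64) / 128 == 2,
    // so the container is nxv2i32 (one vector register at VLEN == 128).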
2618 | assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts" ); |
2619 | return MVT::getScalableVectorVT(VT: EltVT, NumElements: NumElts); |
2620 | } |
2621 | } |
2622 | } |
2623 | |
2624 | static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, |
2625 | const RISCVSubtarget &Subtarget) { |
2626 | return getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT, |
2627 | Subtarget); |
2628 | } |
2629 | |
2630 | MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { |
2631 | return ::getContainerForFixedLengthVector(TLI: *this, VT, Subtarget: getSubtarget()); |
2632 | } |
2633 | |
2634 | // Grow V to consume an entire RVV register. |
2635 | static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
2636 | const RISCVSubtarget &Subtarget) { |
2637 | assert(VT.isScalableVector() && |
2638 | "Expected to convert into a scalable vector!" ); |
2639 | assert(V.getValueType().isFixedLengthVector() && |
2640 | "Expected a fixed length vector operand!" ); |
2641 | SDLoc DL(V); |
2642 | SDValue Zero = DAG.getVectorIdxConstant(Val: 0, DL); |
2643 | return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: V, N3: Zero); |
2644 | } |
2645 | |
2646 | // Shrink V so it's just big enough to maintain a VT's worth of data. |
2647 | static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
2648 | const RISCVSubtarget &Subtarget) { |
2649 | assert(VT.isFixedLengthVector() && |
2650 | "Expected to convert into a fixed length vector!" ); |
2651 | assert(V.getValueType().isScalableVector() && |
2652 | "Expected a scalable vector operand!" ); |
2653 | SDLoc DL(V); |
2654 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()); |
2655 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: V, N2: Zero); |
2656 | } |
2657 | |
/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable)
/// length.
2661 | static MVT getMaskTypeFor(MVT VecVT) { |
2662 | assert(VecVT.isVector()); |
2663 | ElementCount EC = VecVT.getVectorElementCount(); |
2664 | return MVT::getVectorVT(MVT::i1, EC); |
2665 | } |
2666 | |
/// Creates an all-ones mask suitable for masking a vector of type VecVT with
/// vector length VL.
2669 | static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, |
2670 | SelectionDAG &DAG) { |
2671 | MVT MaskVT = getMaskTypeFor(VecVT); |
2672 | return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: MaskVT, Operand: VL); |
2673 | } |
2674 | |
2675 | static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, |
2676 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
2677 | // If we know the exact VLEN, and our VL is exactly equal to VLMAX, |
2678 | // canonicalize the representation. InsertVSETVLI will pick the immediate |
2679 | // encoding later if profitable. |
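  // For example, with an exact VLEN of 128, a v4i32 source in an nxv2i32
  // container has VLMAX = (128 / 32) * 1 = 4, so a request for 4 elements is
  // emitted as the X0 (VLMAX) encoding rather than the constant 4.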
2680 | const auto [MinVLMAX, MaxVLMAX] = |
2681 | RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); |
2682 | if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX) |
2683 | return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); |
2684 | |
2685 | return DAG.getConstant(Val: NumElts, DL, VT: Subtarget.getXLenVT()); |
2686 | } |
2687 | |
2688 | static std::pair<SDValue, SDValue> |
2689 | getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, |
2690 | const RISCVSubtarget &Subtarget) { |
2691 | assert(VecVT.isScalableVector() && "Expecting a scalable vector" ); |
2692 | SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); |
2693 | SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); |
2694 | return {Mask, VL}; |
2695 | } |
2696 | |
2697 | static std::pair<SDValue, SDValue> |
2698 | getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, |
2699 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
2700 | assert(ContainerVT.isScalableVector() && "Expecting scalable container type" ); |
2701 | SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget); |
2702 | SDValue Mask = getAllOnesMask(VecVT: ContainerVT, VL, DL, DAG); |
2703 | return {Mask, VL}; |
2704 | } |
2705 | |
2706 | // Gets the two common "VL" operands: an all-ones mask and the vector length. |
// VecVT is a vector type, either fixed-length or scalable. If VecVT is
// fixed-length, ContainerVT is the scalable container type it is lowered in;
// otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
2710 | static std::pair<SDValue, SDValue> |
2711 | getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, |
2712 | const RISCVSubtarget &Subtarget) { |
2713 | if (VecVT.isFixedLengthVector()) |
2714 | return getDefaultVLOps(NumElts: VecVT.getVectorNumElements(), ContainerVT, DL, DAG, |
2715 | Subtarget); |
2716 | assert(ContainerVT.isScalableVector() && "Expecting scalable container type" ); |
2717 | return getDefaultScalableVLOps(VecVT: ContainerVT, DL, DAG, Subtarget); |
2718 | } |
2719 | |
2720 | SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, |
2721 | SelectionDAG &DAG) const { |
2722 | assert(VecVT.isScalableVector() && "Expected scalable vector" ); |
2723 | return DAG.getElementCount(DL, VT: Subtarget.getXLenVT(), |
2724 | EC: VecVT.getVectorElementCount()); |
2725 | } |
2726 | |
2727 | std::pair<unsigned, unsigned> |
2728 | RISCVTargetLowering::computeVLMAXBounds(MVT VecVT, |
2729 | const RISCVSubtarget &Subtarget) { |
2730 | assert(VecVT.isScalableVector() && "Expected scalable vector" ); |
2731 | |
2732 | unsigned EltSize = VecVT.getScalarSizeInBits(); |
2733 | unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); |
2734 | |
2735 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
2736 | unsigned MaxVLMAX = |
2737 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize); |
2738 | |
2739 | unsigned VectorBitsMin = Subtarget.getRealMinVLen(); |
2740 | unsigned MinVLMAX = |
2741 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMin, EltSize, MinSize); |
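  // For example, for nxv2i32 (SEW=32, LMUL=1) on a core with VLEN known to be
  // in [128, 256], this returns {4, 8}, since VLMAX = (VLEN / SEW) * LMUL.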
2742 | |
2743 | return std::make_pair(x&: MinVLMAX, y&: MaxVLMAX); |
2744 | } |
2745 | |
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// cases of either are (currently) supported. This can get us into an infinite
// loop where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a
// BUILD_VECTOR as a ..., etc.
2750 | // Until either (or both) of these can reliably lower any node, reporting that |
2751 | // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks |
2752 | // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, |
2753 | // which is not desirable. |
2754 | bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( |
2755 | EVT VT, unsigned DefinedValues) const { |
2756 | return false; |
2757 | } |
2758 | |
2759 | InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { |
  // TODO: We assume here that reciprocal throughput is 1 for LMUL_1; it is
  // actually implementation-defined.
2762 | if (!VT.isVector()) |
2763 | return InstructionCost::getInvalid(); |
2764 | unsigned DLenFactor = Subtarget.getDLenFactor(); |
2765 | unsigned Cost; |
2766 | if (VT.isScalableVector()) { |
2767 | unsigned LMul; |
2768 | bool Fractional; |
2769 | std::tie(args&: LMul, args&: Fractional) = |
2770 | RISCVVType::decodeVLMUL(VLMUL: RISCVTargetLowering::getLMUL(VT)); |
2771 | if (Fractional) |
2772 | Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; |
2773 | else |
2774 | Cost = (LMul * DLenFactor); |
2775 | } else { |
2776 | Cost = divideCeil(Numerator: VT.getSizeInBits(), Denominator: Subtarget.getRealMinVLen() / DLenFactor); |
2777 | } |
2778 | return Cost; |
2779 | } |
2780 | |
2781 | |
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vregs implied by LMUL. Note that
/// the operands (index and possibly mask) are handled separately.
2785 | InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { |
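  // E.g., an LMUL=4 gather on a core with DLEN = VLEN costs 4 * 4 = 16.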
2786 | return getLMULCost(VT) * getLMULCost(VT); |
2787 | } |
2788 | |
2789 | /// Return the cost of a vrgather.vi (or vx) instruction for the type VT. |
2790 | /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL, |
2791 | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2792 | InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { |
2793 | return getLMULCost(VT); |
2794 | } |
2795 | |
2796 | /// Return the cost of a vslidedown.vx or vslideup.vx instruction |
2797 | /// for the type VT. (This does not cover the vslide1up or vslide1down |
2798 | /// variants.) Slides may be linear in the number of vregs implied by LMUL, |
2799 | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2800 | InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const { |
2801 | return getLMULCost(VT); |
2802 | } |
2803 | |
2804 | /// Return the cost of a vslidedown.vi or vslideup.vi instruction |
2805 | /// for the type VT. (This does not cover the vslide1up or vslide1down |
2806 | /// variants.) Slides may be linear in the number of vregs implied by LMUL, |
2807 | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2808 | InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const { |
2809 | return getLMULCost(VT); |
2810 | } |
2811 | |
2812 | static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, |
2813 | const RISCVSubtarget &Subtarget) { |
2814 | // RISC-V FP-to-int conversions saturate to the destination register size, but |
2815 | // don't produce 0 for nan. We can use a conversion instruction and fix the |
2816 | // nan case with a compare and a select. |
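  // For example, for i32, fcvt.w.s of a NaN input yields 2^31 - 1 rather than
  // the 0 that the saturating semantics require; (Src != Src) holds only for
  // NaN, so selecting on that unordered compare patches in the zero.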
2817 | SDValue Src = Op.getOperand(i: 0); |
2818 | |
2819 | MVT DstVT = Op.getSimpleValueType(); |
2820 | EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT(); |
2821 | |
2822 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; |
2823 | |
2824 | if (!DstVT.isVector()) { |
    // For bf16, or for f16 in the absence of Zfh, promote to f32, then
    // saturate the result.
2827 | if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) || |
2828 | Src.getValueType() == MVT::bf16) { |
2829 | Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src); |
2830 | } |
2831 | |
2832 | unsigned Opc; |
2833 | if (SatVT == DstVT) |
2834 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
2835 | else if (DstVT == MVT::i64 && SatVT == MVT::i32) |
2836 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
2837 | else |
2838 | return SDValue(); |
2839 | // FIXME: Support other SatVTs by clamping before or after the conversion. |
2840 | |
2841 | SDLoc DL(Op); |
2842 | SDValue FpToInt = DAG.getNode( |
2843 | Opcode: Opc, DL, VT: DstVT, N1: Src, |
2844 | N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT())); |
2845 | |
2846 | if (Opc == RISCVISD::FCVT_WU_RV64) |
2847 | FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); |
2848 | |
2849 | SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT); |
2850 | return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt, |
2851 | Cond: ISD::CondCode::SETUO); |
2852 | } |
2853 | |
2854 | // Vectors. |
2855 | |
2856 | MVT DstEltVT = DstVT.getVectorElementType(); |
2857 | MVT SrcVT = Src.getSimpleValueType(); |
2858 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
2859 | unsigned SrcEltSize = SrcEltVT.getSizeInBits(); |
2860 | unsigned DstEltSize = DstEltVT.getSizeInBits(); |
2861 | |
2862 | // Only handle saturating to the destination type. |
2863 | if (SatVT != DstEltVT) |
2864 | return SDValue(); |
2865 | |
  // FIXME: Narrowing by more than 1 step is not supported for now.
2867 | if (SrcEltSize > (2 * DstEltSize)) |
2868 | return SDValue(); |
2869 | |
2870 | MVT DstContainerVT = DstVT; |
2871 | MVT SrcContainerVT = SrcVT; |
2872 | if (DstVT.isFixedLengthVector()) { |
2873 | DstContainerVT = getContainerForFixedLengthVector(DAG, VT: DstVT, Subtarget); |
2874 | SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget); |
2875 | assert(DstContainerVT.getVectorElementCount() == |
2876 | SrcContainerVT.getVectorElementCount() && |
2877 | "Expected same element count" ); |
2878 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
2879 | } |
2880 | |
2881 | SDLoc DL(Op); |
2882 | |
2883 | auto [Mask, VL] = getDefaultVLOps(VecVT: DstVT, ContainerVT: DstContainerVT, DL, DAG, Subtarget); |
2884 | |
2885 | SDValue IsNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
2886 | Ops: {Src, Src, DAG.getCondCode(Cond: ISD::SETNE), |
2887 | DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL}); |
2888 | |
  // If we need to widen by more than 1 step, promote the FP type first, then
  // do a widening convert.
2891 | if (DstEltSize > (2 * SrcEltSize)) { |
2892 | assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!" ); |
2893 | MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32); |
2894 | Src = DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL); |
2895 | } |
2896 | |
2897 | unsigned RVVOpc = |
2898 | IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; |
2899 | SDValue Res = DAG.getNode(Opcode: RVVOpc, DL, VT: DstContainerVT, N1: Src, N2: Mask, N3: VL); |
2900 | |
2901 | SDValue SplatZero = DAG.getNode( |
2902 | Opcode: RISCVISD::VMV_V_X_VL, DL, VT: DstContainerVT, N1: DAG.getUNDEF(VT: DstContainerVT), |
2903 | N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL); |
2904 | Res = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: DstContainerVT, N1: IsNan, N2: SplatZero, |
2905 | N3: Res, N4: DAG.getUNDEF(VT: DstContainerVT), N5: VL); |
2906 | |
2907 | if (DstVT.isFixedLengthVector()) |
2908 | Res = convertFromScalableVector(VT: DstVT, V: Res, DAG, Subtarget); |
2909 | |
2910 | return Res; |
2911 | } |
2912 | |
2913 | static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { |
2914 | switch (Opc) { |
2915 | case ISD::FROUNDEVEN: |
2916 | case ISD::STRICT_FROUNDEVEN: |
2917 | case ISD::VP_FROUNDEVEN: |
2918 | return RISCVFPRndMode::RNE; |
2919 | case ISD::FTRUNC: |
2920 | case ISD::STRICT_FTRUNC: |
2921 | case ISD::VP_FROUNDTOZERO: |
2922 | return RISCVFPRndMode::RTZ; |
2923 | case ISD::FFLOOR: |
2924 | case ISD::STRICT_FFLOOR: |
2925 | case ISD::VP_FFLOOR: |
2926 | return RISCVFPRndMode::RDN; |
2927 | case ISD::FCEIL: |
2928 | case ISD::STRICT_FCEIL: |
2929 | case ISD::VP_FCEIL: |
2930 | return RISCVFPRndMode::RUP; |
2931 | case ISD::FROUND: |
2932 | case ISD::STRICT_FROUND: |
2933 | case ISD::VP_FROUND: |
2934 | return RISCVFPRndMode::RMM; |
2935 | case ISD::FRINT: |
2936 | return RISCVFPRndMode::DYN; |
2937 | } |
2938 | |
2939 | return RISCVFPRndMode::Invalid; |
2940 | } |
2941 | |
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
// the integer domain and back, taking care to avoid converting values that are
// nan or already correct.
2946 | static SDValue |
2947 | lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
2948 | const RISCVSubtarget &Subtarget) { |
2949 | MVT VT = Op.getSimpleValueType(); |
2950 | assert(VT.isVector() && "Unexpected type" ); |
2951 | |
2952 | SDLoc DL(Op); |
2953 | |
2954 | SDValue Src = Op.getOperand(i: 0); |
2955 | |
2956 | MVT ContainerVT = VT; |
2957 | if (VT.isFixedLengthVector()) { |
2958 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
2959 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
2960 | } |
2961 | |
2962 | SDValue Mask, VL; |
2963 | if (Op->isVPOpcode()) { |
2964 | Mask = Op.getOperand(i: 1); |
2965 | if (VT.isFixedLengthVector()) |
2966 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
2967 | Subtarget); |
2968 | VL = Op.getOperand(i: 2); |
2969 | } else { |
2970 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
2971 | } |
2972 | |
2973 | // Freeze the source since we are increasing the number of uses. |
2974 | Src = DAG.getFreeze(V: Src); |
2975 | |
2976 | // We do the conversion on the absolute value and fix the sign at the end. |
2977 | SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
2978 | |
2979 | // Determine the largest integer that can be represented exactly. This and |
2980 | // values larger than it don't have any fractional bits so don't need to |
2981 | // be converted. |
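  // (This is 2^(precision - 1): 2^23 = 8388608.0 for f32 and 2^52 for f64.)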
2982 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT); |
2983 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
2984 | APFloat MaxVal = APFloat(FltSem); |
2985 | MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1), |
2986 | /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven); |
2987 | SDValue MaxValNode = |
2988 | DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType()); |
2989 | SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT, |
2990 | N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL); |
2991 | |
2992 | // If abs(Src) was larger than MaxVal or nan, keep it. |
2993 | MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
2994 | Mask = |
2995 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: SetccVT, |
2996 | Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT), |
2997 | Mask, Mask, VL}); |
2998 | |
2999 | // Truncate to integer and convert back to FP. |
3000 | MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); |
3001 | MVT XLenVT = Subtarget.getXLenVT(); |
3002 | SDValue Truncated; |
3003 | |
3004 | switch (Op.getOpcode()) { |
3005 | default: |
3006 | llvm_unreachable("Unexpected opcode" ); |
3007 | case ISD::FCEIL: |
3008 | case ISD::VP_FCEIL: |
3009 | case ISD::FFLOOR: |
3010 | case ISD::VP_FFLOOR: |
3011 | case ISD::FROUND: |
3012 | case ISD::FROUNDEVEN: |
3013 | case ISD::VP_FROUND: |
3014 | case ISD::VP_FROUNDEVEN: |
3015 | case ISD::VP_FROUNDTOZERO: { |
3016 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode()); |
3017 | assert(FRM != RISCVFPRndMode::Invalid); |
3018 | Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask, |
3019 | N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL); |
3020 | break; |
3021 | } |
3022 | case ISD::FTRUNC: |
3023 | Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RTZ_X_F_VL, DL, VT: IntVT, N1: Src, |
3024 | N2: Mask, N3: VL); |
3025 | break; |
3026 | case ISD::FRINT: |
3027 | case ISD::VP_FRINT: |
3028 | Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL); |
3029 | break; |
3030 | case ISD::FNEARBYINT: |
3031 | case ISD::VP_FNEARBYINT: |
3032 | Truncated = DAG.getNode(Opcode: RISCVISD::VFROUND_NOEXCEPT_VL, DL, VT: ContainerVT, N1: Src, |
3033 | N2: Mask, N3: VL); |
3034 | break; |
3035 | } |
3036 | |
3037 | // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. |
3038 | if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL) |
3039 | Truncated = DAG.getNode(Opcode: RISCVISD::SINT_TO_FP_VL, DL, VT: ContainerVT, N1: Truncated, |
3040 | N2: Mask, N3: VL); |
3041 | |
3042 | // Restore the original sign so that -0.0 is preserved. |
3043 | Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated, |
3044 | N2: Src, N3: Src, N4: Mask, N5: VL); |
3045 | |
3046 | if (!VT.isFixedLengthVector()) |
3047 | return Truncated; |
3048 | |
3049 | return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget); |
3050 | } |
3051 | |
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
// to a qNaN, then converting the new source to integer and back to FP.
3055 | static SDValue |
3056 | lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
3057 | const RISCVSubtarget &Subtarget) { |
3058 | SDLoc DL(Op); |
3059 | MVT VT = Op.getSimpleValueType(); |
3060 | SDValue Chain = Op.getOperand(i: 0); |
3061 | SDValue Src = Op.getOperand(i: 1); |
3062 | |
3063 | MVT ContainerVT = VT; |
3064 | if (VT.isFixedLengthVector()) { |
3065 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3066 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
3067 | } |
3068 | |
3069 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3070 | |
3071 | // Freeze the source since we are increasing the number of uses. |
3072 | Src = DAG.getFreeze(V: Src); |
3073 | |
  // Convert sNaN to qNaN by executing x + x for each unordered element x in
  // Src.
3075 | MVT MaskVT = Mask.getSimpleValueType(); |
3076 | SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL, |
3077 | DAG.getVTList(MaskVT, MVT::Other), |
3078 | {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE), |
3079 | DAG.getUNDEF(MaskVT), Mask, VL}); |
3080 | Chain = Unorder.getValue(R: 1); |
3081 | Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL, |
3082 | DAG.getVTList(ContainerVT, MVT::Other), |
3083 | {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL}); |
3084 | Chain = Src.getValue(R: 1); |
3085 | |
3086 | // We do the conversion on the absolute value and fix the sign at the end. |
3087 | SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
3088 | |
3089 | // Determine the largest integer that can be represented exactly. This and |
3090 | // values larger than it don't have any fractional bits so don't need to |
3091 | // be converted. |
3092 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT); |
3093 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
3094 | APFloat MaxVal = APFloat(FltSem); |
3095 | MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1), |
3096 | /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven); |
3097 | SDValue MaxValNode = |
3098 | DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType()); |
3099 | SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT, |
3100 | N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL); |
3101 | |
3102 | // If abs(Src) was larger than MaxVal or nan, keep it. |
3103 | Mask = DAG.getNode( |
3104 | Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT, |
3105 | Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT), Mask, Mask, VL}); |
3106 | |
3107 | // Truncate to integer and convert back to FP. |
3108 | MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); |
3109 | MVT XLenVT = Subtarget.getXLenVT(); |
3110 | SDValue Truncated; |
3111 | |
3112 | switch (Op.getOpcode()) { |
3113 | default: |
3114 | llvm_unreachable("Unexpected opcode" ); |
3115 | case ISD::STRICT_FCEIL: |
3116 | case ISD::STRICT_FFLOOR: |
3117 | case ISD::STRICT_FROUND: |
3118 | case ISD::STRICT_FROUNDEVEN: { |
3119 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode()); |
3120 | assert(FRM != RISCVFPRndMode::Invalid); |
3121 | Truncated = DAG.getNode( |
3122 | RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other), |
3123 | {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL}); |
3124 | break; |
3125 | } |
3126 | case ISD::STRICT_FTRUNC: |
3127 | Truncated = |
3128 | DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, |
3129 | DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL); |
3130 | break; |
3131 | case ISD::STRICT_FNEARBYINT: |
3132 | Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, |
3133 | DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, |
3134 | Mask, VL); |
3135 | break; |
3136 | } |
3137 | Chain = Truncated.getValue(R: 1); |
3138 | |
3139 | // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. |
3140 | if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { |
3141 | Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL, |
3142 | DAG.getVTList(ContainerVT, MVT::Other), Chain, |
3143 | Truncated, Mask, VL); |
3144 | Chain = Truncated.getValue(R: 1); |
3145 | } |
3146 | |
3147 | // Restore the original sign so that -0.0 is preserved. |
3148 | Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated, |
3149 | N2: Src, N3: Src, N4: Mask, N5: VL); |
3150 | |
3151 | if (VT.isFixedLengthVector()) |
3152 | Truncated = convertFromScalableVector(VT, V: Truncated, DAG, Subtarget); |
3153 | return DAG.getMergeValues(Ops: {Truncated, Chain}, dl: DL); |
3154 | } |
3155 | |
3156 | static SDValue |
3157 | lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
3158 | const RISCVSubtarget &Subtarget) { |
3159 | MVT VT = Op.getSimpleValueType(); |
3160 | if (VT.isVector()) |
3161 | return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
3162 | |
3163 | if (DAG.shouldOptForSize()) |
3164 | return SDValue(); |
3165 | |
3166 | SDLoc DL(Op); |
3167 | SDValue Src = Op.getOperand(i: 0); |
3168 | |
3169 | // Create an integer the size of the mantissa with the MSB set. This and all |
3170 | // values larger than it don't have any fractional bits so don't need to be |
3171 | // converted. |
3172 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); |
3173 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
3174 | APFloat MaxVal = APFloat(FltSem); |
3175 | MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1), |
3176 | /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven); |
3177 | SDValue MaxValNode = DAG.getConstantFP(Val: MaxVal, DL, VT); |
3178 | |
3179 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode()); |
3180 | return DAG.getNode(Opcode: RISCVISD::FROUND, DL, VT, N1: Src, N2: MaxValNode, |
3181 | N3: DAG.getTargetConstant(Val: FRM, DL, VT: Subtarget.getXLenVT())); |
3182 | } |
3183 | |
3184 | // Expand vector LRINT and LLRINT by converting to the integer domain. |
3185 | static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, |
3186 | const RISCVSubtarget &Subtarget) { |
3187 | MVT VT = Op.getSimpleValueType(); |
3188 | assert(VT.isVector() && "Unexpected type" ); |
3189 | |
3190 | SDLoc DL(Op); |
3191 | SDValue Src = Op.getOperand(i: 0); |
3192 | MVT ContainerVT = VT; |
3193 | |
3194 | if (VT.isFixedLengthVector()) { |
3195 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3196 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
3197 | } |
3198 | |
3199 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3200 | SDValue Truncated = |
3201 | DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
3202 | |
3203 | if (!VT.isFixedLengthVector()) |
3204 | return Truncated; |
3205 | |
3206 | return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget); |
3207 | } |
3208 | |
3209 | static SDValue |
3210 | getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, |
3211 | const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, |
3212 | SDValue Offset, SDValue Mask, SDValue VL, |
3213 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { |
3214 | if (Merge.isUndef()) |
3215 | Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
3216 | SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT()); |
3217 | SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; |
3218 | return DAG.getNode(Opcode: RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); |
3219 | } |
3220 | |
3221 | static SDValue |
3222 | getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, |
3223 | EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, |
3224 | SDValue VL, |
3225 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { |
3226 | if (Merge.isUndef()) |
3227 | Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
3228 | SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT()); |
3229 | SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; |
3230 | return DAG.getNode(Opcode: RISCVISD::VSLIDEUP_VL, DL, VT, Ops); |
3231 | } |
3232 | |
3233 | static MVT getLMUL1VT(MVT VT) { |
3234 | assert(VT.getVectorElementType().getSizeInBits() <= 64 && |
3235 | "Unexpected vector MVT" ); |
3236 | return MVT::getScalableVectorVT( |
3237 | VT: VT.getVectorElementType(), |
3238 | NumElements: RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); |
3239 | } |
3240 | |
3241 | struct VIDSequence { |
3242 | int64_t StepNumerator; |
3243 | unsigned StepDenominator; |
3244 | int64_t Addend; |
3245 | }; |
3246 | |
3247 | static std::optional<uint64_t> getExactInteger(const APFloat &APF, |
3248 | uint32_t BitWidth) { |
3249 | // We will use a SINT_TO_FP to materialize this constant so we should use a |
3250 | // signed APSInt here. |
3251 | APSInt ValInt(BitWidth, /*IsUnsigned*/ false); |
  // We use an arbitrary rounding mode here. If a floating-point value is an
  // exact integer (e.g., 1.0), the rounding mode does not affect the output
  // value. If
3254 | // the rounding mode changes the output value, then it is not an exact |
3255 | // integer. |
3256 | RoundingMode ArbitraryRM = RoundingMode::TowardZero; |
3257 | bool IsExact; |
3258 | // If it is out of signed integer range, it will return an invalid operation. |
3259 | // If it is not an exact integer, IsExact is false. |
3260 | if ((APF.convertToInteger(Result&: ValInt, RM: ArbitraryRM, IsExact: &IsExact) == |
3261 | APFloatBase::opInvalidOp) || |
3262 | !IsExact) |
3263 | return std::nullopt; |
3264 | return ValInt.extractBitsAsZExtValue(numBits: BitWidth, bitPosition: 0); |
3265 | } |
3266 | |
3267 | // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] |
// to the (non-zero) step S and start value X. This can then be lowered as the
3269 | // RVV sequence (VID * S) + X, for example. |
3270 | // The step S is represented as an integer numerator divided by a positive |
3271 | // denominator. Note that the implementation currently only identifies |
3272 | // sequences in which either the numerator is +/- 1 or the denominator is 1. It |
3273 | // cannot detect 2/3, for example. |
3274 | // Note that this method will also match potentially unappealing index |
3275 | // sequences, like <i32 0, i32 50939494>, however it is left to the caller to |
3276 | // determine whether this is worth generating code for. |
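// For example, <1,1,2,2,3,3> is matched with numerator 1, denominator 2 and
// addend 1: element i is (i * 1) / 2 + 1 with flooring division.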
3277 | static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op, |
3278 | unsigned EltSizeInBits) { |
3279 | assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR" ); |
3280 | if (!cast<BuildVectorSDNode>(Val&: Op)->isConstant()) |
3281 | return std::nullopt; |
3282 | bool IsInteger = Op.getValueType().isInteger(); |
3283 | |
3284 | std::optional<unsigned> SeqStepDenom; |
3285 | std::optional<int64_t> SeqStepNum, SeqAddend; |
3286 | std::optional<std::pair<uint64_t, unsigned>> PrevElt; |
3287 | assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits()); |
3288 | |
3289 | // First extract the ops into a list of constant integer values. This may not |
3290 | // be possible for floats if they're not all representable as integers. |
3291 | SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands()); |
3292 | const unsigned OpSize = Op.getScalarValueSizeInBits(); |
3293 | for (auto [Idx, Elt] : enumerate(First: Op->op_values())) { |
3294 | if (Elt.isUndef()) { |
3295 | Elts[Idx] = std::nullopt; |
3296 | continue; |
3297 | } |
3298 | if (IsInteger) { |
3299 | Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(N: OpSize); |
3300 | } else { |
3301 | auto ExactInteger = |
3302 | getExactInteger(APF: cast<ConstantFPSDNode>(Val: Elt)->getValueAPF(), BitWidth: OpSize); |
3303 | if (!ExactInteger) |
3304 | return std::nullopt; |
3305 | Elts[Idx] = *ExactInteger; |
3306 | } |
3307 | } |
3308 | |
3309 | for (auto [Idx, Elt] : enumerate(First&: Elts)) { |
3310 | // Assume undef elements match the sequence; we just have to be careful |
3311 | // when interpolating across them. |
3312 | if (!Elt) |
3313 | continue; |
3314 | |
3315 | if (PrevElt) { |
3316 | // Calculate the step since the last non-undef element, and ensure |
3317 | // it's consistent across the entire sequence. |
3318 | unsigned IdxDiff = Idx - PrevElt->second; |
3319 | int64_t ValDiff = SignExtend64(X: *Elt - PrevElt->first, B: EltSizeInBits); |
3320 | |
      // A zero value difference means that we're somewhere in the middle
3322 | // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a |
3323 | // step change before evaluating the sequence. |
3324 | if (ValDiff == 0) |
3325 | continue; |
3326 | |
3327 | int64_t Remainder = ValDiff % IdxDiff; |
3328 | // Normalize the step if it's greater than 1. |
3329 | if (Remainder != ValDiff) { |
3330 | // The difference must cleanly divide the element span. |
3331 | if (Remainder != 0) |
3332 | return std::nullopt; |
3333 | ValDiff /= IdxDiff; |
3334 | IdxDiff = 1; |
3335 | } |
3336 | |
3337 | if (!SeqStepNum) |
3338 | SeqStepNum = ValDiff; |
3339 | else if (ValDiff != SeqStepNum) |
3340 | return std::nullopt; |
3341 | |
3342 | if (!SeqStepDenom) |
3343 | SeqStepDenom = IdxDiff; |
3344 | else if (IdxDiff != *SeqStepDenom) |
3345 | return std::nullopt; |
3346 | } |
3347 | |
3348 | // Record this non-undef element for later. |
3349 | if (!PrevElt || PrevElt->first != *Elt) |
3350 | PrevElt = std::make_pair(x&: *Elt, y&: Idx); |
3351 | } |
3352 | |
3353 | // We need to have logged a step for this to count as a legal index sequence. |
3354 | if (!SeqStepNum || !SeqStepDenom) |
3355 | return std::nullopt; |
3356 | |
3357 | // Loop back through the sequence and validate elements we might have skipped |
3358 | // while waiting for a valid step. While doing this, log any sequence addend. |
3359 | for (auto [Idx, Elt] : enumerate(First&: Elts)) { |
3360 | if (!Elt) |
3361 | continue; |
3362 | uint64_t ExpectedVal = |
3363 | (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; |
3364 | int64_t Addend = SignExtend64(X: *Elt - ExpectedVal, B: EltSizeInBits); |
3365 | if (!SeqAddend) |
3366 | SeqAddend = Addend; |
3367 | else if (Addend != SeqAddend) |
3368 | return std::nullopt; |
3369 | } |
3370 | |
3371 | assert(SeqAddend && "Must have an addend if we have a step" ); |
3372 | |
3373 | return VIDSequence{.StepNumerator: *SeqStepNum, .StepDenominator: *SeqStepDenom, .Addend: *SeqAddend}; |
3374 | } |
3375 | |
3376 | // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT |
3377 | // and lower it as a VRGATHER_VX_VL from the source vector. |
3378 | static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, |
3379 | SelectionDAG &DAG, |
3380 | const RISCVSubtarget &Subtarget) { |
3381 | if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
3382 | return SDValue(); |
3383 | SDValue Vec = SplatVal.getOperand(i: 0); |
3384 | // Only perform this optimization on vectors of the same size for simplicity. |
3385 | // Don't perform this optimization for i1 vectors. |
3386 | // FIXME: Support i1 vectors, maybe by promoting to i8? |
3387 | if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) |
3388 | return SDValue(); |
3389 | SDValue Idx = SplatVal.getOperand(i: 1); |
3390 | // The index must be a legal type. |
3391 | if (Idx.getValueType() != Subtarget.getXLenVT()) |
3392 | return SDValue(); |
3393 | |
3394 | MVT ContainerVT = VT; |
3395 | if (VT.isFixedLengthVector()) { |
3396 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3397 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
3398 | } |
3399 | |
3400 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3401 | |
3402 | SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT, N1: Vec, |
3403 | N2: Idx, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
3404 | |
3405 | if (!VT.isFixedLengthVector()) |
3406 | return Gather; |
3407 | |
3408 | return convertFromScalableVector(VT, V: Gather, DAG, Subtarget); |
3409 | } |
3410 | |
3411 | |
3412 | /// Try and optimize BUILD_VECTORs with "dominant values" - these are values |
3413 | /// which constitute a large proportion of the elements. In such cases we can |
3414 | /// splat a vector with the dominant element and make up the shortfall with |
/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3416 | /// Note that this includes vectors of 2 elements by association. The |
3417 | /// upper-most element is the "dominant" one, allowing us to use a splat to |
3418 | /// "insert" the upper element, and an insert of the lower element at position |
3419 | /// 0, which improves codegen. |
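/// For example, <i32 42, i32 42, i32 42, i32 7> splats 42 and then inserts 7
/// as the last element (via v(f)slide1down in that particular position).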
3420 | static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, |
3421 | const RISCVSubtarget &Subtarget) { |
3422 | MVT VT = Op.getSimpleValueType(); |
3423 | assert(VT.isFixedLengthVector() && "Unexpected vector!" ); |
3424 | |
3425 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3426 | |
3427 | SDLoc DL(Op); |
3428 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3429 | |
3430 | MVT XLenVT = Subtarget.getXLenVT(); |
3431 | unsigned NumElts = Op.getNumOperands(); |
3432 | |
3433 | SDValue DominantValue; |
3434 | unsigned MostCommonCount = 0; |
3435 | DenseMap<SDValue, unsigned> ValueCounts; |
3436 | unsigned NumUndefElts = |
3437 | count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); }); |
3438 | |
3439 | // Track the number of scalar loads we know we'd be inserting, estimated as |
3440 | // any non-zero floating-point constant. Other kinds of element are either |
3441 | // already in registers or are materialized on demand. The threshold at which |
  // a vector load is more desirable than several scalar materialization and
3443 | // vector-insertion instructions is not known. |
3444 | unsigned NumScalarLoads = 0; |
3445 | |
3446 | for (SDValue V : Op->op_values()) { |
3447 | if (V.isUndef()) |
3448 | continue; |
3449 | |
3450 | ValueCounts.insert(KV: std::make_pair(x&: V, y: 0)); |
3451 | unsigned &Count = ValueCounts[V]; |
3452 | if (0 == Count) |
3453 | if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val&: V)) |
3454 | NumScalarLoads += !CFP->isExactlyValue(V: +0.0); |
3455 | |
3456 | // Is this value dominant? In case of a tie, prefer the highest element as |
3457 | // it's cheaper to insert near the beginning of a vector than it is at the |
3458 | // end. |
3459 | if (++Count >= MostCommonCount) { |
3460 | DominantValue = V; |
3461 | MostCommonCount = Count; |
3462 | } |
3463 | } |
3464 | |
3465 | assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR" ); |
3466 | unsigned NumDefElts = NumElts - NumUndefElts; |
3467 | unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; |
3468 | |
3469 | // Don't perform this optimization when optimizing for size, since |
3470 | // materializing elements and inserting them tends to cause code bloat. |
3471 | if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && |
3472 | (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) && |
3473 | ((MostCommonCount > DominantValueCountThreshold) || |
3474 | (ValueCounts.size() <= Log2_32(Value: NumDefElts)))) { |
3475 | // Start by splatting the most common element. |
3476 | SDValue Vec = DAG.getSplatBuildVector(VT, DL, Op: DominantValue); |
3477 | |
3478 | DenseSet<SDValue> Processed{DominantValue}; |
3479 | |
3480 | // We can handle an insert into the last element (of a splat) via |
3481 | // v(f)slide1down. This is slightly better than the vslideup insert |
3482 | // lowering as it avoids the need for a vector group temporary. It |
3483 | // is also better than using vmerge.vx as it avoids the need to |
3484 | // materialize the mask in a vector register. |
3485 | if (SDValue LastOp = Op->getOperand(Num: Op->getNumOperands() - 1); |
3486 | !LastOp.isUndef() && ValueCounts[LastOp] == 1 && |
3487 | LastOp != DominantValue) { |
3488 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
3489 | auto OpCode = |
3490 | VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; |
3491 | if (!VT.isFloatingPoint()) |
3492 | LastOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: LastOp); |
3493 | Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec, |
3494 | N3: LastOp, N4: Mask, N5: VL); |
3495 | Vec = convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
3496 | Processed.insert(V: LastOp); |
3497 | } |
3498 | |
3499 | MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); |
3500 | for (const auto &OpIdx : enumerate(First: Op->ops())) { |
3501 | const SDValue &V = OpIdx.value(); |
3502 | if (V.isUndef() || !Processed.insert(V).second) |
3503 | continue; |
3504 | if (ValueCounts[V] == 1) { |
3505 | Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: Vec, N2: V, |
3506 | N3: DAG.getVectorIdxConstant(Val: OpIdx.index(), DL)); |
3507 | } else { |
3508 | // Blend in all instances of this value using a VSELECT, using a |
3509 | // mask where each bit signals whether that element is the one |
3510 | // we're after. |
3511 | SmallVector<SDValue> Ops; |
3512 | transform(Range: Op->op_values(), d_first: std::back_inserter(x&: Ops), F: [&](SDValue V1) { |
3513 | return DAG.getConstant(Val: V == V1, DL, VT: XLenVT); |
3514 | }); |
3515 | Vec = DAG.getNode(Opcode: ISD::VSELECT, DL, VT, |
3516 | N1: DAG.getBuildVector(VT: SelMaskTy, DL, Ops), |
3517 | N2: DAG.getSplatBuildVector(VT, DL, Op: V), N3: Vec); |
3518 | } |
3519 | } |
3520 | |
3521 | return Vec; |
3522 | } |
3523 | |
3524 | return SDValue(); |
3525 | } |
3526 | |
3527 | static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, |
3528 | const RISCVSubtarget &Subtarget) { |
3529 | MVT VT = Op.getSimpleValueType(); |
3530 | assert(VT.isFixedLengthVector() && "Unexpected vector!" ); |
3531 | |
3532 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3533 | |
3534 | SDLoc DL(Op); |
3535 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3536 | |
3537 | MVT XLenVT = Subtarget.getXLenVT(); |
3538 | unsigned NumElts = Op.getNumOperands(); |
3539 | |
3540 | if (VT.getVectorElementType() == MVT::i1) { |
3541 | if (ISD::isBuildVectorAllZeros(N: Op.getNode())) { |
3542 | SDValue VMClr = DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT: ContainerVT, Operand: VL); |
3543 | return convertFromScalableVector(VT, V: VMClr, DAG, Subtarget); |
3544 | } |
3545 | |
3546 | if (ISD::isBuildVectorAllOnes(N: Op.getNode())) { |
3547 | SDValue VMSet = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL); |
3548 | return convertFromScalableVector(VT, V: VMSet, DAG, Subtarget); |
3549 | } |
3550 | |
3551 | // Lower constant mask BUILD_VECTORs via an integer vector type, in |
3552 | // scalar integer chunks whose bit-width depends on the number of mask |
3553 | // bits and XLEN. |
3554 | // First, determine the most appropriate scalar integer type to use. This |
3555 | // is at most XLenVT, but may be shrunk to a smaller vector element type |
3556 | // according to the size of the final vector - use i8 chunks rather than |
3557 | // XLenVT if we're producing a v8i1. This results in more consistent |
3558 | // codegen across RV32 and RV64. |
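    // For example, a v16i1 constant mask is packed LSB-first into a single i16
    // element (bit i = mask element i), built as a v1i16 build_vector and
    // bitcast back to v16i1.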
3559 | unsigned NumViaIntegerBits = std::clamp(val: NumElts, lo: 8u, hi: Subtarget.getXLen()); |
3560 | NumViaIntegerBits = std::min(a: NumViaIntegerBits, b: Subtarget.getELen()); |
3561 | // If we have to use more than one INSERT_VECTOR_ELT then this |
    // optimization is likely to increase code size; avoid performing it in
3563 | // such a case. We can use a load from a constant pool in this case. |
3564 | if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) |
3565 | return SDValue(); |
3566 | // Now we can create our integer vector type. Note that it may be larger |
3567 | // than the resulting mask type: v4i1 would use v1i8 as its integer type. |
3568 | unsigned IntegerViaVecElts = divideCeil(Numerator: NumElts, Denominator: NumViaIntegerBits); |
3569 | MVT IntegerViaVecVT = |
3570 | MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NumViaIntegerBits), |
3571 | NumElements: IntegerViaVecElts); |
3572 | |
3573 | uint64_t Bits = 0; |
3574 | unsigned BitPos = 0, IntegerEltIdx = 0; |
3575 | SmallVector<SDValue, 8> Elts(IntegerViaVecElts); |
3576 | |
3577 | for (unsigned I = 0; I < NumElts;) { |
3578 | SDValue V = Op.getOperand(i: I); |
3579 | bool BitValue = !V.isUndef() && V->getAsZExtVal(); |
3580 | Bits |= ((uint64_t)BitValue << BitPos); |
3581 | ++BitPos; |
3582 | ++I; |
3583 | |
3584 | // Once we accumulate enough bits to fill our scalar type or process the |
3585 | // last element, insert into our vector and clear our accumulated data. |
3586 | if (I % NumViaIntegerBits == 0 || I == NumElts) { |
3587 | if (NumViaIntegerBits <= 32) |
3588 | Bits = SignExtend64<32>(x: Bits); |
3589 | SDValue Elt = DAG.getConstant(Val: Bits, DL, VT: XLenVT); |
3590 | Elts[IntegerEltIdx] = Elt; |
3591 | Bits = 0; |
3592 | BitPos = 0; |
3593 | IntegerEltIdx++; |
3594 | } |
3595 | } |
3596 | |
3597 | SDValue Vec = DAG.getBuildVector(VT: IntegerViaVecVT, DL, Ops: Elts); |
3598 | |
3599 | if (NumElts < NumViaIntegerBits) { |
3600 | // If we're producing a smaller vector than our minimum legal integer |
3601 | // type, bitcast to the equivalent (known-legal) mask type, and extract |
3602 | // our final mask. |
3603 | assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type" ); |
3604 | Vec = DAG.getBitcast(MVT::v8i1, Vec); |
3605 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Vec, |
3606 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
3607 | } else { |
3608 | // Else we must have produced an integer type with the same size as the |
3609 | // mask type; bitcast for the final result. |
3610 | assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); |
3611 | Vec = DAG.getBitcast(VT, V: Vec); |
3612 | } |
3613 | |
3614 | return Vec; |
3615 | } |
3616 | |
3617 | if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) { |
3618 | unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
3619 | : RISCVISD::VMV_V_X_VL; |
3620 | if (!VT.isFloatingPoint()) |
3621 | Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat); |
3622 | Splat = |
3623 | DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL); |
3624 | return convertFromScalableVector(VT, V: Splat, DAG, Subtarget); |
3625 | } |
3626 | |
3627 | // Try and match index sequences, which we can lower to the vid instruction |
3628 | // with optional modifications. An all-undef vector is matched by |
3629 | // getSplatValue, above. |
3630 | if (auto SimpleVID = isSimpleVIDSequence(Op, EltSizeInBits: Op.getScalarValueSizeInBits())) { |
3631 | int64_t StepNumerator = SimpleVID->StepNumerator; |
3632 | unsigned StepDenominator = SimpleVID->StepDenominator; |
3633 | int64_t Addend = SimpleVID->Addend; |
3634 | |
3635 | assert(StepNumerator != 0 && "Invalid step" ); |
3636 | bool Negate = false; |
3637 | int64_t SplatStepVal = StepNumerator; |
3638 | unsigned StepOpcode = ISD::MUL; |
3639 | // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it |
3640 | // anyway as the shift of 63 won't fit in uimm5. |
3641 | if (StepNumerator != 1 && StepNumerator != INT64_MIN && |
3642 | isPowerOf2_64(Value: std::abs(i: StepNumerator))) { |
3643 | Negate = StepNumerator < 0; |
3644 | StepOpcode = ISD::SHL; |
3645 | SplatStepVal = Log2_64(Value: std::abs(i: StepNumerator)); |
3646 | } |
3647 | |
    // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
    // threshold since it's the immediate value many RVV instructions accept.
    // There is no vmul.vi instruction, so ensure the multiply constant fits in
    // a single addi instruction.
3652 | if (((StepOpcode == ISD::MUL && isInt<12>(x: SplatStepVal)) || |
3653 | (StepOpcode == ISD::SHL && isUInt<5>(x: SplatStepVal))) && |
3654 | isPowerOf2_32(Value: StepDenominator) && |
3655 | (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(x: Addend)) { |
3656 | MVT VIDVT = |
3657 | VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; |
3658 | MVT VIDContainerVT = |
3659 | getContainerForFixedLengthVector(DAG, VT: VIDVT, Subtarget); |
3660 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: VIDContainerVT, N1: Mask, N2: VL); |
3661 | // Convert right out of the scalable type so we can use standard ISD |
3662 | // nodes for the rest of the computation. If we used scalable types with |
3663 | // these, we'd lose the fixed-length vector info and generate worse |
3664 | // vsetvli code. |
3665 | VID = convertFromScalableVector(VT: VIDVT, V: VID, DAG, Subtarget); |
3666 | if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || |
3667 | (StepOpcode == ISD::SHL && SplatStepVal != 0)) { |
3668 | SDValue SplatStep = DAG.getConstant(Val: SplatStepVal, DL, VT: VIDVT); |
3669 | VID = DAG.getNode(Opcode: StepOpcode, DL, VT: VIDVT, N1: VID, N2: SplatStep); |
3670 | } |
3671 | if (StepDenominator != 1) { |
3672 | SDValue SplatStep = |
3673 | DAG.getConstant(Val: Log2_64(Value: StepDenominator), DL, VT: VIDVT); |
3674 | VID = DAG.getNode(Opcode: ISD::SRL, DL, VT: VIDVT, N1: VID, N2: SplatStep); |
3675 | } |
3676 | if (Addend != 0 || Negate) { |
3677 | SDValue SplatAddend = DAG.getConstant(Val: Addend, DL, VT: VIDVT); |
3678 | VID = DAG.getNode(Opcode: Negate ? ISD::SUB : ISD::ADD, DL, VT: VIDVT, N1: SplatAddend, |
3679 | N2: VID); |
3680 | } |
3681 | if (VT.isFloatingPoint()) { |
3682 | // TODO: Use vfwcvt to reduce register pressure. |
3683 | VID = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: VID); |
3684 | } |
3685 | return VID; |
3686 | } |
3687 | } |
3688 | |
3689 | // For very small build_vectors, use a single scalar insert of a constant. |
3690 | // TODO: Base this on constant rematerialization cost, not size. |
3691 | const unsigned EltBitSize = VT.getScalarSizeInBits(); |
3692 | if (VT.getSizeInBits() <= 32 && |
3693 | ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) { |
3694 | MVT ViaIntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits()); |
3695 | assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) && |
3696 | "Unexpected sequence type" ); |
3697 | // If we can use the original VL with the modified element type, this |
3698 | // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this |
3699 | // be moved into InsertVSETVLI? |
3700 | unsigned ViaVecLen = |
3701 | (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1; |
3702 | MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen); |
3703 | |
3704 | uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize); |
3705 | uint64_t SplatValue = 0; |
3706 | // Construct the amalgamated value at this larger vector type. |
3707 | for (const auto &OpIdx : enumerate(First: Op->op_values())) { |
3708 | const auto &SeqV = OpIdx.value(); |
3709 | if (!SeqV.isUndef()) |
3710 | SplatValue |= |
3711 | ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize)); |
3712 | } |
3713 | |
    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
3716 | if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
3717 | SplatValue = SignExtend64<32>(x: SplatValue); |
3718 | |
3719 | SDValue Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ViaVecVT, |
3720 | N1: DAG.getUNDEF(VT: ViaVecVT), |
3721 | N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT), |
3722 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
3723 | if (ViaVecLen != 1) |
3724 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, |
3725 | VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: 1), N1: Vec, |
3726 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
3727 | return DAG.getBitcast(VT, V: Vec); |
3728 | } |
3729 | |
3730 | |
3731 | // Attempt to detect "hidden" splats, which only reveal themselves as splats |
3732 | // when re-interpreted as a vector with a larger element type. For example, |
3733 | // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 |
3734 | // could be instead splat as |
3735 | // v2i32 = build_vector i32 0x00010000, i32 0x00010000 |
3736 | // TODO: This optimization could also work on non-constant splats, but it |
3737 | // would require bit-manipulation instructions to construct the splat value. |
3738 | SmallVector<SDValue> Sequence; |
3739 | const auto *BV = cast<BuildVectorSDNode>(Val&: Op); |
3740 | if (VT.isInteger() && EltBitSize < Subtarget.getELen() && |
3741 | ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) && |
3742 | BV->getRepeatedSequence(Sequence) && |
3743 | (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { |
3744 | unsigned SeqLen = Sequence.size(); |
3745 | MVT ViaIntVT = MVT::getIntegerVT(BitWidth: EltBitSize * SeqLen); |
3746 | assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || |
3747 | ViaIntVT == MVT::i64) && |
3748 | "Unexpected sequence type" ); |
3749 | |
3750 | // If we can use the original VL with the modified element type, this |
3751 | // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this |
3752 | // be moved into InsertVSETVLI? |
3753 | const unsigned RequiredVL = NumElts / SeqLen; |
3754 | const unsigned ViaVecLen = |
3755 | (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ? |
3756 | NumElts : RequiredVL; |
3757 | MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen); |
3758 | |
3759 | unsigned EltIdx = 0; |
3760 | uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize); |
3761 | uint64_t SplatValue = 0; |
3762 | // Construct the amalgamated value which can be splatted as this larger |
3763 | // vector type. |
3764 | for (const auto &SeqV : Sequence) { |
3765 | if (!SeqV.isUndef()) |
3766 | SplatValue |= |
3767 | ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize)); |
3768 | EltIdx++; |
3769 | } |
3770 | |
    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
3773 | if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
3774 | SplatValue = SignExtend64<32>(x: SplatValue); |
3775 | |
3776 | // Since we can't introduce illegal i64 types at this stage, we can only |
3777 | // perform an i64 splat on RV32 if it is its own sign-extended value. That |
3778 | // way we can use RVV instructions to splat. |
3779 | assert((ViaIntVT.bitsLE(XLenVT) || |
3780 | (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && |
3781 | "Unexpected bitcast sequence" ); |
3782 | if (ViaIntVT.bitsLE(VT: XLenVT) || isInt<32>(x: SplatValue)) { |
3783 | SDValue ViaVL = |
3784 | DAG.getConstant(Val: ViaVecVT.getVectorNumElements(), DL, VT: XLenVT); |
3785 | MVT ViaContainerVT = |
3786 | getContainerForFixedLengthVector(DAG, VT: ViaVecVT, Subtarget); |
3787 | SDValue Splat = |
3788 | DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ViaContainerVT, |
3789 | N1: DAG.getUNDEF(VT: ViaContainerVT), |
3790 | N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT), N3: ViaVL); |
3791 | Splat = convertFromScalableVector(VT: ViaVecVT, V: Splat, DAG, Subtarget); |
3792 | if (ViaVecLen != RequiredVL) |
3793 | Splat = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, |
3794 | VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: RequiredVL), N1: Splat, |
3795 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
3796 | return DAG.getBitcast(VT, V: Splat); |
3797 | } |
3798 | } |
3799 | |
3800 | // If the number of signbits allows, see if we can lower as a <N x i8>. |
3801 | // Our main goal here is to reduce LMUL (and thus work) required to |
3802 | // build the constant, but we will also narrow if the resulting |
3803 | // narrow vector is known to materialize cheaply. |
3804 | // TODO: We really should be costing the smaller vector. There are |
3805 | // profitable cases this misses. |
3806 | if (EltBitSize > 8 && VT.isInteger() && |
3807 | (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) { |
3808 | unsigned SignBits = DAG.ComputeNumSignBits(Op); |
3809 | if (EltBitSize - SignBits < 8) { |
3810 | SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8), |
3811 | DL, Op->ops()); |
3812 | Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8), |
3813 | Source, DAG, Subtarget); |
3814 | SDValue Res = DAG.getNode(Opcode: RISCVISD::VSEXT_VL, DL, VT: ContainerVT, N1: Source, N2: Mask, N3: VL); |
3815 | return convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
3816 | } |
3817 | } |
3818 | |
3819 | if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) |
3820 | return Res; |
3821 | |
3822 | // For constant vectors, use generic constant pool lowering. Otherwise, |
3823 | // we'd have to materialize constants in GPRs just to move them into the |
3824 | // vector. |
3825 | return SDValue(); |
3826 | } |
3827 | |
3828 | static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, |
3829 | const RISCVSubtarget &Subtarget) { |
3830 | MVT VT = Op.getSimpleValueType(); |
3831 | assert(VT.isFixedLengthVector() && "Unexpected vector!" ); |
3832 | |
3833 | if (ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) || |
3834 | ISD::isBuildVectorOfConstantFPSDNodes(N: Op.getNode())) |
3835 | return lowerBuildVectorOfConstants(Op, DAG, Subtarget); |
3836 | |
3837 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3838 | |
3839 | SDLoc DL(Op); |
3840 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3841 | |
3842 | MVT XLenVT = Subtarget.getXLenVT(); |
3843 | |
3844 | if (VT.getVectorElementType() == MVT::i1) { |
3845 | // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask |
3846 | // vector type, we have a legal equivalently-sized i8 type, so we can use |
3847 | // that. |
3848 | MVT WideVecVT = VT.changeVectorElementType(MVT::i8); |
3849 | SDValue VecZero = DAG.getConstant(Val: 0, DL, VT: WideVecVT); |
3850 | |
3851 | SDValue WideVec; |
3852 | if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) { |
3853 | // For a splat, perform a scalar truncate before creating the wider |
3854 | // vector. |
3855 | Splat = DAG.getNode(Opcode: ISD::AND, DL, VT: Splat.getValueType(), N1: Splat, |
3856 | N2: DAG.getConstant(Val: 1, DL, VT: Splat.getValueType())); |
3857 | WideVec = DAG.getSplatBuildVector(VT: WideVecVT, DL, Op: Splat); |
3858 | } else { |
3859 | SmallVector<SDValue, 8> Ops(Op->op_values()); |
3860 | WideVec = DAG.getBuildVector(VT: WideVecVT, DL, Ops); |
3861 | SDValue VecOne = DAG.getConstant(Val: 1, DL, VT: WideVecVT); |
3862 | WideVec = DAG.getNode(Opcode: ISD::AND, DL, VT: WideVecVT, N1: WideVec, N2: VecOne); |
3863 | } |
3864 | |
3865 | return DAG.getSetCC(DL, VT, LHS: WideVec, RHS: VecZero, Cond: ISD::SETNE); |
3866 | } |
3867 | |
3868 | if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) { |
3869 | if (auto Gather = matchSplatAsGather(SplatVal: Splat, VT, DL, DAG, Subtarget)) |
3870 | return Gather; |
3871 | unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
3872 | : RISCVISD::VMV_V_X_VL; |
3873 | if (!VT.isFloatingPoint()) |
3874 | Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat); |
3875 | Splat = |
3876 | DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL); |
3877 | return convertFromScalableVector(VT, V: Splat, DAG, Subtarget); |
3878 | } |
3879 | |
3880 | if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) |
3881 | return Res; |
3882 | |
3883 | // If we're compiling for an exact VLEN value, we can split our work per |
3884 | // register in the register group. |
3885 | if (const auto VLen = Subtarget.getRealVLen(); |
3886 | VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) { |
3887 | MVT ElemVT = VT.getVectorElementType(); |
3888 | unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); |
3889 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3890 | MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg); |
3891 | MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget); |
3892 | assert(M1VT == getLMUL1VT(M1VT)); |
3893 | |
3894 | // The following semantically builds up a fixed length concat_vector |
3895 | // of the component build_vectors. We eagerly lower to scalable and |
3896 | // insert_subvector here to avoid DAG combining it back to a large |
3897 | // build_vector. |
3898 | SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end()); |
3899 | unsigned NumOpElts = M1VT.getVectorMinNumElements(); |
3900 | SDValue Vec = DAG.getUNDEF(VT: ContainerVT); |
3901 | for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) { |
3902 | auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(N: i, M: ElemsPerVReg); |
3903 | SDValue SubBV = |
3904 | DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL, VT: OneRegVT, Ops: OneVRegOfOps); |
3905 | SubBV = convertToScalableVector(VT: M1VT, V: SubBV, DAG, Subtarget); |
3906 | unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts; |
3907 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubBV, |
3908 | N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL)); |
3909 | } |
3910 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
3911 | } |
3912 | |
3913 | // For m1 vectors, if we have non-undef values in both halves of our vector, |
3914 | // split the vector into low and high halves, build them separately, then |
3915 | // use a vselect to combine them. For long vectors, this cuts the critical |
3916 | // path of the vslide1down sequence in half, and gives us an opportunity |
3917 | // to special case each half independently. Note that we don't change the |
3918 | // length of the sub-vectors here, so if both fallback to the generic |
3919 | // vslide1down path, we should be able to fold the vselect into the final |
3920 | // vslidedown (for the undef tail) for the first half w/ masking. |
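     | // E.g. <a, b, c, d, e, f, g, h> becomes |
     | //   vselect <1,1,1,1,0,0,0,0>, |
     | //           build_vector <a, b, c, d, undef, undef, undef, undef>, |
     | //           build_vector <undef, undef, undef, undef, e, f, g, h> |
     | // and the two half builds can then be lowered independently. |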
3921 | unsigned NumElts = VT.getVectorNumElements(); |
3922 | unsigned NumUndefElts = |
3923 | count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); }); |
3924 | unsigned NumDefElts = NumElts - NumUndefElts; |
3925 | if (NumDefElts >= 8 && NumDefElts > NumElts / 2 && |
3926 | ContainerVT.bitsLE(VT: getLMUL1VT(VT: ContainerVT))) { |
3927 | SmallVector<SDValue> SubVecAOps, SubVecBOps; |
3928 | SmallVector<SDValue> MaskVals; |
3929 | SDValue UndefElem = DAG.getUNDEF(VT: Op->getOperand(Num: 0)->getValueType(ResNo: 0)); |
3930 | SubVecAOps.reserve(N: NumElts); |
3931 | SubVecBOps.reserve(N: NumElts); |
3932 | for (unsigned i = 0; i < NumElts; i++) { |
3933 | SDValue Elem = Op->getOperand(Num: i); |
3934 | if (i < NumElts / 2) { |
3935 | SubVecAOps.push_back(Elt: Elem); |
3936 | SubVecBOps.push_back(Elt: UndefElem); |
3937 | } else { |
3938 | SubVecAOps.push_back(Elt: UndefElem); |
3939 | SubVecBOps.push_back(Elt: Elem); |
3940 | } |
3941 | bool SelectMaskVal = (i < NumElts / 2); |
3942 | MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT)); |
3943 | } |
3944 | assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts && |
3945 | MaskVals.size() == NumElts); |
3946 | |
3947 | SDValue SubVecA = DAG.getBuildVector(VT, DL, Ops: SubVecAOps); |
3948 | SDValue SubVecB = DAG.getBuildVector(VT, DL, Ops: SubVecBOps); |
3949 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
3950 | SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals); |
3951 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: SubVecA, N3: SubVecB); |
3952 | } |
3953 | |
3954 | // Cap the cost at a value linear to the number of elements in the vector. |
3955 | // The default lowering is to use the stack. The vector store + scalar loads |
3956 | // is linear in VL. However, at high LMULs vslide1down and vslidedown end up |
3957 | // being (at least) linear in LMUL. As a result, using the vslidedown |
3958 | // lowering for every element ends up being VL*LMUL. |
3959 | // TODO: Should we be directly costing the stack alternative? Doing so might |
3960 | // give us a more accurate upper bound. |
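     | // E.g. a fully defined v8i32 build has a budget of 16. At LMUL_2 each |
     | // slide costs 2, so the 8 vslide1downs (cost 16) just fit; at LMUL_4 |
     | // they would cost 32 and we'd bail out to the default (stack) lowering. |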
3961 | InstructionCost LinearBudget = VT.getVectorNumElements() * 2; |
3962 | |
3963 | // TODO: unify with TTI getSlideCost. |
3964 | InstructionCost PerSlideCost = 1; |
3965 | switch (RISCVTargetLowering::getLMUL(VT: ContainerVT)) { |
3966 | default: break; |
3967 | case RISCVII::VLMUL::LMUL_2: |
3968 | PerSlideCost = 2; |
3969 | break; |
3970 | case RISCVII::VLMUL::LMUL_4: |
3971 | PerSlideCost = 4; |
3972 | break; |
3973 | case RISCVII::VLMUL::LMUL_8: |
3974 | PerSlideCost = 8; |
3975 | break; |
3976 | } |
3977 | |
3978 | // TODO: Should we be using the build instseq then cost + evaluate scheme |
3979 | // we use for integer constants here? |
3980 | unsigned UndefCount = 0; |
3981 | for (const SDValue &V : Op->ops()) { |
3982 | if (V.isUndef()) { |
3983 | UndefCount++; |
3984 | continue; |
3985 | } |
3986 | if (UndefCount) { |
3987 | LinearBudget -= PerSlideCost; |
3988 | UndefCount = 0; |
3989 | } |
3990 | LinearBudget -= PerSlideCost; |
3991 | } |
3992 | if (UndefCount) { |
3993 | LinearBudget -= PerSlideCost; |
3994 | } |
3995 | |
3996 | if (LinearBudget < 0) |
3997 | return SDValue(); |
3998 | |
3999 | assert((!VT.isFloatingPoint() || |
4000 | VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) && |
4001 | "Illegal type which will result in reserved encoding" ); |
4002 | |
4003 | const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
4004 | |
4005 | SDValue Vec; |
4006 | UndefCount = 0; |
4007 | for (SDValue V : Op->ops()) { |
4008 | if (V.isUndef()) { |
4009 | UndefCount++; |
4010 | continue; |
4011 | } |
4012 | |
4013 | // Start our sequence with a TA splat in the hopes that hardware is able to |
4014 | // recognize there's no dependency on the prior value of our temporary |
4015 | // register. |
4016 | if (!Vec) { |
4017 | Vec = DAG.getSplatVector(VT, DL, Op: V); |
4018 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
4019 | UndefCount = 0; |
4020 | continue; |
4021 | } |
4022 | |
4023 | if (UndefCount) { |
4024 | const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT()); |
4025 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
4026 | Op: Vec, Offset, Mask, VL, Policy); |
4027 | UndefCount = 0; |
4028 | } |
4029 | auto OpCode = |
4030 | VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; |
4031 | if (!VT.isFloatingPoint()) |
4032 | V = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: V); |
4033 | Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec, |
4034 | N3: V, N4: Mask, N5: VL); |
4035 | } |
4036 | if (UndefCount) { |
4037 | const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT()); |
4038 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
4039 | Op: Vec, Offset, Mask, VL, Policy); |
4040 | } |
4041 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
4042 | } |
4043 | |
4044 | static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, |
4045 | SDValue Lo, SDValue Hi, SDValue VL, |
4046 | SelectionDAG &DAG) { |
4047 | if (!Passthru) |
4048 | Passthru = DAG.getUNDEF(VT); |
4049 | if (isa<ConstantSDNode>(Val: Lo) && isa<ConstantSDNode>(Val: Hi)) { |
4050 | int32_t LoC = cast<ConstantSDNode>(Val&: Lo)->getSExtValue(); |
4051 | int32_t HiC = cast<ConstantSDNode>(Val&: Hi)->getSExtValue(); |
4052 | // If Hi constant is all the same sign bit as Lo, lower this as a custom |
4053 | // node in order to try and match RVV vector/scalar instructions. |
4054 | if ((LoC >> 31) == HiC) |
4055 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL); |
4056 | |
4057 | // If VL is equal to VLMAX or fits in 4 bits, and the Hi constant equals Lo, |
4058 | // we could use vmv.v.x with EEW = 32 to lower it. This allows us to use a |
4059 | // vlmax vsetvli or vsetivli to change the VL. |
4060 | // FIXME: Support larger constants? |
4061 | // FIXME: Support non-constant VLs by saturating? |
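     | // E.g. splatting the i64 value 0x1234567812345678 with VL=2 can instead |
     | // be done as a vmv.v.x of 0x12345678 into the equivalent i32 vector with |
     | // VL=4, doubling VL since each i64 element covers two i32 elements. |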
4062 | if (LoC == HiC) { |
4063 | SDValue NewVL; |
4064 | if (isAllOnesConstant(VL) || |
4065 | (isa<RegisterSDNode>(VL) && |
4066 | cast<RegisterSDNode>(VL)->getReg() == RISCV::X0)) |
4067 | NewVL = DAG.getRegister(RISCV::X0, MVT::i32); |
4068 | else if (isa<ConstantSDNode>(Val: VL) && isUInt<4>(x: VL->getAsZExtVal())) |
4069 | NewVL = DAG.getNode(Opcode: ISD::ADD, DL, VT: VL.getValueType(), N1: VL, N2: VL); |
4070 | |
4071 | if (NewVL) { |
4072 | MVT InterVT = |
4073 | MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); |
4074 | auto InterVec = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterVT, |
4075 | N1: DAG.getUNDEF(VT: InterVT), N2: Lo, N3: NewVL); |
4076 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: InterVec); |
4077 | } |
4078 | } |
4079 | } |
4080 | |
4081 | // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. |
4082 | if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(i: 0) == Lo && |
4083 | isa<ConstantSDNode>(Val: Hi.getOperand(i: 1)) && |
4084 | Hi.getConstantOperandVal(i: 1) == 31) |
4085 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL); |
4086 | |
4087 | // If the hi bits of the splat are undefined, then it's fine to just splat Lo |
4088 | // even if it might be sign extended. |
4089 | if (Hi.isUndef()) |
4090 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL); |
4091 | |
4092 | // Fall back to a stack store and stride x0 vector load. |
4093 | return DAG.getNode(Opcode: RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, N1: Passthru, N2: Lo, |
4094 | N3: Hi, N4: VL); |
4095 | } |
4096 | |
4097 | // Called by type legalization to handle splat of i64 on RV32. |
4098 | // FIXME: We can optimize this when the type has sign or zero bits in one |
4099 | // of the halves. |
4100 | static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, |
4101 | SDValue Scalar, SDValue VL, |
4102 | SelectionDAG &DAG) { |
4103 | assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!" ); |
4104 | SDValue Lo, Hi; |
4105 | std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32); |
4106 | return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); |
4107 | } |
4108 | |
4109 | // This function lowers a splat of a scalar operand Splat with the vector |
4110 | // length VL. It ensures the final sequence is type legal, which is useful when |
4111 | // lowering a splat after type legalization. |
4112 | static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, |
4113 | MVT VT, const SDLoc &DL, SelectionDAG &DAG, |
4114 | const RISCVSubtarget &Subtarget) { |
4115 | bool HasPassthru = Passthru && !Passthru.isUndef(); |
4116 | if (!HasPassthru && !Passthru) |
4117 | Passthru = DAG.getUNDEF(VT); |
4118 | if (VT.isFloatingPoint()) |
4119 | return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
4120 | |
4121 | MVT XLenVT = Subtarget.getXLenVT(); |
4122 | |
4123 | // Simplest case is that the operand needs to be promoted to XLenVT. |
4124 | if (Scalar.getValueType().bitsLE(VT: XLenVT)) { |
4125 | // If the operand is a constant, sign extend to increase our chances |
4126 | // of being able to use a .vi instruction. ANY_EXTEND would become a |
4127 | // zero extend and the simm5 check in isel would fail. |
4128 | // FIXME: Should we ignore the upper bits in isel instead? |
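     | // E.g. when splatting i32 -5 on RV64, a zero extend would produce |
     | // 0xFFFFFFFB, which fails the simm5 check, while a sign extend yields |
     | // -5 and can select to vmv.v.i v8, -5. |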
4129 | unsigned ExtOpc = |
4130 | isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
4131 | Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar); |
4132 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
4133 | } |
4134 | |
4135 | assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && |
4136 | "Unexpected scalar for splat lowering!" ); |
4137 | |
4138 | if (isOneConstant(V: VL) && isNullConstant(V: Scalar)) |
4139 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru, |
4140 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: VL); |
4141 | |
4142 | // Otherwise use the more complicated splatting algorithm. |
4143 | return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); |
4144 | } |
4145 | |
4146 | // This function lowers an insert of a scalar operand Scalar into lane |
4147 | // 0 of the vector regardless of the value of VL. The contents of the |
4148 | // remaining lanes of the result vector are unspecified. VL is assumed |
4149 | // to be non-zero. |
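     | // E.g. for an XLEN-sized integer scalar this ideally selects to a single |
     | // vmv.s.x (vfmv.s.f for FP), which writes element 0 and leaves the other |
     | // elements unspecified, matching the contract above. |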
4150 | static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, |
4151 | const SDLoc &DL, SelectionDAG &DAG, |
4152 | const RISCVSubtarget &Subtarget) { |
4153 | assert(VT.isScalableVector() && "Expect VT is scalable vector type." ); |
4154 | |
4155 | const MVT XLenVT = Subtarget.getXLenVT(); |
4156 | SDValue Passthru = DAG.getUNDEF(VT); |
4157 | |
4158 | if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
4159 | isNullConstant(V: Scalar.getOperand(i: 1))) { |
4160 | SDValue ExtractedVal = Scalar.getOperand(i: 0); |
4161 | // The element types must be the same. |
4162 | if (ExtractedVal.getValueType().getVectorElementType() == |
4163 | VT.getVectorElementType()) { |
4164 | MVT ExtractedVT = ExtractedVal.getSimpleValueType(); |
4165 | MVT ExtractedContainerVT = ExtractedVT; |
4166 | if (ExtractedContainerVT.isFixedLengthVector()) { |
4167 | ExtractedContainerVT = getContainerForFixedLengthVector( |
4168 | DAG, VT: ExtractedContainerVT, Subtarget); |
4169 | ExtractedVal = convertToScalableVector(VT: ExtractedContainerVT, |
4170 | V: ExtractedVal, DAG, Subtarget); |
4171 | } |
4172 | if (ExtractedContainerVT.bitsLE(VT)) |
4173 | return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru, |
4174 | N2: ExtractedVal, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
4175 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: ExtractedVal, |
4176 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
4177 | } |
4178 | } |
4179 | |
4181 | if (VT.isFloatingPoint()) |
4182 | return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, |
4183 | N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL); |
4184 | |
4185 | // Avoid the tricky legalization cases by falling back to using the |
4186 | // splat code which already handles it gracefully. |
4187 | if (!Scalar.getValueType().bitsLE(VT: XLenVT)) |
4188 | return lowerScalarSplat(Passthru: DAG.getUNDEF(VT), Scalar, |
4189 | VL: DAG.getConstant(Val: 1, DL, VT: XLenVT), |
4190 | VT, DL, DAG, Subtarget); |
4191 | |
4192 | // If the operand is a constant, sign extend to increase our chances |
4193 | // of being able to use a .vi instruction. ANY_EXTEND would become a |
4194 | // zero extend and the simm5 check in isel would fail. |
4195 | // FIXME: Should we ignore the upper bits in isel instead? |
4196 | unsigned ExtOpc = |
4197 | isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
4198 | Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar); |
4199 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, |
4200 | N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL); |
4201 | } |
4202 | |
4203 | // Is this a shuffle that extracts either the even or odd elements of a vector? |
4204 | // That is, specifically, either (a) or (b) below. |
4205 | // t34: v8i8 = extract_subvector t11, Constant:i64<0> |
4206 | // t33: v8i8 = extract_subvector t11, Constant:i64<8> |
4207 | // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 |
4208 | // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 |
4209 | // Returns {Src Vector, Even Elements} on success. |
4210 | static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, |
4211 | SDValue V2, ArrayRef<int> Mask, |
4212 | const RISCVSubtarget &Subtarget) { |
4213 | // Need to be able to widen the vector. |
4214 | if (VT.getScalarSizeInBits() >= Subtarget.getELen()) |
4215 | return false; |
4216 | |
4217 | // Both inputs must be extracts. |
4218 | if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
4219 | V2.getOpcode() != ISD::EXTRACT_SUBVECTOR) |
4220 | return false; |
4221 | |
4222 | // Extracting from the same source. |
4223 | SDValue Src = V1.getOperand(i: 0); |
4224 | if (Src != V2.getOperand(i: 0)) |
4225 | return false; |
4226 | |
4227 | // Src needs to have twice the number of elements. |
4228 | if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) |
4229 | return false; |
4230 | |
4231 | // The extracts must extract the two halves of the source. |
4232 | if (V1.getConstantOperandVal(i: 1) != 0 || |
4233 | V2.getConstantOperandVal(i: 1) != Mask.size()) |
4234 | return false; |
4235 | |
4236 | // First index must be the first even or odd element from V1. |
4237 | if (Mask[0] != 0 && Mask[0] != 1) |
4238 | return false; |
4239 | |
4240 | // The others must increase by 2 each time. |
4241 | // TODO: Support undef elements? |
4242 | for (unsigned i = 1; i != Mask.size(); ++i) |
4243 | if (Mask[i] != Mask[i - 1] + 2) |
4244 | return false; |
4245 | |
4246 | return true; |
4247 | } |
4248 | |
4249 | /// Is this shuffle interleaving contiguous elements from one vector into the |
4250 | /// even elements and contiguous elements from another vector into the odd |
4251 | /// elements. \p EvenSrc will contain the element that should be in the first |
4252 | /// even element. \p OddSrc will contain the element that should be in the first |
4253 | /// odd element. These can be the first element in a source or the element half |
4254 | /// way through the source. |
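     | /// E.g. for a v8i32 shuffle, the mask <0, 4, 1, 5, 2, 6, 3, 7> gives |
     | /// EvenSrc=0 and OddSrc=4 (a unary interleave of the two halves of the |
     | /// first vector), while <0, 8, 1, 9, 2, 10, 3, 11> gives EvenSrc=0 and |
     | /// OddSrc=8 (interleaving the low halves of the two source vectors). |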
4255 | static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc, |
4256 | int &OddSrc, const RISCVSubtarget &Subtarget) { |
4257 | // We need to be able to widen elements to the next larger integer type. |
4258 | if (VT.getScalarSizeInBits() >= Subtarget.getELen()) |
4259 | return false; |
4260 | |
4261 | int Size = Mask.size(); |
4262 | int NumElts = VT.getVectorNumElements(); |
4263 | assert(Size == (int)NumElts && "Unexpected mask size" ); |
4264 | |
4265 | SmallVector<unsigned, 2> StartIndexes; |
4266 | if (!ShuffleVectorInst::isInterleaveMask(Mask, Factor: 2, NumInputElts: Size * 2, StartIndexes)) |
4267 | return false; |
4268 | |
4269 | EvenSrc = StartIndexes[0]; |
4270 | OddSrc = StartIndexes[1]; |
4271 | |
4272 | // One source should be low half of first vector. |
4273 | if (EvenSrc != 0 && OddSrc != 0) |
4274 | return false; |
4275 | |
4276 | // The two subvectors will be extracted either from the start of the two input |
4277 | // vectors, or from the start and middle of the first vector if it's a unary |
4278 | // interleave. |
4279 | // In both cases, HalfNumElts will be extracted. |
4280 | // We need to ensure that the extract indices are 0 or HalfNumElts otherwise |
4281 | // we'll create an illegal extract_subvector. |
4282 | // FIXME: We could support other values using a slidedown first. |
4283 | int HalfNumElts = NumElts / 2; |
4284 | return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0); |
4285 | } |
4286 | |
4287 | /// Match shuffles that concatenate two vectors, rotate the concatenation, |
4288 | /// and then extract the original number of elements from the rotated result. |
4289 | /// This is equivalent to vector.splice or X86's PALIGNR instruction. The |
4290 | /// returned rotation amount is for a rotate right, where elements move from |
4291 | /// higher elements to lower elements. \p LoSrc indicates the first source |
4292 | /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector |
4293 | /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be |
4294 | /// 0 or 1 if a rotation is found. |
4295 | /// |
4296 | /// NOTE: We talk about rotate to the right which matches how bit shift and |
4297 | /// rotate instructions are described where LSBs are on the right, but LLVM IR |
4298 | /// and the table below write vectors with the lowest elements on the left. |
4299 | static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) { |
4300 | int Size = Mask.size(); |
4301 | |
4302 | // We need to detect various ways of spelling a rotation: |
4303 | // [11, 12, 13, 14, 15, 0, 1, 2] |
4304 | // [-1, 12, 13, 14, -1, -1, 1, -1] |
4305 | // [-1, -1, -1, -1, -1, -1, 1, 2] |
4306 | // [ 3, 4, 5, 6, 7, 8, 9, 10] |
4307 | // [-1, 4, 5, 6, -1, -1, 9, -1] |
4308 | // [-1, 4, 5, 6, -1, -1, -1, -1] |
4309 | int Rotation = 0; |
4310 | LoSrc = -1; |
4311 | HiSrc = -1; |
4312 | for (int i = 0; i != Size; ++i) { |
4313 | int M = Mask[i]; |
4314 | if (M < 0) |
4315 | continue; |
4316 | |
4317 | // Determine where a rotate vector would have started. |
4318 | int StartIdx = i - (M % Size); |
4319 | // The identity rotation isn't interesting, stop. |
4320 | if (StartIdx == 0) |
4321 | return -1; |
4322 | |
4323 | // If we found the tail of a vector the rotation must be the missing |
4324 | // front. If we found the head of a vector, it must be how much of the |
4325 | // head. |
4326 | int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; |
4327 | |
4328 | if (Rotation == 0) |
4329 | Rotation = CandidateRotation; |
4330 | else if (Rotation != CandidateRotation) |
4331 | // The rotations don't match, so we can't match this mask. |
4332 | return -1; |
4333 | |
4334 | // Compute which value this mask is pointing at. |
4335 | int MaskSrc = M < Size ? 0 : 1; |
4336 | |
4337 | // Compute which of the two target values this index should be assigned to. |
4338 | // This reflects whether the high elements are remaining or the low elements |
4339 | // are remaining. |
4340 | int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc; |
4341 | |
4342 | // Either set up this value if we've not encountered it before, or check |
4343 | // that it remains consistent. |
4344 | if (TargetSrc < 0) |
4345 | TargetSrc = MaskSrc; |
4346 | else if (TargetSrc != MaskSrc) |
4347 | // This may be a rotation, but it pulls from the inputs in some |
4348 | // unsupported interleaving. |
4349 | return -1; |
4350 | } |
4351 | |
4352 | // Check that we successfully analyzed the mask, and normalize the results. |
4353 | assert(Rotation != 0 && "Failed to locate a viable rotation!" ); |
4354 | assert((LoSrc >= 0 || HiSrc >= 0) && |
4355 | "Failed to find a rotated input vector!" ); |
4356 | |
4357 | return Rotation; |
4358 | } |
4359 | |
4360 | // Lower a deinterleave shuffle to vnsrl. |
4361 | // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) |
4362 | // -> [p, q, r, s] (EvenElts == false) |
4363 | // VT is the type of the vector to return, <[vscale x ]n x ty> |
4364 | // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> |
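     | // E.g. deinterleaving a v16i8 source into a v8i8 result views the source |
     | // as v8i16 and ideally selects to a single narrowing shift: |
     | //   vnsrl.wi vd, vs, 0   ; even elements (shift by 0) |
     | //   vnsrl.wi vd, vs, 8   ; odd elements (shift by the element width, 8) |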
4365 | static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, |
4366 | bool EvenElts, |
4367 | const RISCVSubtarget &Subtarget, |
4368 | SelectionDAG &DAG) { |
4369 | // The result is a vector of type <m x n x ty> |
4370 | MVT ContainerVT = VT; |
4371 | // Convert fixed vectors to scalable if needed |
4372 | if (ContainerVT.isFixedLengthVector()) { |
4373 | assert(Src.getSimpleValueType().isFixedLengthVector()); |
4374 | ContainerVT = getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget); |
4375 | |
4376 | // The source is a vector of type <m x n*2 x ty> |
4377 | MVT SrcContainerVT = |
4378 | MVT::getVectorVT(VT: ContainerVT.getVectorElementType(), |
4379 | EC: ContainerVT.getVectorElementCount() * 2); |
4380 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
4381 | } |
4382 | |
4383 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
4384 | |
4385 | // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2> |
4386 | // This also converts FP to int. |
4387 | unsigned EltBits = ContainerVT.getScalarSizeInBits(); |
4388 | MVT WideSrcContainerVT = MVT::getVectorVT( |
4389 | VT: MVT::getIntegerVT(BitWidth: EltBits * 2), EC: ContainerVT.getVectorElementCount()); |
4390 | Src = DAG.getBitcast(VT: WideSrcContainerVT, V: Src); |
4391 | |
4392 | // The integer version of the container type. |
4393 | MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger(); |
4394 | |
4395 | // If we want even elements, then the shift amount is 0. Otherwise, shift by |
4396 | // the original element size. |
4397 | unsigned Shift = EvenElts ? 0 : EltBits; |
4398 | SDValue SplatShift = DAG.getNode( |
4399 | Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), |
4400 | N2: DAG.getConstant(Val: Shift, DL, VT: Subtarget.getXLenVT()), N3: VL); |
4401 | SDValue Res = |
4402 | DAG.getNode(Opcode: RISCVISD::VNSRL_VL, DL, VT: IntContainerVT, N1: Src, N2: SplatShift, |
4403 | N3: DAG.getUNDEF(VT: IntContainerVT), N4: TrueMask, N5: VL); |
4404 | // Cast back to FP if needed. |
4405 | Res = DAG.getBitcast(VT: ContainerVT, V: Res); |
4406 | |
4407 | if (VT.isFixedLengthVector()) |
4408 | Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
4409 | return Res; |
4410 | } |
4411 | |
4412 | // Lower the following shuffle to vslidedown. |
4413 | // a) |
4414 | // t49: v8i8 = extract_subvector t13, Constant:i64<0> |
4415 | // t109: v8i8 = extract_subvector t13, Constant:i64<8> |
4416 | // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 |
4417 | // b) |
4418 | // t69: v16i16 = extract_subvector t68, Constant:i64<0> |
4419 | // t23: v8i16 = extract_subvector t69, Constant:i64<0> |
4420 | // t29: v4i16 = extract_subvector t23, Constant:i64<4> |
4421 | // t26: v8i16 = extract_subvector t69, Constant:i64<8> |
4422 | // t30: v4i16 = extract_subvector t26, Constant:i64<0> |
4423 | // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 |
4424 | static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, |
4425 | SDValue V1, SDValue V2, |
4426 | ArrayRef<int> Mask, |
4427 | const RISCVSubtarget &Subtarget, |
4428 | SelectionDAG &DAG) { |
4429 | auto findNonEXTRACT_SUBVECTORParent = |
4430 | [](SDValue Parent) -> std::pair<SDValue, uint64_t> { |
4431 | uint64_t Offset = 0; |
4432 | while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
4433 | // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from |
4434 | // a scalable vector. But we don't want to match the case. |
4435 | Parent.getOperand(i: 0).getSimpleValueType().isFixedLengthVector()) { |
4436 | Offset += Parent.getConstantOperandVal(i: 1); |
4437 | Parent = Parent.getOperand(i: 0); |
4438 | } |
4439 | return std::make_pair(x&: Parent, y&: Offset); |
4440 | }; |
4441 | |
4442 | auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1); |
4443 | auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2); |
4444 | |
4445 | // Extracting from the same source. |
4446 | SDValue Src = V1Src; |
4447 | if (Src != V2Src) |
4448 | return SDValue(); |
4449 | |
4450 | // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs. |
4451 | SmallVector<int, 16> NewMask(Mask); |
4452 | for (size_t i = 0; i != NewMask.size(); ++i) { |
4453 | if (NewMask[i] == -1) |
4454 | continue; |
4455 | |
4456 | if (static_cast<size_t>(NewMask[i]) < NewMask.size()) { |
4457 | NewMask[i] = NewMask[i] + V1IndexOffset; |
4458 | } else { |
4459 | // Minus NewMask.size() is needed. Otherwise, the b case would be |
4460 | // <5,6,7,12> instead of <5,6,7,8>. |
4461 | NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset; |
4462 | } |
4463 | } |
4464 | |
4465 | // First index must be known and non-zero. It will be used as the slidedown |
4466 | // amount. |
4467 | if (NewMask[0] <= 0) |
4468 | return SDValue(); |
4469 | |
4470 | // NewMask must also be contiguous. |
4471 | for (unsigned i = 1; i != NewMask.size(); ++i) |
4472 | if (NewMask[i - 1] + 1 != NewMask[i]) |
4473 | return SDValue(); |
4474 | |
4475 | MVT XLenVT = Subtarget.getXLenVT(); |
4476 | MVT SrcVT = Src.getSimpleValueType(); |
4477 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget); |
4478 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
4479 | SDValue Slidedown = |
4480 | getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
4481 | Op: convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget), |
4482 | Offset: DAG.getConstant(Val: NewMask[0], DL, VT: XLenVT), Mask: TrueMask, VL); |
4483 | return DAG.getNode( |
4484 | Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, |
4485 | N1: convertFromScalableVector(VT: SrcVT, V: Slidedown, DAG, Subtarget), |
4486 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
4487 | } |
4488 | |
4489 | // Because vslideup leaves the destination elements at the start intact, we can |
4490 | // use it to perform shuffles that insert subvectors: |
4491 | // |
4492 | // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11> |
4493 | // -> |
4494 | // vsetvli zero, 8, e8, mf2, ta, ma |
4495 | // vslideup.vi v8, v9, 4 |
4496 | // |
4497 | // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7> |
4498 | // -> |
4499 | // vsetvli zero, 5, e8, mf2, tu, ma |
4500 | // vslideup.vi v8, v9, 2 |
4501 | static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, |
4502 | SDValue V1, SDValue V2, |
4503 | ArrayRef<int> Mask, |
4504 | const RISCVSubtarget &Subtarget, |
4505 | SelectionDAG &DAG) { |
4506 | unsigned NumElts = VT.getVectorNumElements(); |
4507 | int NumSubElts, Index; |
4508 | if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumSrcElts: NumElts, NumSubElts, |
4509 | Index)) |
4510 | return SDValue(); |
4511 | |
4512 | bool OpsSwapped = Mask[Index] < (int)NumElts; |
4513 | SDValue InPlace = OpsSwapped ? V2 : V1; |
4514 | SDValue ToInsert = OpsSwapped ? V1 : V2; |
4515 | |
4516 | MVT XLenVT = Subtarget.getXLenVT(); |
4517 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4518 | auto TrueMask = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).first; |
4519 | // We slide up by the index that the subvector is being inserted at, and set |
4520 | // VL to the index + the number of elements being inserted. |
4521 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC; |
4522 | // If we're adding a suffix to the in place vector, i.e. inserting right |
4523 | // up to the very end of it, then we don't actually care about the tail. |
4524 | if (NumSubElts + Index >= (int)NumElts) |
4525 | Policy |= RISCVII::TAIL_AGNOSTIC; |
4526 | |
4527 | InPlace = convertToScalableVector(VT: ContainerVT, V: InPlace, DAG, Subtarget); |
4528 | ToInsert = convertToScalableVector(VT: ContainerVT, V: ToInsert, DAG, Subtarget); |
4529 | SDValue VL = DAG.getConstant(Val: NumSubElts + Index, DL, VT: XLenVT); |
4530 | |
4531 | SDValue Res; |
4532 | // If we're inserting into the lowest elements, use a tail undisturbed |
4533 | // vmv.v.v. |
4534 | if (Index == 0) |
4535 | Res = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: InPlace, N2: ToInsert, |
4536 | N3: VL); |
4537 | else |
4538 | Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: InPlace, Op: ToInsert, |
4539 | Offset: DAG.getConstant(Val: Index, DL, VT: XLenVT), Mask: TrueMask, VL, Policy); |
4540 | return convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
4541 | } |
4542 | |
4543 | /// Match v(f)slide1up/down idioms. These operations involve sliding |
4544 | /// N-1 elements to make room for an inserted scalar at one end. |
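     | /// E.g. on two v8i8 inputs where V1 is a splat of some scalar s, the mask |
     | /// <0, 8, 9, 10, 11, 12, 13, 14> matches vslide1up.vx (slide V2 up one |
     | /// and insert s at element 0), and <9, 10, 11, 12, 13, 14, 15, 0> matches |
     | /// vslide1down.vx (slide V2 down one and insert s at the last element). |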
4545 | static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, |
4546 | SDValue V1, SDValue V2, |
4547 | ArrayRef<int> Mask, |
4548 | const RISCVSubtarget &Subtarget, |
4549 | SelectionDAG &DAG) { |
4550 | bool OpsSwapped = false; |
4551 | if (!isa<BuildVectorSDNode>(Val: V1)) { |
4552 | if (!isa<BuildVectorSDNode>(Val: V2)) |
4553 | return SDValue(); |
4554 | std::swap(a&: V1, b&: V2); |
4555 | OpsSwapped = true; |
4556 | } |
4557 | SDValue Splat = cast<BuildVectorSDNode>(Val&: V1)->getSplatValue(); |
4558 | if (!Splat) |
4559 | return SDValue(); |
4560 | |
4561 | // Return true if the mask could describe a slide of Mask.size() - 1 |
4562 | // elements from concat_vector(V1, V2)[Base:] to [Offset:]. |
4563 | auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) { |
4564 | const unsigned S = (Offset > 0) ? 0 : -Offset; |
4565 | const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0); |
4566 | for (unsigned i = S; i != E; ++i) |
4567 | if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset) |
4568 | return false; |
4569 | return true; |
4570 | }; |
4571 | |
4572 | const unsigned NumElts = VT.getVectorNumElements(); |
4573 | bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1); |
4574 | if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1)) |
4575 | return SDValue(); |
4576 | |
4577 | const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0]; |
4578 | // The inserted lane must come from the splat; undef is legal but not profitable. |
4579 | if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped) |
4580 | return SDValue(); |
4581 | |
4582 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4583 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
4584 | auto OpCode = IsVSlidedown ? |
4585 | (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : |
4586 | (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); |
4587 | if (!VT.isFloatingPoint()) |
4588 | Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Splat); |
4589 | auto Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, |
4590 | N1: DAG.getUNDEF(VT: ContainerVT), |
4591 | N2: convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget), |
4592 | N3: Splat, N4: TrueMask, N5: VL); |
4593 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
4594 | } |
4595 | |
4596 | // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx |
4597 | // to create an interleaved vector of <[vscale x] n*2 x ty>. |
4598 | // This requires that the size of ty is less than the subtarget's maximum ELEN. |
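     | // E.g. interleaving <a, b> and <p, q> (SEW=32) produces <a, p, b, q>; |
     | // each widened i64 element is computed as (zext(odd) << 32) + zext(even), |
     | // which is what the vwaddu.vv + vwmaccu.vx sequence below materializes. |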
4599 | static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, |
4600 | const SDLoc &DL, SelectionDAG &DAG, |
4601 | const RISCVSubtarget &Subtarget) { |
4602 | MVT VecVT = EvenV.getSimpleValueType(); |
4603 | MVT VecContainerVT = VecVT; // <vscale x n x ty> |
4604 | // Convert fixed vectors to scalable if needed |
4605 | if (VecContainerVT.isFixedLengthVector()) { |
4606 | VecContainerVT = getContainerForFixedLengthVector(DAG, VT: VecVT, Subtarget); |
4607 | EvenV = convertToScalableVector(VT: VecContainerVT, V: EvenV, DAG, Subtarget); |
4608 | OddV = convertToScalableVector(VT: VecContainerVT, V: OddV, DAG, Subtarget); |
4609 | } |
4610 | |
4611 | assert(VecVT.getScalarSizeInBits() < Subtarget.getELen()); |
4612 | |
4613 | // We're working with a vector of the same size as the resulting |
4614 | // interleaved vector, but with half the number of elements and |
4615 | // twice the SEW (Hence the restriction on not using the maximum |
4616 | // ELEN) |
4617 | MVT WideVT = |
4618 | MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VecVT.getScalarSizeInBits() * 2), |
4619 | EC: VecVT.getVectorElementCount()); |
4620 | MVT WideContainerVT = WideVT; // <vscale x n x ty*2> |
4621 | if (WideContainerVT.isFixedLengthVector()) |
4622 | WideContainerVT = getContainerForFixedLengthVector(DAG, VT: WideVT, Subtarget); |
4623 | |
4624 | // Bitcast the input vectors to integers in case they are FP |
4625 | VecContainerVT = VecContainerVT.changeTypeToInteger(); |
4626 | EvenV = DAG.getBitcast(VT: VecContainerVT, V: EvenV); |
4627 | OddV = DAG.getBitcast(VT: VecContainerVT, V: OddV); |
4628 | |
4629 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT: VecContainerVT, DL, DAG, Subtarget); |
4630 | SDValue Passthru = DAG.getUNDEF(VT: WideContainerVT); |
4631 | |
4632 | SDValue Interleaved; |
4633 | if (OddV.isUndef()) { |
4634 | // If OddV is undef, this is a zero extend. |
4635 | // FIXME: Not only does this optimize the code, it fixes some correctness |
4636 | // issues because MIR does not have freeze. |
4637 | Interleaved = |
4638 | DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: EvenV, N2: Mask, N3: VL); |
4639 | } else if (Subtarget.hasStdExtZvbb()) { |
4640 | // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. |
4641 | SDValue OffsetVec = |
4642 | DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: VecContainerVT); |
4643 | Interleaved = DAG.getNode(Opcode: RISCVISD::VWSLL_VL, DL, VT: WideContainerVT, N1: OddV, |
4644 | N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL); |
4645 | if (!EvenV.isUndef()) |
4646 | Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_W_VL, DL, VT: WideContainerVT, |
4647 | N1: Interleaved, N2: EvenV, N3: Passthru, N4: Mask, N5: VL); |
4648 | } else if (EvenV.isUndef()) { |
4649 | Interleaved = |
4650 | DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: OddV, N2: Mask, N3: VL); |
4651 | |
4652 | SDValue OffsetVec = |
4653 | DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: WideContainerVT); |
4654 | Interleaved = DAG.getNode(Opcode: RISCVISD::SHL_VL, DL, VT: WideContainerVT, |
4655 | N1: Interleaved, N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL); |
4656 | } else { |
4657 | // FIXME: We should freeze the odd vector here. We already handled the case |
4658 | // of provably undef/poison above. |
4659 | |
4660 | // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with |
4661 | // vwaddu.vv |
4662 | Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_VL, DL, VT: WideContainerVT, N1: EvenV, |
4663 | N2: OddV, N3: Passthru, N4: Mask, N5: VL); |
4664 | |
4665 | // Then get OddV * (2^VecVT.getScalarSizeInBits() - 1) |
4666 | SDValue AllOnesVec = DAG.getSplatVector( |
4667 | VT: VecContainerVT, DL, Op: DAG.getAllOnesConstant(DL, VT: Subtarget.getXLenVT())); |
4668 | SDValue OddsMul = DAG.getNode(Opcode: RISCVISD::VWMULU_VL, DL, VT: WideContainerVT, |
4669 | N1: OddV, N2: AllOnesVec, N3: Passthru, N4: Mask, N5: VL); |
4670 | |
4671 | // Add the two together so we get |
4672 | // (OddV * 0xff...ff) + (OddV + EvenV) |
4673 | // = (OddV * 0x100...00) + EvenV |
4674 | // = (OddV << VecVT.getScalarSizeInBits()) + EvenV |
4675 | // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx |
4676 | Interleaved = DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: WideContainerVT, |
4677 | N1: Interleaved, N2: OddsMul, N3: Passthru, N4: Mask, N5: VL); |
4678 | } |
4679 | |
4680 | // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty> |
4681 | MVT ResultContainerVT = MVT::getVectorVT( |
4682 | VT: VecVT.getVectorElementType(), // Make sure to use original type |
4683 | EC: VecContainerVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
4684 | Interleaved = DAG.getBitcast(VT: ResultContainerVT, V: Interleaved); |
4685 | |
4686 | // Convert back to a fixed vector if needed |
4687 | MVT ResultVT = |
4688 | MVT::getVectorVT(VT: VecVT.getVectorElementType(), |
4689 | EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
4690 | if (ResultVT.isFixedLengthVector()) |
4691 | Interleaved = |
4692 | convertFromScalableVector(VT: ResultVT, V: Interleaved, DAG, Subtarget); |
4693 | |
4694 | return Interleaved; |
4695 | } |
4696 | |
4697 | // If we have a vector of bits that we want to reverse, we can use a vbrev on a |
4698 | // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. |
4699 | static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, |
4700 | SelectionDAG &DAG, |
4701 | const RISCVSubtarget &Subtarget) { |
4702 | SDLoc DL(SVN); |
4703 | MVT VT = SVN->getSimpleValueType(ResNo: 0); |
4704 | SDValue V = SVN->getOperand(Num: 0); |
4705 | unsigned NumElts = VT.getVectorNumElements(); |
4706 | |
4707 | assert(VT.getVectorElementType() == MVT::i1); |
4708 | |
4709 | if (!ShuffleVectorInst::isReverseMask(Mask: SVN->getMask(), |
4710 | NumSrcElts: SVN->getMask().size()) || |
4711 | !SVN->getOperand(Num: 1).isUndef()) |
4712 | return SDValue(); |
4713 | |
4714 | unsigned ViaEltSize = std::max(a: (uint64_t)8, b: PowerOf2Ceil(A: NumElts)); |
4715 | EVT ViaVT = EVT::getVectorVT( |
4716 | Context&: *DAG.getContext(), VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ViaEltSize), NumElements: 1); |
4717 | EVT ViaBitVT = |
4718 | EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits()); |
4719 | |
4720 | // If we don't have zvbb, or if the larger element type is wider than ELEN, |
4721 | // the operation will be illegal. |
4722 | if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(Op: ISD::BITREVERSE, |
4723 | VT: ViaVT) || |
4724 | !Subtarget.getTargetLowering()->isTypeLegal(VT: ViaBitVT)) |
4725 | return SDValue(); |
4726 | |
4727 | // If the bit vector doesn't fit exactly into the larger element type, we need |
4728 | // to insert it into the larger vector and then shift up the reversed bits |
4729 | // afterwards to get rid of the gap introduced. |
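     | // E.g. a v4i1 reversed via an i8 bitreverse leaves the four reversed bits |
     | // in the high nibble, so we shift right by 8 - 4 = 4 to move them back. |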
4730 | if (ViaEltSize > NumElts) |
4731 | V = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ViaBitVT, N1: DAG.getUNDEF(VT: ViaBitVT), |
4732 | N2: V, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
4733 | |
4734 | SDValue Res = |
4735 | DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ViaVT, Operand: DAG.getBitcast(VT: ViaVT, V)); |
4736 | |
4737 | // Shift up the reversed bits if the vector didn't exactly fit into the larger |
4738 | // element type. |
4739 | if (ViaEltSize > NumElts) |
4740 | Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: ViaVT, N1: Res, |
4741 | N2: DAG.getConstant(Val: ViaEltSize - NumElts, DL, VT: ViaVT)); |
4742 | |
4743 | Res = DAG.getBitcast(VT: ViaBitVT, V: Res); |
4744 | |
4745 | if (ViaEltSize > NumElts) |
4746 | Res = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Res, |
4747 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
4748 | return Res; |
4749 | } |
4750 | |
4751 | static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, |
4752 | SelectionDAG &DAG, |
4753 | const RISCVSubtarget &Subtarget, |
4754 | MVT &RotateVT, unsigned &RotateAmt) { |
4755 | SDLoc DL(SVN); |
4756 | |
4757 | EVT VT = SVN->getValueType(ResNo: 0); |
4758 | unsigned NumElts = VT.getVectorNumElements(); |
4759 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
4760 | unsigned NumSubElts; |
4761 | if (!ShuffleVectorInst::isBitRotateMask(Mask: SVN->getMask(), EltSizeInBits, MinSubElts: 2, |
4762 | MaxSubElts: NumElts, NumSubElts, RotateAmt)) |
4763 | return false; |
4764 | RotateVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSizeInBits * NumSubElts), |
4765 | NumElements: NumElts / NumSubElts); |
4766 | |
4767 | // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. |
4768 | return Subtarget.getTargetLowering()->isTypeLegal(VT: RotateVT); |
4769 | } |
4770 | |
4771 | // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can |
4772 | // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this |
4773 | // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. |
4774 | static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, |
4775 | SelectionDAG &DAG, |
4776 | const RISCVSubtarget &Subtarget) { |
4777 | SDLoc DL(SVN); |
4778 | |
4779 | EVT VT = SVN->getValueType(ResNo: 0); |
4780 | unsigned RotateAmt; |
4781 | MVT RotateVT; |
4782 | if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt)) |
4783 | return SDValue(); |
4784 | |
4785 | SDValue Op = DAG.getBitcast(VT: RotateVT, V: SVN->getOperand(Num: 0)); |
4786 | |
4787 | SDValue Rotate; |
4788 | // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, |
4789 | // so canonicalize to vrev8. |
4790 | if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8) |
4791 | Rotate = DAG.getNode(Opcode: ISD::BSWAP, DL, VT: RotateVT, Operand: Op); |
4792 | else |
4793 | Rotate = DAG.getNode(Opcode: ISD::ROTL, DL, VT: RotateVT, N1: Op, |
4794 | N2: DAG.getConstant(Val: RotateAmt, DL, VT: RotateVT)); |
4795 | |
4796 | return DAG.getBitcast(VT, V: Rotate); |
4797 | } |
4798 | |
4799 | // If compiling with an exactly known VLEN, see if we can split a |
4800 | // shuffle on m2 or larger into a small number of m1 sized shuffles |
4801 | // which write each destination register exactly once. |
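     | // E.g. with VLEN=128, a v16i32 shuffle (m4) in which each destination |
     | // register reads from a single source register can be done as at most |
     | // four v4i32 (m1) shuffles plus whole register subvector inserts. |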
4802 | static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, |
4803 | SelectionDAG &DAG, |
4804 | const RISCVSubtarget &Subtarget) { |
4805 | SDLoc DL(SVN); |
4806 | MVT VT = SVN->getSimpleValueType(ResNo: 0); |
4807 | SDValue V1 = SVN->getOperand(Num: 0); |
4808 | SDValue V2 = SVN->getOperand(Num: 1); |
4809 | ArrayRef<int> Mask = SVN->getMask(); |
4810 | unsigned NumElts = VT.getVectorNumElements(); |
4811 | |
4812 | // If we don't know exact data layout, not much we can do. If this |
4813 | // is already m1 or smaller, no point in splitting further. |
4814 | const auto VLen = Subtarget.getRealVLen(); |
4815 | if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen) |
4816 | return SDValue(); |
4817 | |
4818 | // Avoid picking up bitrotate patterns which we have a linear-in-lmul |
4819 | // expansion for. |
4820 | unsigned RotateAmt; |
4821 | MVT RotateVT; |
4822 | if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt)) |
4823 | return SDValue(); |
4824 | |
4825 | MVT ElemVT = VT.getVectorElementType(); |
4826 | unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); |
4827 | unsigned VRegsPerSrc = NumElts / ElemsPerVReg; |
4828 | |
4829 | SmallVector<std::pair<int, SmallVector<int>>> |
4830 | OutMasks(VRegsPerSrc, {-1, {}}); |
4831 | |
4832 | // Check if our mask can be done as a 1-to-1 mapping from source |
4833 | // to destination registers in the group without needing to |
4834 | // write each destination more than once. |
4835 | for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) { |
4836 | int DstVecIdx = DstIdx / ElemsPerVReg; |
4837 | int DstSubIdx = DstIdx % ElemsPerVReg; |
4838 | int SrcIdx = Mask[DstIdx]; |
4839 | if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts) |
4840 | continue; |
4841 | int SrcVecIdx = SrcIdx / ElemsPerVReg; |
4842 | int SrcSubIdx = SrcIdx % ElemsPerVReg; |
4843 | if (OutMasks[DstVecIdx].first == -1) |
4844 | OutMasks[DstVecIdx].first = SrcVecIdx; |
4845 | if (OutMasks[DstVecIdx].first != SrcVecIdx) |
4846 | // Note: This case could easily be handled by keeping track of a chain |
4847 | // of source values and generating two element shuffles below. This is |
4848 | // less an implementation question, and more a profitability one. |
4849 | return SDValue(); |
4850 | |
4851 | OutMasks[DstVecIdx].second.resize(N: ElemsPerVReg, NV: -1); |
4852 | OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx; |
4853 | } |
4854 | |
4855 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4856 | MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg); |
4857 | MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget); |
4858 | assert(M1VT == getLMUL1VT(M1VT)); |
4859 | unsigned NumOpElts = M1VT.getVectorMinNumElements(); |
4860 | SDValue Vec = DAG.getUNDEF(VT: ContainerVT); |
4861 | // The following semantically builds up a fixed length concat_vector |
4862 | // of the component shuffle_vectors. We eagerly lower to scalable here |
4863 | // to avoid DAG combining it back to a large shuffle_vector again. |
4864 | V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget); |
4865 | V2 = convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget); |
4866 | for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) { |
4867 | auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx]; |
4868 | if (SrcVecIdx == -1) |
4869 | continue; |
4870 | unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts; |
4871 | SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1; |
4872 | SDValue SubVec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: SrcVec, |
4873 | N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL)); |
4874 | SubVec = convertFromScalableVector(VT: OneRegVT, V: SubVec, DAG, Subtarget); |
4875 | SubVec = DAG.getVectorShuffle(VT: OneRegVT, dl: DL, N1: SubVec, N2: SubVec, Mask: SrcSubMask); |
4876 | SubVec = convertToScalableVector(VT: M1VT, V: SubVec, DAG, Subtarget); |
4877 | unsigned InsertIdx = DstVecIdx * NumOpElts; |
4878 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubVec, |
4879 | N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL)); |
4880 | } |
4881 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
4882 | } |
4883 | |
4884 | static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, |
4885 | const RISCVSubtarget &Subtarget) { |
4886 | SDValue V1 = Op.getOperand(i: 0); |
4887 | SDValue V2 = Op.getOperand(i: 1); |
4888 | SDLoc DL(Op); |
4889 | MVT XLenVT = Subtarget.getXLenVT(); |
4890 | MVT VT = Op.getSimpleValueType(); |
4891 | unsigned NumElts = VT.getVectorNumElements(); |
4892 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op.getNode()); |
4893 | |
4894 | if (VT.getVectorElementType() == MVT::i1) { |
4895 | // Lower to a vror.vi of a larger element type if possible before we promote |
4896 | // i1s to i8s. |
4897 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
4898 | return V; |
4899 | if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget)) |
4900 | return V; |
4901 | |
4902 | // Promote i1 shuffle to i8 shuffle. |
4903 | MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()); |
4904 | V1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V1); |
4905 | V2 = V2.isUndef() ? DAG.getUNDEF(VT: WidenVT) |
4906 | : DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V2); |
4907 | SDValue Shuffled = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: V1, N2: V2, Mask: SVN->getMask()); |
4908 | return DAG.getSetCC(DL, VT, LHS: Shuffled, RHS: DAG.getConstant(Val: 0, DL, VT: WidenVT), |
4909 | Cond: ISD::SETNE); |
4910 | } |
4911 | |
4912 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4913 | |
4914 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
4915 | |
4916 | if (SVN->isSplat()) { |
4917 | const int Lane = SVN->getSplatIndex(); |
4918 | if (Lane >= 0) { |
4919 | MVT SVT = VT.getVectorElementType(); |
4920 | |
4921 | // Turn splatted vector load into a strided load with an X0 stride. |
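     | // E.g. a splat of element 2 of an i32 load from p can become a |
     | // zero-strided load from p + 8: |
     | //   vlse32.v vd, (a0), zero   ; a0 = p + 8 |
     | // broadcasting that one memory element into every lane. |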
4922 | SDValue V = V1; |
4923 | // Peek through CONCAT_VECTORS as VectorCombine can concat a vector |
4924 | // with undef. |
4925 | // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? |
4926 | int Offset = Lane; |
4927 | if (V.getOpcode() == ISD::CONCAT_VECTORS) { |
4928 | int OpElements = |
4929 | V.getOperand(i: 0).getSimpleValueType().getVectorNumElements(); |
4930 | V = V.getOperand(i: Offset / OpElements); |
4931 | Offset %= OpElements; |
4932 | } |
4933 | |
4934 | // We need to ensure the load isn't atomic or volatile. |
4935 | if (ISD::isNormalLoad(N: V.getNode()) && cast<LoadSDNode>(Val&: V)->isSimple()) { |
4936 | auto *Ld = cast<LoadSDNode>(Val&: V); |
4937 | Offset *= SVT.getStoreSize(); |
4938 | SDValue NewAddr = DAG.getMemBasePlusOffset( |
4939 | Base: Ld->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: Offset), DL); |
4940 | |
4941 | // If this is SEW=64 on RV32, use a strided load with a stride of x0. |
4942 | if (SVT.isInteger() && SVT.bitsGT(VT: XLenVT)) { |
4943 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
4944 | SDValue IntID = |
4945 | DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); |
4946 | SDValue Ops[] = {Ld->getChain(), |
4947 | IntID, |
4948 | DAG.getUNDEF(ContainerVT), |
4949 | NewAddr, |
4950 | DAG.getRegister(RISCV::X0, XLenVT), |
4951 | VL}; |
4952 | SDValue NewLoad = DAG.getMemIntrinsicNode( |
4953 | ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, |
4954 | DAG.getMachineFunction().getMachineMemOperand( |
4955 | MMO: Ld->getMemOperand(), Offset, Size: SVT.getStoreSize())); |
4956 | DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: NewLoad); |
4957 | return convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget); |
4958 | } |
4959 | |
4960 | // Otherwise use a scalar load and splat. This will give the best |
4961 | // opportunity to fold a splat into the operation. ISel can turn it into |
4962 | // the x0 strided load if we aren't able to fold away the select. |
4963 | if (SVT.isFloatingPoint()) |
4964 | V = DAG.getLoad(VT: SVT, dl: DL, Chain: Ld->getChain(), Ptr: NewAddr, |
4965 | PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset), |
4966 | Alignment: Ld->getOriginalAlign(), |
4967 | MMOFlags: Ld->getMemOperand()->getFlags()); |
4968 | else |
4969 | V = DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl: DL, VT: XLenVT, Chain: Ld->getChain(), Ptr: NewAddr, |
4970 | PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset), MemVT: SVT, |
4971 | Alignment: Ld->getOriginalAlign(), |
4972 | MMOFlags: Ld->getMemOperand()->getFlags()); |
4973 | DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: V); |
4974 | |
4975 | unsigned Opc = |
4976 | VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; |
4977 | SDValue Splat = |
4978 | DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: V, N3: VL); |
4979 | return convertFromScalableVector(VT, V: Splat, DAG, Subtarget); |
4980 | } |
4981 | |
4982 | V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget); |
4983 | assert(Lane < (int)NumElts && "Unexpected lane!" ); |
4984 | SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT, |
4985 | N1: V1, N2: DAG.getConstant(Val: Lane, DL, VT: XLenVT), |
4986 | N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL); |
4987 | return convertFromScalableVector(VT, V: Gather, DAG, Subtarget); |
4988 | } |
4989 | } |
4990 | |
4991 | // For exact VLEN m2 or greater, try to split to m1 operations if we |
4992 | // can split cleanly. |
4993 | if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget)) |
4994 | return V; |
4995 | |
4996 | ArrayRef<int> Mask = SVN->getMask(); |
4997 | |
4998 | if (SDValue V = |
4999 | lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
5000 | return V; |
5001 | |
5002 | if (SDValue V = |
5003 | lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
5004 | return V; |
5005 | |
5006 | // A bitrotate will be one instruction on Zvkb, so try to lower to it first if |
5007 | // available. |
5008 | if (Subtarget.hasStdExtZvkb()) |
5009 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
5010 | return V; |
5011 | |
5012 | // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may |
5013 | // be undef which can be handled with a single SLIDEDOWN/UP. |
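     | // E.g. the single source mask <6, 7, 0, 1, 2, 3, 4, 5> is a rotate by 6: |
     | // slide the source down by 6 to place elements 6 and 7 at the front, then |
     | // slide it up by 8 - 6 = 2 to merge elements 0 through 5 in behind them. |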
5014 | int LoSrc, HiSrc; |
5015 | int Rotation = isElementRotate(LoSrc, HiSrc, Mask); |
5016 | if (Rotation > 0) { |
5017 | SDValue LoV, HiV; |
5018 | if (LoSrc >= 0) { |
5019 | LoV = LoSrc == 0 ? V1 : V2; |
5020 | LoV = convertToScalableVector(VT: ContainerVT, V: LoV, DAG, Subtarget); |
5021 | } |
5022 | if (HiSrc >= 0) { |
5023 | HiV = HiSrc == 0 ? V1 : V2; |
5024 | HiV = convertToScalableVector(VT: ContainerVT, V: HiV, DAG, Subtarget); |
5025 | } |
5026 | |
5027 | // We found a rotation. We need to slide HiV down by Rotation. Then we need |
5028 | // to slide LoV up by (NumElts - Rotation). |
5029 | unsigned InvRotate = NumElts - Rotation; |
5030 | |
5031 | SDValue Res = DAG.getUNDEF(VT: ContainerVT); |
5032 | if (HiV) { |
// Even though we could use a smaller VL, don't, in order to avoid a
// vsetivli toggle.
5035 | Res = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: HiV, |
5036 | Offset: DAG.getConstant(Val: Rotation, DL, VT: XLenVT), Mask: TrueMask, VL); |
5037 | } |
5038 | if (LoV) |
5039 | Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: LoV, |
5040 | Offset: DAG.getConstant(Val: InvRotate, DL, VT: XLenVT), Mask: TrueMask, VL, |
5041 | Policy: RISCVII::TAIL_AGNOSTIC); |
5042 | |
5043 | return convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
5044 | } |
5045 | |
5046 | // If this is a deinterleave and we can widen the vector, then we can use |
5047 | // vnsrl to deinterleave. |
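// e.g. viewing a vector of i8 as a vector of i16, a vnsrl by 0 keeps the
// even i8 elements and a vnsrl by 8 keeps the odd ones (little endian).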
5048 | if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { |
5049 | return getDeinterleaveViaVNSRL(DL, VT, Src: V1.getOperand(i: 0), EvenElts: Mask[0] == 0, |
5050 | Subtarget, DAG); |
5051 | } |
5052 | |
5053 | if (SDValue V = |
5054 | lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
5055 | return V; |
5056 | |
5057 | // Detect an interleave shuffle and lower to |
5058 | // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) |
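// For example, for v8i8 the mask <0,8,1,9,2,10,3,11> zips V1[0..3] with
// V2[0..3]: each widened i16 element Even + (Odd << 8), reinterpreted as two
// i8 elements on a little-endian layout, is exactly the pair <Even, Odd>.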
5059 | int EvenSrc, OddSrc; |
5060 | if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) { |
5061 | // Extract the halves of the vectors. |
5062 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
5063 | |
5064 | int Size = Mask.size(); |
5065 | SDValue EvenV, OddV; |
5066 | assert(EvenSrc >= 0 && "Undef source?" ); |
5067 | EvenV = (EvenSrc / Size) == 0 ? V1 : V2; |
5068 | EvenV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: EvenV, |
5069 | N2: DAG.getVectorIdxConstant(Val: EvenSrc % Size, DL)); |
5070 | |
5071 | assert(OddSrc >= 0 && "Undef source?" ); |
5072 | OddV = (OddSrc / Size) == 0 ? V1 : V2; |
5073 | OddV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: OddV, |
5074 | N2: DAG.getVectorIdxConstant(Val: OddSrc % Size, DL)); |
5075 | |
5076 | return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); |
5077 | } |
5078 | |
5079 | |
5080 | // Handle any remaining single source shuffles |
5081 | assert(!V1.isUndef() && "Unexpected shuffle canonicalization" ); |
5082 | if (V2.isUndef()) { |
5083 | // We might be able to express the shuffle as a bitrotate. But even if we |
5084 | // don't have Zvkb and have to expand, the expanded sequence of approx. 2 |
5085 | // shifts and a vor will have a higher throughput than a vrgather. |
5086 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
5087 | return V; |
5088 | |
5089 | if (VT.getScalarSizeInBits() == 8 && |
5090 | any_of(Range&: Mask, P: [&](const auto &Idx) { return Idx > 255; })) { |
5091 | // On such a vector we're unable to use i8 as the index type. |
5092 | // FIXME: We could promote the index to i16 and use vrgatherei16, but that |
5093 | // may involve vector splitting if we're already at LMUL=8, or our |
5094 | // user-supplied maximum fixed-length LMUL. |
5095 | return SDValue(); |
5096 | } |
5097 | |
5098 | // Base case for the two operand recursion below - handle the worst case |
5099 | // single source shuffle. |
5100 | unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; |
5101 | MVT IndexVT = VT.changeTypeToInteger(); |
5102 | // Since we can't introduce illegal index types at this stage, use i16 and |
5103 | // vrgatherei16 if the corresponding index type for plain vrgather is greater |
5104 | // than XLenVT. |
5105 | if (IndexVT.getScalarType().bitsGT(VT: XLenVT)) { |
5106 | GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; |
5107 | IndexVT = IndexVT.changeVectorElementType(MVT::i16); |
5108 | } |
5109 | |
5110 | // If the mask allows, we can do all the index computation in 16 bits. This |
5111 | // requires less work and less register pressure at high LMUL, and creates |
5112 | // smaller constants which may be cheaper to materialize. |
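// For example, at VLEN=128 a v16i64 shuffle would otherwise need v16i64
// indices at LMUL=8; since 15 fits in 16 bits, vrgatherei16 with v16i16
// indices (LMUL=2) uses a quarter of the index registers.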
5113 | if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) && |
5114 | (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) { |
5115 | GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; |
5116 | IndexVT = IndexVT.changeVectorElementType(MVT::i16); |
5117 | } |
5118 | |
5119 | MVT IndexContainerVT = |
5120 | ContainerVT.changeVectorElementType(EltVT: IndexVT.getScalarType()); |
5121 | |
5122 | V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget); |
5123 | SmallVector<SDValue> GatherIndicesLHS; |
5124 | for (int MaskIndex : Mask) { |
5125 | bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0; |
5126 | GatherIndicesLHS.push_back(Elt: IsLHSIndex |
5127 | ? DAG.getConstant(Val: MaskIndex, DL, VT: XLenVT) |
5128 | : DAG.getUNDEF(VT: XLenVT)); |
5129 | } |
5130 | SDValue LHSIndices = DAG.getBuildVector(VT: IndexVT, DL, Ops: GatherIndicesLHS); |
5131 | LHSIndices = convertToScalableVector(VT: IndexContainerVT, V: LHSIndices, DAG, |
5132 | Subtarget); |
5133 | SDValue Gather = DAG.getNode(Opcode: GatherVVOpc, DL, VT: ContainerVT, N1: V1, N2: LHSIndices, |
5134 | N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL); |
5135 | return convertFromScalableVector(VT, V: Gather, DAG, Subtarget); |
5136 | } |
5137 | |
5138 | // By default we preserve the original operand order, and use a mask to |
5139 | // select LHS as true and RHS as false. However, since RVV vector selects may |
5140 | // feature splats but only on the LHS, we may choose to invert our mask and |
5141 | // instead select between RHS and LHS. |
5142 | bool SwapOps = DAG.isSplatValue(V: V2) && !DAG.isSplatValue(V: V1); |
5143 | |
// Detect shuffles which can be re-expressed as vector selects; these are
// shuffles in which each element in the destination is taken from an element
// at the corresponding index in either source vector.
5147 | bool IsSelect = all_of(Range: enumerate(First&: Mask), P: [&](const auto &MaskIdx) { |
5148 | int MaskIndex = MaskIdx.value(); |
5149 | return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; |
5150 | }); |
5151 | if (IsSelect) { |
5152 | // Now construct the mask that will be used by the vselect operation. |
5153 | SmallVector<SDValue> MaskVals; |
5154 | for (int MaskIndex : Mask) { |
5155 | bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps; |
5156 | MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT)); |
5157 | } |
5158 | |
5159 | if (SwapOps) |
5160 | std::swap(a&: V1, b&: V2); |
5161 | |
5162 | assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle" ); |
5163 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
5164 | SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals); |
5165 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V1, N3: V2); |
5166 | } |
5167 | |
5168 | // As a backup, shuffles can be lowered via a vrgather instruction, possibly |
5169 | // merged with a second vrgather. |
5170 | SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS; |
5171 | SmallVector<SDValue> MaskVals; |
5172 | |
// Now construct the mask that will be used by the blended vrgather operation,
// and the appropriate indices into each source vector.
5175 | for (int MaskIndex : Mask) { |
5176 | bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps; |
5177 | MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT)); |
5178 | bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; |
5179 | ShuffleMaskLHS.push_back(Elt: IsLHSOrUndefIndex && MaskIndex >= 0 |
5180 | ? MaskIndex : -1); |
5181 | ShuffleMaskRHS.push_back(Elt: IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts)); |
5182 | } |
5183 | |
5184 | if (SwapOps) { |
5185 | std::swap(a&: V1, b&: V2); |
5186 | std::swap(LHS&: ShuffleMaskLHS, RHS&: ShuffleMaskRHS); |
5187 | } |
5188 | |
5189 | assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle" ); |
5190 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
5191 | SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals); |
5192 | |
5193 | // Recursively invoke lowering for each operand if we had two |
5194 | // independent single source shuffles, and then combine the result via a |
5195 | // vselect. Note that the vselect will likely be folded back into the |
5196 | // second permute (vrgather, or other) by the post-isel combine. |
5197 | V1 = DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskLHS); |
5198 | V2 = DAG.getVectorShuffle(VT, dl: DL, N1: V2, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskRHS); |
5199 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V2, N3: V1); |
5200 | } |
5201 | |
5202 | bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { |
5203 | // Support splats for any type. These should type legalize well. |
5204 | if (ShuffleVectorSDNode::isSplatMask(Mask: M.data(), VT)) |
5205 | return true; |
5206 | |
5207 | // Only support legal VTs for other shuffles for now. |
5208 | if (!isTypeLegal(VT)) |
5209 | return false; |
5210 | |
5211 | MVT SVT = VT.getSimpleVT(); |
5212 | |
5213 | // Not for i1 vectors. |
5214 | if (SVT.getScalarType() == MVT::i1) |
5215 | return false; |
5216 | |
5217 | int Dummy1, Dummy2; |
5218 | return (isElementRotate(LoSrc&: Dummy1, HiSrc&: Dummy2, Mask: M) > 0) || |
5219 | isInterleaveShuffle(Mask: M, VT: SVT, EvenSrc&: Dummy1, OddSrc&: Dummy2, Subtarget); |
5220 | } |
5221 | |
5222 | // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting |
5223 | // the exponent. |
5224 | SDValue |
5225 | RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, |
5226 | SelectionDAG &DAG) const { |
5227 | MVT VT = Op.getSimpleValueType(); |
5228 | unsigned EltSize = VT.getScalarSizeInBits(); |
5229 | SDValue Src = Op.getOperand(i: 0); |
5230 | SDLoc DL(Op); |
5231 | MVT ContainerVT = VT; |
5232 | |
5233 | SDValue Mask, VL; |
5234 | if (Op->isVPOpcode()) { |
5235 | Mask = Op.getOperand(i: 1); |
5236 | if (VT.isFixedLengthVector()) |
5237 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
5238 | Subtarget); |
5239 | VL = Op.getOperand(i: 2); |
5240 | } |
5241 | |
// We choose an FP type that can represent the value exactly if possible.
// Otherwise, we use a round-towards-zero conversion so that the exponent of
// the result is still correct.
// TODO: Use f16 for i8 when possible?
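// For example, converting 0x7fffffff to f32 under round-to-nearest rounds up
// to 2^31, corrupting the exponent; round-towards-zero keeps the exponent at
// 30 as required.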
5245 | MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; |
5246 | if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()))) |
5247 | FloatEltVT = MVT::f32; |
5248 | MVT FloatVT = MVT::getVectorVT(VT: FloatEltVT, EC: VT.getVectorElementCount()); |
5249 | |
5250 | // Legal types should have been checked in the RISCVTargetLowering |
5251 | // constructor. |
5252 | // TODO: Splitting may make sense in some cases. |
5253 | assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && |
5254 | "Expected legal float type!" ); |
5255 | |
5256 | // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. |
5257 | // The trailing zero count is equal to log2 of this single bit value. |
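// e.g. for Src = 0b01101000, Src & -Src = 0b00001000 = 2^3, and the
// trailing zero count is log2(2^3) = 3.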
5258 | if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { |
5259 | SDValue Neg = DAG.getNegative(Val: Src, DL, VT); |
5260 | Src = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: Neg); |
5261 | } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) { |
5262 | SDValue Neg = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), |
5263 | N2: Src, N3: Mask, N4: VL); |
5264 | Src = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Src, N2: Neg, N3: Mask, N4: VL); |
5265 | } |
5266 | |
5267 | // We have a legal FP type, convert to it. |
5268 | SDValue FloatVal; |
5269 | if (FloatVT.bitsGT(VT)) { |
5270 | if (Op->isVPOpcode()) |
5271 | FloatVal = DAG.getNode(Opcode: ISD::VP_UINT_TO_FP, DL, VT: FloatVT, N1: Src, N2: Mask, N3: VL); |
5272 | else |
5273 | FloatVal = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVT, Operand: Src); |
5274 | } else { |
5275 | // Use RTZ to avoid rounding influencing exponent of FloatVal. |
5276 | if (VT.isFixedLengthVector()) { |
5277 | ContainerVT = getContainerForFixedLengthVector(VT); |
5278 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
5279 | } |
5280 | if (!Op->isVPOpcode()) |
5281 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
5282 | SDValue RTZRM = |
5283 | DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT()); |
5284 | MVT ContainerFloatVT = |
5285 | MVT::getVectorVT(VT: FloatEltVT, EC: ContainerVT.getVectorElementCount()); |
5286 | FloatVal = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_F_XU_VL, DL, VT: ContainerFloatVT, |
5287 | N1: Src, N2: Mask, N3: RTZRM, N4: VL); |
5288 | if (VT.isFixedLengthVector()) |
5289 | FloatVal = convertFromScalableVector(VT: FloatVT, V: FloatVal, DAG, Subtarget); |
5290 | } |
5291 | // Bitcast to integer and shift the exponent to the LSB. |
5292 | EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); |
5293 | SDValue Bitcast = DAG.getBitcast(VT: IntVT, V: FloatVal); |
5294 | unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; |
5295 | |
5296 | SDValue Exp; |
5297 | // Restore back to original type. Truncation after SRL is to generate vnsrl. |
5298 | if (Op->isVPOpcode()) { |
5299 | Exp = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT: IntVT, N1: Bitcast, |
5300 | N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT), N3: Mask, N4: VL); |
5301 | Exp = DAG.getVPZExtOrTrunc(DL, VT, Op: Exp, Mask, EVL: VL); |
5302 | } else { |
5303 | Exp = DAG.getNode(Opcode: ISD::SRL, DL, VT: IntVT, N1: Bitcast, |
5304 | N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT)); |
5305 | if (IntVT.bitsLT(VT)) |
5306 | Exp = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Exp); |
5307 | else if (IntVT.bitsGT(VT)) |
5308 | Exp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Exp); |
5309 | } |
5310 | |
5311 | // The exponent contains log2 of the value in biased form. |
5312 | unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; |
5313 | // For trailing zeros, we just need to subtract the bias. |
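// e.g. with f32 (bias 127), an input of 8 converts to a biased exponent
// field of 127 + 3 = 130, and 130 - 127 = 3 = cttz(8).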
5314 | if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) |
5315 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Exp, |
5316 | N2: DAG.getConstant(Val: ExponentBias, DL, VT)); |
5317 | if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) |
5318 | return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Exp, |
5319 | N2: DAG.getConstant(Val: ExponentBias, DL, VT), N3: Mask, N4: VL); |
5320 | |
5321 | // For leading zeros, we need to remove the bias and convert from log2 to |
5322 | // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). |
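// e.g. for i32 elements, ctlz(16) = (Bias + 31) - (Bias + 4): the bias
// cancels, giving 31 - 4 = 27 leading zeros.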
5323 | unsigned Adjust = ExponentBias + (EltSize - 1); |
5324 | SDValue Res; |
5325 | if (Op->isVPOpcode()) |
5326 | Res = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp, |
5327 | N3: Mask, N4: VL); |
5328 | else |
5329 | Res = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp); |
5330 | |
// With a zero input, the result above equals Adjust, which is greater than
// EltSize. Hence, we can compute min(Res, EltSize) for CTLZ.
5333 | if (Op.getOpcode() == ISD::CTLZ) |
5334 | Res = DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: Res, N2: DAG.getConstant(Val: EltSize, DL, VT)); |
5335 | else if (Op.getOpcode() == ISD::VP_CTLZ) |
5336 | Res = DAG.getNode(Opcode: ISD::VP_UMIN, DL, VT, N1: Res, |
5337 | N2: DAG.getConstant(Val: EltSize, DL, VT), N3: Mask, N4: VL); |
5338 | return Res; |
5339 | } |
5340 | |
// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
// the load is already correctly aligned, it returns SDValue().
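// For example, an underaligned v4i32 load is re-expressed as a v16i8 load of
// the same pointer and the result is bitcast back to v4i32.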
5345 | SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, |
5346 | SelectionDAG &DAG) const { |
5347 | auto *Load = cast<LoadSDNode>(Val&: Op); |
5348 | assert(Load && Load->getMemoryVT().isVector() && "Expected vector load" ); |
5349 | |
5350 | if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
5351 | VT: Load->getMemoryVT(), |
5352 | MMO: *Load->getMemOperand())) |
5353 | return SDValue(); |
5354 | |
5355 | SDLoc DL(Op); |
5356 | MVT VT = Op.getSimpleValueType(); |
5357 | unsigned EltSizeBits = VT.getScalarSizeInBits(); |
5358 | assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
5359 | "Unexpected unaligned RVV load type" ); |
5360 | MVT NewVT = |
5361 | MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); |
5362 | assert(NewVT.isValid() && |
5363 | "Expecting equally-sized RVV vector types to be legal" ); |
5364 | SDValue L = DAG.getLoad(VT: NewVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(), |
5365 | PtrInfo: Load->getPointerInfo(), Alignment: Load->getOriginalAlign(), |
5366 | MMOFlags: Load->getMemOperand()->getFlags()); |
5367 | return DAG.getMergeValues(Ops: {DAG.getBitcast(VT, V: L), L.getValue(R: 1)}, dl: DL); |
5368 | } |
5369 | |
5370 | // While RVV has alignment restrictions, we should always be able to store as a |
5371 | // legal equivalently-sized byte-typed vector instead. This method is |
// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5373 | // returns SDValue() if the store is already correctly aligned. |
5374 | SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, |
5375 | SelectionDAG &DAG) const { |
5376 | auto *Store = cast<StoreSDNode>(Val&: Op); |
5377 | assert(Store && Store->getValue().getValueType().isVector() && |
5378 | "Expected vector store" ); |
5379 | |
5380 | if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
5381 | VT: Store->getMemoryVT(), |
5382 | MMO: *Store->getMemOperand())) |
5383 | return SDValue(); |
5384 | |
5385 | SDLoc DL(Op); |
5386 | SDValue StoredVal = Store->getValue(); |
5387 | MVT VT = StoredVal.getSimpleValueType(); |
5388 | unsigned EltSizeBits = VT.getScalarSizeInBits(); |
5389 | assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
5390 | "Unexpected unaligned RVV store type" ); |
5391 | MVT NewVT = |
5392 | MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); |
5393 | assert(NewVT.isValid() && |
5394 | "Expecting equally-sized RVV vector types to be legal" ); |
5395 | StoredVal = DAG.getBitcast(VT: NewVT, V: StoredVal); |
5396 | return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: StoredVal, Ptr: Store->getBasePtr(), |
5397 | PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(), |
5398 | MMOFlags: Store->getMemOperand()->getFlags()); |
5399 | } |
5400 | |
5401 | static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, |
5402 | const RISCVSubtarget &Subtarget) { |
5403 | assert(Op.getValueType() == MVT::i64 && "Unexpected VT" ); |
5404 | |
5405 | int64_t Imm = cast<ConstantSDNode>(Val&: Op)->getSExtValue(); |
5406 | |
5407 | // All simm32 constants should be handled by isel. |
// NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
// this check redundant, but small immediates are common, so this check
// should have better compile time.
5411 | if (isInt<32>(x: Imm)) |
5412 | return Op; |
5413 | |
5414 | // We only need to cost the immediate, if constant pool lowering is enabled. |
5415 | if (!Subtarget.useConstantPoolForLargeInts()) |
5416 | return Op; |
5417 | |
5418 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); |
5419 | if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) |
5420 | return Op; |
5421 | |
5422 | // Optimizations below are disabled for opt size. If we're optimizing for |
5423 | // size, use a constant pool. |
5424 | if (DAG.shouldOptForSize()) |
5425 | return SDValue(); |
5426 | |
// Special case. See if we can build the constant as (ADD (SLLI X, C), X); do
// that if it will avoid a constant pool. It will require an extra temporary
// register, though.
// If we have Zba, we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
// the low and high 32 bits are the same and bits 31 and 63 are set.
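// For example, 0x0000123400001234 is (X << 32) + X for X = 0x1234, and
// 0xf0f0f0f0f0f0f0f0 fits the Zba pattern: add.uw zero-extends the low 32
// bits of X, so (ADD_UW X, (SLLI X, 32)) reproduces both halves.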
5432 | unsigned ShiftAmt, AddOpc; |
5433 | RISCVMatInt::InstSeq SeqLo = |
5434 | RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); |
5435 | if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) |
5436 | return Op; |
5437 | |
5438 | return SDValue(); |
5439 | } |
5440 | |
5441 | static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, |
5442 | const RISCVSubtarget &Subtarget) { |
5443 | SDLoc dl(Op); |
5444 | AtomicOrdering FenceOrdering = |
5445 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1)); |
5446 | SyncScope::ID FenceSSID = |
5447 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
5448 | |
5449 | if (Subtarget.hasStdExtZtso()) { |
5450 | // The only fence that needs an instruction is a sequentially-consistent |
5451 | // cross-thread fence. |
5452 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
5453 | FenceSSID == SyncScope::System) |
5454 | return Op; |
5455 | |
5456 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
5457 | return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); |
5458 | } |
5459 | |
5460 | // singlethread fences only synchronize with signal handlers on the same |
5461 | // thread and thus only need to preserve instruction order, not actually |
5462 | // enforce memory ordering. |
5463 | if (FenceSSID == SyncScope::SingleThread) |
5464 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
5465 | return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); |
5466 | |
5467 | return Op; |
5468 | } |
5469 | |
5470 | static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) { |
5471 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5472 | "Unexpected custom legalisation" ); |
5473 | |
5474 | // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN. |
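// e.g. saddsat(0x7fffffff, 1): the i64 add yields 0x80000000, and the smin
// against INT32_MAX clamps it back to 0x7fffffff before truncation.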
5475 | bool IsAdd = Op.getOpcode() == ISD::SADDSAT; |
5476 | SDLoc DL(Op); |
5477 | SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); |
5478 | SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
5479 | SDValue Result = |
5480 | DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); |
5481 | |
5482 | APInt MinVal = APInt::getSignedMinValue(numBits: 32).sext(width: 64); |
5483 | APInt MaxVal = APInt::getSignedMaxValue(numBits: 32).sext(width: 64); |
5484 | SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64); |
5485 | SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64); |
5486 | Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax); |
5487 | Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin); |
5488 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); |
5489 | } |
5490 | |
5491 | static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) { |
5492 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5493 | "Unexpected custom legalisation" ); |
5494 | |
// With Zbb, we can sign extend and let LegalizeDAG use minu/maxu. Using
// sign extension allows overflow of the lower 32 bits to be detected on
// the promoted size.
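// e.g. uaddsat(0xffffffff, 1): both operands sign extend, the i64 uaddsat
// saturates to all ones, and truncation gives the expected 0xffffffff.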
5498 | SDLoc DL(Op); |
5499 | SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); |
5500 | SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
5501 | SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS); |
5502 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp); |
5503 | } |
5504 | |
5505 | // Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw. |
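// e.g. saddo(0x7fffffff, 1): the 64-bit add gives 0x0000000080000000, whose
// low 32 bits sign extend to 0xffffffff80000000; the mismatch signals
// overflow.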
5506 | static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) { |
5507 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5508 | "Unexpected custom legalisation" ); |
5509 | if (isa<ConstantSDNode>(Val: Op.getOperand(i: 1))) |
5510 | return SDValue(); |
5511 | |
5512 | bool IsAdd = Op.getOpcode() == ISD::SADDO; |
5513 | SDLoc DL(Op); |
5514 | SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); |
5515 | SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
5516 | SDValue WideOp = |
5517 | DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); |
5518 | SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp); |
5519 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp, |
5520 | DAG.getValueType(MVT::i32)); |
5521 | SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: WideOp, RHS: SExt, |
5522 | Cond: ISD::SETNE); |
5523 | return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL); |
5524 | } |
5525 | |
5526 | // Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw. |
5527 | static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) { |
5528 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5529 | "Unexpected custom legalisation" ); |
5530 | SDLoc DL(Op); |
5531 | SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0)); |
5532 | SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
5533 | SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); |
5534 | SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); |
5535 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul, |
5536 | DAG.getValueType(MVT::i32)); |
5537 | SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: Mul, RHS: SExt, |
5538 | Cond: ISD::SETNE); |
5539 | return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL); |
5540 | } |
5541 | |
5542 | SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, |
5543 | SelectionDAG &DAG) const { |
5544 | SDLoc DL(Op); |
5545 | MVT VT = Op.getSimpleValueType(); |
5546 | MVT XLenVT = Subtarget.getXLenVT(); |
5547 | unsigned Check = Op.getConstantOperandVal(i: 1); |
5548 | unsigned TDCMask = 0; |
5549 | if (Check & fcSNan) |
5550 | TDCMask |= RISCV::FPMASK_Signaling_NaN; |
5551 | if (Check & fcQNan) |
5552 | TDCMask |= RISCV::FPMASK_Quiet_NaN; |
5553 | if (Check & fcPosInf) |
5554 | TDCMask |= RISCV::FPMASK_Positive_Infinity; |
5555 | if (Check & fcNegInf) |
5556 | TDCMask |= RISCV::FPMASK_Negative_Infinity; |
5557 | if (Check & fcPosNormal) |
5558 | TDCMask |= RISCV::FPMASK_Positive_Normal; |
5559 | if (Check & fcNegNormal) |
5560 | TDCMask |= RISCV::FPMASK_Negative_Normal; |
5561 | if (Check & fcPosSubnormal) |
5562 | TDCMask |= RISCV::FPMASK_Positive_Subnormal; |
5563 | if (Check & fcNegSubnormal) |
5564 | TDCMask |= RISCV::FPMASK_Negative_Subnormal; |
5565 | if (Check & fcPosZero) |
5566 | TDCMask |= RISCV::FPMASK_Positive_Zero; |
5567 | if (Check & fcNegZero) |
5568 | TDCMask |= RISCV::FPMASK_Negative_Zero; |
5569 | |
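// fclass returns a one-hot result in its low ten bits, so e.g. an isinf
// query (fcPosInf | fcNegInf) becomes TDCMask = 0x081 and a single AND plus
// compare against zero.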
5570 | bool IsOneBitMask = isPowerOf2_32(Value: TDCMask); |
5571 | |
5572 | SDValue TDCMaskV = DAG.getConstant(Val: TDCMask, DL, VT: XLenVT); |
5573 | |
5574 | if (VT.isVector()) { |
5575 | SDValue Op0 = Op.getOperand(i: 0); |
5576 | MVT VT0 = Op.getOperand(i: 0).getSimpleValueType(); |
5577 | |
5578 | if (VT.isScalableVector()) { |
5579 | MVT DstVT = VT0.changeVectorElementTypeToInteger(); |
5580 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT0, DL, DAG, Subtarget); |
5581 | if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { |
5582 | Mask = Op.getOperand(i: 2); |
5583 | VL = Op.getOperand(i: 3); |
5584 | } |
5585 | SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: DstVT, N1: Op0, N2: Mask, |
5586 | N3: VL, Flags: Op->getFlags()); |
5587 | if (IsOneBitMask) |
5588 | return DAG.getSetCC(DL, VT, LHS: FPCLASS, |
5589 | RHS: DAG.getConstant(Val: TDCMask, DL, VT: DstVT), |
5590 | Cond: ISD::CondCode::SETEQ); |
5591 | SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: DstVT, N1: FPCLASS, |
5592 | N2: DAG.getConstant(Val: TDCMask, DL, VT: DstVT)); |
5593 | return DAG.getSetCC(DL, VT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: DstVT), |
5594 | Cond: ISD::SETNE); |
5595 | } |
5596 | |
5597 | MVT ContainerVT0 = getContainerForFixedLengthVector(VT: VT0); |
5598 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
5599 | MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); |
5600 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT0, ContainerVT: ContainerVT0, DL, DAG, Subtarget); |
5601 | if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { |
5602 | Mask = Op.getOperand(i: 2); |
5603 | MVT MaskContainerVT = |
5604 | getContainerForFixedLengthVector(VT: Mask.getSimpleValueType()); |
5605 | Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget); |
5606 | VL = Op.getOperand(i: 3); |
5607 | } |
5608 | Op0 = convertToScalableVector(VT: ContainerVT0, V: Op0, DAG, Subtarget); |
5609 | |
5610 | SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: ContainerDstVT, N1: Op0, |
5611 | N2: Mask, N3: VL, Flags: Op->getFlags()); |
5612 | |
5613 | TDCMaskV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT, |
5614 | N1: DAG.getUNDEF(VT: ContainerDstVT), N2: TDCMaskV, N3: VL); |
5615 | if (IsOneBitMask) { |
5616 | SDValue VMSEQ = |
5617 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
5618 | Ops: {FPCLASS, TDCMaskV, DAG.getCondCode(Cond: ISD::SETEQ), |
5619 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5620 | return convertFromScalableVector(VT, V: VMSEQ, DAG, Subtarget); |
5621 | } |
5622 | SDValue AND = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerDstVT, N1: FPCLASS, |
5623 | N2: TDCMaskV, N3: DAG.getUNDEF(VT: ContainerDstVT), N4: Mask, N5: VL); |
5624 | |
5625 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
5626 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT, |
5627 | N1: DAG.getUNDEF(VT: ContainerDstVT), N2: SplatZero, N3: VL); |
5628 | |
5629 | SDValue VMSNE = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
5630 | Ops: {AND, SplatZero, DAG.getCondCode(Cond: ISD::SETNE), |
5631 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5632 | return convertFromScalableVector(VT, V: VMSNE, DAG, Subtarget); |
5633 | } |
5634 | |
5635 | SDValue FCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS, DL, VT: XLenVT, Operand: Op.getOperand(i: 0)); |
5636 | SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: FCLASS, N2: TDCMaskV); |
5637 | SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), |
5638 | Cond: ISD::CondCode::SETNE); |
5639 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res); |
5640 | } |
5641 | |
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
// operations propagate NaNs.
5644 | static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, |
5645 | const RISCVSubtarget &Subtarget) { |
5646 | SDLoc DL(Op); |
5647 | MVT VT = Op.getSimpleValueType(); |
5648 | |
5649 | SDValue X = Op.getOperand(i: 0); |
5650 | SDValue Y = Op.getOperand(i: 1); |
5651 | |
5652 | if (!VT.isVector()) { |
5653 | MVT XLenVT = Subtarget.getXLenVT(); |
5654 | |
// If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
// ensures that when one input is a NaN, the other will also be a NaN,
// allowing the NaN to propagate. If both inputs are NaN, this will swap the
// inputs, which is harmless.
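// e.g. fmaximum(NaN, 1.0): X fails the ordered self-compare, so NewY becomes
// X (a NaN) as well; fmax over two NaNs returns the canonical NaN, giving
// the required propagation.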
5659 | |
5660 | SDValue NewY = Y; |
5661 | if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: X)) { |
5662 | SDValue XIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: X, RHS: X, Cond: ISD::SETOEQ); |
5663 | NewY = DAG.getSelect(DL, VT, Cond: XIsNonNan, LHS: Y, RHS: X); |
5664 | } |
5665 | |
5666 | SDValue NewX = X; |
5667 | if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: Y)) { |
5668 | SDValue YIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: Y, RHS: Y, Cond: ISD::SETOEQ); |
5669 | NewX = DAG.getSelect(DL, VT, Cond: YIsNonNan, LHS: X, RHS: Y); |
5670 | } |
5671 | |
5672 | unsigned Opc = |
5673 | Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; |
5674 | return DAG.getNode(Opcode: Opc, DL, VT, N1: NewX, N2: NewY); |
5675 | } |
5676 | |
// Check for NaNs before converting the fixed-length vectors to scalable ones.
5678 | bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: X); |
5679 | bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: Y); |
5680 | |
5681 | MVT ContainerVT = VT; |
5682 | if (VT.isFixedLengthVector()) { |
5683 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
5684 | X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget); |
5685 | Y = convertToScalableVector(VT: ContainerVT, V: Y, DAG, Subtarget); |
5686 | } |
5687 | |
5688 | SDValue Mask, VL; |
5689 | if (Op->isVPOpcode()) { |
5690 | Mask = Op.getOperand(i: 2); |
5691 | if (VT.isFixedLengthVector()) |
5692 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
5693 | Subtarget); |
5694 | VL = Op.getOperand(i: 3); |
5695 | } else { |
5696 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
5697 | } |
5698 | |
5699 | SDValue NewY = Y; |
5700 | if (!XIsNeverNan) { |
5701 | SDValue XIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
5702 | Ops: {X, X, DAG.getCondCode(Cond: ISD::SETOEQ), |
5703 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5704 | NewY = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: XIsNonNan, N2: Y, N3: X, |
5705 | N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
5706 | } |
5707 | |
5708 | SDValue NewX = X; |
5709 | if (!YIsNeverNan) { |
5710 | SDValue YIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
5711 | Ops: {Y, Y, DAG.getCondCode(Cond: ISD::SETOEQ), |
5712 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5713 | NewX = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: YIsNonNan, N2: X, N3: Y, |
5714 | N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
5715 | } |
5716 | |
5717 | unsigned Opc = |
5718 | Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM |
5719 | ? RISCVISD::VFMAX_VL |
5720 | : RISCVISD::VFMIN_VL; |
5721 | SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: NewX, N2: NewY, |
5722 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
5723 | if (VT.isFixedLengthVector()) |
5724 | Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
5725 | return Res; |
5726 | } |
5727 | |
/// Get the RISC-V target-specific VL op for a given SDNode.
5729 | static unsigned getRISCVVLOp(SDValue Op) { |
5730 | #define OP_CASE(NODE) \ |
5731 | case ISD::NODE: \ |
5732 | return RISCVISD::NODE##_VL; |
5733 | #define VP_CASE(NODE) \ |
5734 | case ISD::VP_##NODE: \ |
5735 | return RISCVISD::NODE##_VL; |
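// For example, OP_CASE(ADD) expands to "case ISD::ADD: return
// RISCVISD::ADD_VL;" and VP_CASE(ADD) to "case ISD::VP_ADD: return
// RISCVISD::ADD_VL;".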
5736 | // clang-format off |
5737 | switch (Op.getOpcode()) { |
5738 | default: |
5739 | llvm_unreachable("don't have RISC-V specified VL op for this SDNode" ); |
5740 | OP_CASE(ADD) |
5741 | OP_CASE(SUB) |
5742 | OP_CASE(MUL) |
5743 | OP_CASE(MULHS) |
5744 | OP_CASE(MULHU) |
5745 | OP_CASE(SDIV) |
5746 | OP_CASE(SREM) |
5747 | OP_CASE(UDIV) |
5748 | OP_CASE(UREM) |
5749 | OP_CASE(SHL) |
5750 | OP_CASE(SRA) |
5751 | OP_CASE(SRL) |
5752 | OP_CASE(ROTL) |
5753 | OP_CASE(ROTR) |
5754 | OP_CASE(BSWAP) |
5755 | OP_CASE(CTTZ) |
5756 | OP_CASE(CTLZ) |
5757 | OP_CASE(CTPOP) |
5758 | OP_CASE(BITREVERSE) |
5759 | OP_CASE(SADDSAT) |
5760 | OP_CASE(UADDSAT) |
5761 | OP_CASE(SSUBSAT) |
5762 | OP_CASE(USUBSAT) |
5763 | OP_CASE(AVGFLOORU) |
5764 | OP_CASE(AVGCEILU) |
5765 | OP_CASE(FADD) |
5766 | OP_CASE(FSUB) |
5767 | OP_CASE(FMUL) |
5768 | OP_CASE(FDIV) |
5769 | OP_CASE(FNEG) |
5770 | OP_CASE(FABS) |
5771 | OP_CASE(FSQRT) |
5772 | OP_CASE(SMIN) |
5773 | OP_CASE(SMAX) |
5774 | OP_CASE(UMIN) |
5775 | OP_CASE(UMAX) |
5776 | OP_CASE(STRICT_FADD) |
5777 | OP_CASE(STRICT_FSUB) |
5778 | OP_CASE(STRICT_FMUL) |
5779 | OP_CASE(STRICT_FDIV) |
5780 | OP_CASE(STRICT_FSQRT) |
5781 | VP_CASE(ADD) // VP_ADD |
5782 | VP_CASE(SUB) // VP_SUB |
5783 | VP_CASE(MUL) // VP_MUL |
5784 | VP_CASE(SDIV) // VP_SDIV |
5785 | VP_CASE(SREM) // VP_SREM |
5786 | VP_CASE(UDIV) // VP_UDIV |
5787 | VP_CASE(UREM) // VP_UREM |
5788 | VP_CASE(SHL) // VP_SHL |
5789 | VP_CASE(FADD) // VP_FADD |
5790 | VP_CASE(FSUB) // VP_FSUB |
5791 | VP_CASE(FMUL) // VP_FMUL |
5792 | VP_CASE(FDIV) // VP_FDIV |
5793 | VP_CASE(FNEG) // VP_FNEG |
5794 | VP_CASE(FABS) // VP_FABS |
5795 | VP_CASE(SMIN) // VP_SMIN |
5796 | VP_CASE(SMAX) // VP_SMAX |
5797 | VP_CASE(UMIN) // VP_UMIN |
5798 | VP_CASE(UMAX) // VP_UMAX |
5799 | VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN |
5800 | VP_CASE(SETCC) // VP_SETCC |
5801 | VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP |
5802 | VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP |
5803 | VP_CASE(BITREVERSE) // VP_BITREVERSE |
5804 | VP_CASE(SADDSAT) // VP_SADDSAT |
5805 | VP_CASE(UADDSAT) // VP_UADDSAT |
5806 | VP_CASE(SSUBSAT) // VP_SSUBSAT |
5807 | VP_CASE(USUBSAT) // VP_USUBSAT |
5808 | VP_CASE(BSWAP) // VP_BSWAP |
5809 | VP_CASE(CTLZ) // VP_CTLZ |
5810 | VP_CASE(CTTZ) // VP_CTTZ |
5811 | VP_CASE(CTPOP) // VP_CTPOP |
5812 | case ISD::CTLZ_ZERO_UNDEF: |
5813 | case ISD::VP_CTLZ_ZERO_UNDEF: |
5814 | return RISCVISD::CTLZ_VL; |
5815 | case ISD::CTTZ_ZERO_UNDEF: |
5816 | case ISD::VP_CTTZ_ZERO_UNDEF: |
5817 | return RISCVISD::CTTZ_VL; |
5818 | case ISD::FMA: |
5819 | case ISD::VP_FMA: |
5820 | return RISCVISD::VFMADD_VL; |
5821 | case ISD::STRICT_FMA: |
5822 | return RISCVISD::STRICT_VFMADD_VL; |
5823 | case ISD::AND: |
5824 | case ISD::VP_AND: |
5825 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
5826 | return RISCVISD::VMAND_VL; |
5827 | return RISCVISD::AND_VL; |
5828 | case ISD::OR: |
5829 | case ISD::VP_OR: |
5830 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
5831 | return RISCVISD::VMOR_VL; |
5832 | return RISCVISD::OR_VL; |
5833 | case ISD::XOR: |
5834 | case ISD::VP_XOR: |
5835 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
5836 | return RISCVISD::VMXOR_VL; |
5837 | return RISCVISD::XOR_VL; |
5838 | case ISD::VP_SELECT: |
5839 | case ISD::VP_MERGE: |
5840 | return RISCVISD::VMERGE_VL; |
5841 | case ISD::VP_ASHR: |
5842 | return RISCVISD::SRA_VL; |
5843 | case ISD::VP_LSHR: |
5844 | return RISCVISD::SRL_VL; |
5845 | case ISD::VP_SQRT: |
5846 | return RISCVISD::FSQRT_VL; |
5847 | case ISD::VP_SIGN_EXTEND: |
5848 | return RISCVISD::VSEXT_VL; |
5849 | case ISD::VP_ZERO_EXTEND: |
5850 | return RISCVISD::VZEXT_VL; |
5851 | case ISD::VP_FP_TO_SINT: |
5852 | return RISCVISD::VFCVT_RTZ_X_F_VL; |
5853 | case ISD::VP_FP_TO_UINT: |
5854 | return RISCVISD::VFCVT_RTZ_XU_F_VL; |
5855 | case ISD::FMINNUM: |
5856 | case ISD::VP_FMINNUM: |
5857 | return RISCVISD::VFMIN_VL; |
5858 | case ISD::FMAXNUM: |
5859 | case ISD::VP_FMAXNUM: |
5860 | return RISCVISD::VFMAX_VL; |
5861 | case ISD::LRINT: |
5862 | case ISD::VP_LRINT: |
5863 | case ISD::LLRINT: |
5864 | case ISD::VP_LLRINT: |
5865 | return RISCVISD::VFCVT_X_F_VL; |
5866 | } |
5867 | // clang-format on |
5868 | #undef OP_CASE |
5869 | #undef VP_CASE |
5870 | } |
5871 | |
/// Return true if a RISC-V target-specific op has a merge operand.
5873 | static bool hasMergeOp(unsigned Opcode) { |
5874 | assert(Opcode > RISCVISD::FIRST_NUMBER && |
5875 | Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && |
5876 | "not a RISC-V target specific op" ); |
5877 | static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == |
5878 | 126 && |
5879 | RISCVISD::LAST_RISCV_STRICTFP_OPCODE - |
5880 | ISD::FIRST_TARGET_STRICTFP_OPCODE == |
5881 | 21 && |
5882 | "adding target specific op should update this function" ); |
5883 | if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) |
5884 | return true; |
5885 | if (Opcode == RISCVISD::FCOPYSIGN_VL) |
5886 | return true; |
5887 | if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) |
5888 | return true; |
5889 | if (Opcode == RISCVISD::SETCC_VL) |
5890 | return true; |
5891 | if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) |
5892 | return true; |
5893 | if (Opcode == RISCVISD::VMERGE_VL) |
5894 | return true; |
5895 | return false; |
5896 | } |
5897 | |
/// Return true if a RISC-V target-specific op has a mask operand.
5899 | static bool hasMaskOp(unsigned Opcode) { |
5900 | assert(Opcode > RISCVISD::FIRST_NUMBER && |
5901 | Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && |
5902 | "not a RISC-V target specific op" ); |
5903 | static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == |
5904 | 126 && |
5905 | RISCVISD::LAST_RISCV_STRICTFP_OPCODE - |
5906 | ISD::FIRST_TARGET_STRICTFP_OPCODE == |
5907 | 21 && |
5908 | "adding target specific op should update this function" ); |
5909 | if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) |
5910 | return true; |
5911 | if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) |
5912 | return true; |
5913 | if (Opcode >= RISCVISD::STRICT_FADD_VL && |
5914 | Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) |
5915 | return true; |
5916 | return false; |
5917 | } |
5918 | |
5919 | static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { |
5920 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType()); |
5921 | SDLoc DL(Op); |
5922 | |
5923 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
5924 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
5925 | |
5926 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
5927 | if (!Op.getOperand(i: j).getValueType().isVector()) { |
5928 | LoOperands[j] = Op.getOperand(i: j); |
5929 | HiOperands[j] = Op.getOperand(i: j); |
5930 | continue; |
5931 | } |
5932 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
5933 | DAG.SplitVector(N: Op.getOperand(i: j), DL); |
5934 | } |
5935 | |
5936 | SDValue LoRes = |
5937 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags()); |
5938 | SDValue HiRes = |
5939 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags()); |
5940 | |
5941 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes); |
5942 | } |
5943 | |
5944 | static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) { |
5945 | assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op" ); |
5946 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType()); |
5947 | SDLoc DL(Op); |
5948 | |
5949 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
5950 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
5951 | |
5952 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
5953 | if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == j) { |
5954 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
5955 | DAG.SplitEVL(N: Op.getOperand(i: j), VecVT: Op.getValueType(), DL); |
5956 | continue; |
5957 | } |
5958 | if (!Op.getOperand(i: j).getValueType().isVector()) { |
5959 | LoOperands[j] = Op.getOperand(i: j); |
5960 | HiOperands[j] = Op.getOperand(i: j); |
5961 | continue; |
5962 | } |
5963 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
5964 | DAG.SplitVector(N: Op.getOperand(i: j), DL); |
5965 | } |
5966 | |
5967 | SDValue LoRes = |
5968 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags()); |
5969 | SDValue HiRes = |
5970 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags()); |
5971 | |
5972 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes); |
5973 | } |
5974 | |
5975 | static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) { |
5976 | SDLoc DL(Op); |
5977 | |
5978 | auto [Lo, Hi] = DAG.SplitVector(N: Op.getOperand(i: 1), DL); |
5979 | auto [MaskLo, MaskHi] = DAG.SplitVector(N: Op.getOperand(i: 2), DL); |
5980 | auto [EVLLo, EVLHi] = |
5981 | DAG.SplitEVL(N: Op.getOperand(i: 3), VecVT: Op.getOperand(i: 1).getValueType(), DL); |
5982 | |
5983 | SDValue ResLo = |
5984 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
5985 | Ops: {Op.getOperand(i: 0), Lo, MaskLo, EVLLo}, Flags: Op->getFlags()); |
5986 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
5987 | Ops: {ResLo, Hi, MaskHi, EVLHi}, Flags: Op->getFlags()); |
5988 | } |
5989 | |
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
assert(Op->isStrictFPOpcode());
5993 | |
5994 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op->getValueType(ResNo: 0)); |
5995 | |
5996 | SDVTList LoVTs = DAG.getVTList(VT1: LoVT, VT2: Op->getValueType(ResNo: 1)); |
5997 | SDVTList HiVTs = DAG.getVTList(VT1: HiVT, VT2: Op->getValueType(ResNo: 1)); |
5998 | |
5999 | SDLoc DL(Op); |
6000 | |
6001 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
6002 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
6003 | |
6004 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
6005 | if (!Op.getOperand(i: j).getValueType().isVector()) { |
6006 | LoOperands[j] = Op.getOperand(i: j); |
6007 | HiOperands[j] = Op.getOperand(i: j); |
6008 | continue; |
6009 | } |
6010 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
6011 | DAG.SplitVector(N: Op.getOperand(i: j), DL); |
6012 | } |
6013 | |
6014 | SDValue LoRes = |
6015 | DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: LoVTs, Ops: LoOperands, Flags: Op->getFlags()); |
6016 | HiOperands[0] = LoRes.getValue(R: 1); |
6017 | SDValue HiRes = |
6018 | DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: HiVTs, Ops: HiOperands, Flags: Op->getFlags()); |
6019 | |
6020 | SDValue V = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op->getValueType(ResNo: 0), |
6021 | N1: LoRes.getValue(R: 0), N2: HiRes.getValue(R: 0)); |
6022 | return DAG.getMergeValues(Ops: {V, HiRes.getValue(R: 1)}, dl: DL); |
6023 | } |
6024 | |
6025 | SDValue RISCVTargetLowering::LowerOperation(SDValue Op, |
6026 | SelectionDAG &DAG) const { |
6027 | switch (Op.getOpcode()) { |
6028 | default: |
6029 | report_fatal_error(reason: "unimplemented operand" ); |
6030 | case ISD::ATOMIC_FENCE: |
6031 | return LowerATOMIC_FENCE(Op, DAG, Subtarget); |
6032 | case ISD::GlobalAddress: |
6033 | return lowerGlobalAddress(Op, DAG); |
6034 | case ISD::BlockAddress: |
6035 | return lowerBlockAddress(Op, DAG); |
6036 | case ISD::ConstantPool: |
6037 | return lowerConstantPool(Op, DAG); |
6038 | case ISD::JumpTable: |
6039 | return lowerJumpTable(Op, DAG); |
6040 | case ISD::GlobalTLSAddress: |
6041 | return lowerGlobalTLSAddress(Op, DAG); |
6042 | case ISD::Constant: |
6043 | return lowerConstant(Op, DAG, Subtarget); |
6044 | case ISD::SELECT: |
6045 | return lowerSELECT(Op, DAG); |
6046 | case ISD::BRCOND: |
6047 | return lowerBRCOND(Op, DAG); |
6048 | case ISD::VASTART: |
6049 | return lowerVASTART(Op, DAG); |
6050 | case ISD::FRAMEADDR: |
6051 | return lowerFRAMEADDR(Op, DAG); |
6052 | case ISD::RETURNADDR: |
6053 | return lowerRETURNADDR(Op, DAG); |
6054 | case ISD::SADDO: |
6055 | case ISD::SSUBO: |
6056 | return lowerSADDO_SSUBO(Op, DAG); |
6057 | case ISD::SMULO: |
6058 | return lowerSMULO(Op, DAG); |
6059 | case ISD::SHL_PARTS: |
6060 | return lowerShiftLeftParts(Op, DAG); |
6061 | case ISD::SRA_PARTS: |
6062 | return lowerShiftRightParts(Op, DAG, IsSRA: true); |
6063 | case ISD::SRL_PARTS: |
6064 | return lowerShiftRightParts(Op, DAG, IsSRA: false); |
6065 | case ISD::ROTL: |
6066 | case ISD::ROTR: |
6067 | if (Op.getValueType().isFixedLengthVector()) { |
6068 | assert(Subtarget.hasStdExtZvkb()); |
6069 | return lowerToScalableOp(Op, DAG); |
6070 | } |
6071 | assert(Subtarget.hasVendorXTHeadBb() && |
6072 | !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && |
6073 | "Unexpected custom legalization" ); |
6074 | // XTHeadBb only supports rotate by constant. |
6075 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1))) |
6076 | return SDValue(); |
6077 | return Op; |
6078 | case ISD::BITCAST: { |
6079 | SDLoc DL(Op); |
6080 | EVT VT = Op.getValueType(); |
6081 | SDValue Op0 = Op.getOperand(i: 0); |
6082 | EVT Op0VT = Op0.getValueType(); |
6083 | MVT XLenVT = Subtarget.getXLenVT(); |
6084 | if (VT == MVT::f16 && Op0VT == MVT::i16 && |
6085 | Subtarget.hasStdExtZfhminOrZhinxmin()) { |
6086 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0); |
6087 | SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); |
6088 | return FPConv; |
6089 | } |
6090 | if (VT == MVT::bf16 && Op0VT == MVT::i16 && |
6091 | Subtarget.hasStdExtZfbfmin()) { |
6092 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0); |
6093 | SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0); |
6094 | return FPConv; |
6095 | } |
6096 | if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && |
6097 | Subtarget.hasStdExtFOrZfinx()) { |
6098 | SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); |
6099 | SDValue FPConv = |
6100 | DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); |
6101 | return FPConv; |
6102 | } |
6103 | if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) { |
6104 | SDValue Lo, Hi; |
6105 | std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); |
6106 | SDValue RetReg = |
6107 | DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); |
6108 | return RetReg; |
6109 | } |
6110 | |
6111 | // Consider other scalar<->scalar casts as legal if the types are legal. |
6112 | // Otherwise expand them. |
6113 | if (!VT.isVector() && !Op0VT.isVector()) { |
6114 | if (isTypeLegal(VT) && isTypeLegal(VT: Op0VT)) |
6115 | return Op; |
6116 | return SDValue(); |
6117 | } |
6118 | |
6119 | assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && |
6120 | "Unexpected types" ); |
6121 | |
6122 | if (VT.isFixedLengthVector()) { |
6123 | // We can handle fixed length vector bitcasts with a simple replacement |
6124 | // in isel. |
6125 | if (Op0VT.isFixedLengthVector()) |
6126 | return Op; |
// When bitcasting from scalar to fixed-length vector, insert the scalar
// into a one-element vector of the source scalar type, then bitcast that
// vector to the result type.
6130 | if (!Op0VT.isVector()) { |
6131 | EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: Op0VT, NumElements: 1); |
6132 | if (!isTypeLegal(VT: BVT)) |
6133 | return SDValue(); |
6134 | return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: BVT, |
6135 | N1: DAG.getUNDEF(VT: BVT), N2: Op0, |
6136 | N3: DAG.getVectorIdxConstant(Val: 0, DL))); |
6137 | } |
6138 | return SDValue(); |
6139 | } |
6140 | // Custom-legalize bitcasts from fixed-length vector types to scalar types |
6141 | // thus: bitcast the vector to a one-element vector type whose element type |
6142 | // is the same as the result type, and extract the first element. |
6143 | if (!VT.isVector() && Op0VT.isFixedLengthVector()) { |
6144 | EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1); |
6145 | if (!isTypeLegal(VT: BVT)) |
6146 | return SDValue(); |
6147 | SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0); |
6148 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec, |
6149 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
6150 | } |
6151 | return SDValue(); |
6152 | } |
6153 | case ISD::INTRINSIC_WO_CHAIN: |
6154 | return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
6155 | case ISD::INTRINSIC_W_CHAIN: |
6156 | return LowerINTRINSIC_W_CHAIN(Op, DAG); |
6157 | case ISD::INTRINSIC_VOID: |
6158 | return LowerINTRINSIC_VOID(Op, DAG); |
6159 | case ISD::IS_FPCLASS: |
6160 | return LowerIS_FPCLASS(Op, DAG); |
6161 | case ISD::BITREVERSE: { |
6162 | MVT VT = Op.getSimpleValueType(); |
6163 | if (VT.isFixedLengthVector()) { |
6164 | assert(Subtarget.hasStdExtZvbb()); |
6165 | return lowerToScalableOp(Op, DAG); |
6166 | } |
6167 | SDLoc DL(Op); |
6168 | assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization" ); |
6169 | assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode" ); |
6170 | // Expand bitreverse to a bswap(rev8) followed by brev8. |
6171 | SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: Op.getOperand(i: 0)); |
6172 | return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: BSwap); |
6173 | } |
6174 | case ISD::TRUNCATE: |
6175 | // Only custom-lower vector truncates |
6176 | if (!Op.getSimpleValueType().isVector()) |
6177 | return Op; |
6178 | return lowerVectorTruncLike(Op, DAG); |
6179 | case ISD::ANY_EXTEND: |
6180 | case ISD::ZERO_EXTEND: |
6181 | if (Op.getOperand(0).getValueType().isVector() && |
6182 | Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
6183 | return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: 1); |
6184 | return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VZEXT_VL); |
6185 | case ISD::SIGN_EXTEND: |
6186 | if (Op.getOperand(0).getValueType().isVector() && |
6187 | Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
6188 | return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: -1); |
6189 | return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VSEXT_VL); |
6190 | case ISD::SPLAT_VECTOR_PARTS: |
6191 | return lowerSPLAT_VECTOR_PARTS(Op, DAG); |
6192 | case ISD::INSERT_VECTOR_ELT: |
6193 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
6194 | case ISD::EXTRACT_VECTOR_ELT: |
6195 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
6196 | case ISD::SCALAR_TO_VECTOR: { |
6197 | MVT VT = Op.getSimpleValueType(); |
6198 | SDLoc DL(Op); |
6199 | SDValue Scalar = Op.getOperand(i: 0); |
6200 | if (VT.getVectorElementType() == MVT::i1) { |
6201 | MVT WideVT = VT.changeVectorElementType(MVT::i8); |
6202 | SDValue V = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: WideVT, Operand: Scalar); |
6203 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: V); |
6204 | } |
6205 | MVT ContainerVT = VT; |
6206 | if (VT.isFixedLengthVector()) |
6207 | ContainerVT = getContainerForFixedLengthVector(VT); |
6208 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
6209 | Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Scalar); |
6210 | SDValue V = DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: ContainerVT, |
6211 | N1: DAG.getUNDEF(VT: ContainerVT), N2: Scalar, N3: VL); |
6212 | if (VT.isFixedLengthVector()) |
6213 | V = convertFromScalableVector(VT, V, DAG, Subtarget); |
6214 | return V; |
6215 | } |
6216 | case ISD::VSCALE: { |
6217 | MVT XLenVT = Subtarget.getXLenVT(); |
6218 | MVT VT = Op.getSimpleValueType(); |
6219 | SDLoc DL(Op); |
6220 | SDValue Res = DAG.getNode(Opcode: RISCVISD::READ_VLENB, DL, VT: XLenVT); |
// We define our scalable vector types for LMUL=1 to use a 64-bit known
// minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
// vscale as VLENB / 8.
6224 | static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!" ); |
6225 | if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) |
6226 | report_fatal_error(reason: "Support for VLEN==32 is incomplete." ); |
6227 | // We assume VLENB is a multiple of 8. We manually choose the best shift |
6228 | // here because SimplifyDemandedBits isn't always able to simplify it. |
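// For example, VSCALE(4) folds to VLENB >> 1 (vscale * 4 == VLENB / 2),
// VSCALE(16) to VLENB << 1, and VSCALE(24) to VLENB * 3.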
6229 | uint64_t Val = Op.getConstantOperandVal(i: 0); |
6230 | if (isPowerOf2_64(Value: Val)) { |
6231 | uint64_t Log2 = Log2_64(Value: Val); |
6232 | if (Log2 < 3) |
6233 | Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res, |
6234 | N2: DAG.getConstant(Val: 3 - Log2, DL, VT)); |
6235 | else if (Log2 > 3) |
6236 | Res = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: Res, |
6237 | N2: DAG.getConstant(Val: Log2 - 3, DL, VT: XLenVT)); |
6238 | } else if ((Val % 8) == 0) { |
6239 | // If the multiplier is a multiple of 8, scale it down to avoid needing |
6240 | // to shift the VLENB value. |
6241 | Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: Res, |
6242 | N2: DAG.getConstant(Val: Val / 8, DL, VT: XLenVT)); |
6243 | } else { |
6244 | SDValue VScale = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res, |
6245 | N2: DAG.getConstant(Val: 3, DL, VT: XLenVT)); |
6246 | Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: VScale, |
6247 | N2: DAG.getConstant(Val, DL, VT: XLenVT)); |
6248 | } |
6249 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res); |
6250 | } |
6251 | case ISD::FPOWI: { |
6252 | // Custom promote f16 powi with illegal i32 integer type on RV64. Once |
6253 | // promoted this will be legalized into a libcall by LegalizeIntegerTypes. |
6254 | if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && |
6255 | Op.getOperand(1).getValueType() == MVT::i32) { |
6256 | SDLoc DL(Op); |
6257 | SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); |
6258 | SDValue Powi = |
6259 | DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); |
6260 | return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, |
6261 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); |
6262 | } |
6263 | return SDValue(); |
6264 | } |
6265 | case ISD::FMAXIMUM: |
6266 | case ISD::FMINIMUM: |
6267 | if (Op.getValueType() == MVT::nxv32f16 && |
6268 | (Subtarget.hasVInstructionsF16Minimal() && |
6269 | !Subtarget.hasVInstructionsF16())) |
6270 | return SplitVectorOp(Op, DAG); |
6271 | return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); |
6272 | case ISD::FP_EXTEND: { |
6273 | SDLoc DL(Op); |
6274 | EVT VT = Op.getValueType(); |
6275 | SDValue Op0 = Op.getOperand(i: 0); |
6276 | EVT Op0VT = Op0.getValueType(); |
6277 | if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) |
6278 | return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); |
6279 | if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) { |
6280 | SDValue FloatVal = |
6281 | DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); |
6282 | return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal); |
6283 | } |
6284 | |
6285 | if (!Op.getValueType().isVector()) |
6286 | return Op; |
6287 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
6288 | } |
6289 | case ISD::FP_ROUND: { |
6290 | SDLoc DL(Op); |
6291 | EVT VT = Op.getValueType(); |
6292 | SDValue Op0 = Op.getOperand(i: 0); |
6293 | EVT Op0VT = Op0.getValueType(); |
6294 | if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin()) |
6295 | return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0); |
6296 | if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() && |
6297 | Subtarget.hasStdExtDOrZdinx()) { |
6298 | SDValue FloatVal = |
6299 | DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0, |
6300 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); |
6301 | return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal); |
6302 | } |
6303 | |
6304 | if (!Op.getValueType().isVector()) |
6305 | return Op; |
6306 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
6307 | } |
6308 | case ISD::STRICT_FP_ROUND: |
6309 | case ISD::STRICT_FP_EXTEND: |
6310 | return lowerStrictFPExtendOrRoundLike(Op, DAG); |
6311 | case ISD::SINT_TO_FP: |
6312 | case ISD::UINT_TO_FP: |
6313 | if (Op.getValueType().isVector() && |
6314 | Op.getValueType().getScalarType() == MVT::f16 && |
6315 | (Subtarget.hasVInstructionsF16Minimal() && |
6316 | !Subtarget.hasVInstructionsF16())) { |
6317 | if (Op.getValueType() == MVT::nxv32f16) |
6318 | return SplitVectorOp(Op, DAG); |
6319 | // int -> f32 |
6320 | SDLoc DL(Op); |
6321 | MVT NVT = |
6322 | MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); |
6323 | SDValue NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops()); |
6324 | // f32 -> f16 |
6325 | return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC, |
6326 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
6327 | } |
6328 | [[fallthrough]]; |
6329 | case ISD::FP_TO_SINT: |
6330 | case ISD::FP_TO_UINT: |
6331 | if (SDValue Op1 = Op.getOperand(i: 0); |
6332 | Op1.getValueType().isVector() && |
6333 | Op1.getValueType().getScalarType() == MVT::f16 && |
6334 | (Subtarget.hasVInstructionsF16Minimal() && |
6335 | !Subtarget.hasVInstructionsF16())) { |
6336 | if (Op1.getValueType() == MVT::nxv32f16) |
6337 | return SplitVectorOp(Op, DAG); |
6338 | // f16 -> f32 |
6339 | SDLoc DL(Op); |
6340 | MVT NVT = MVT::getVectorVT(MVT::f32, |
6341 | Op1.getValueType().getVectorElementCount()); |
6342 | SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1); |
6343 | // f32 -> int |
6344 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), Operand: WidenVec); |
6345 | } |
6346 | [[fallthrough]]; |
6347 | case ISD::STRICT_FP_TO_SINT: |
6348 | case ISD::STRICT_FP_TO_UINT: |
6349 | case ISD::STRICT_SINT_TO_FP: |
6350 | case ISD::STRICT_UINT_TO_FP: { |
    // RVV can only do fp<->int conversions to types half/double the size of
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
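    // For example, nxv2f16 -> nxv2i64 becomes an fp_extend to nxv2f32
    // followed by a widening f32 -> i64 convert, and nxv2i64 -> nxv2f16
    // becomes a narrowing i64 -> f32 convert followed by an fp_round.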
6354 | MVT VT = Op.getSimpleValueType(); |
6355 | if (!VT.isVector()) |
6356 | return Op; |
6357 | SDLoc DL(Op); |
6358 | bool IsStrict = Op->isStrictFPOpcode(); |
6359 | SDValue Src = Op.getOperand(i: 0 + IsStrict); |
6360 | MVT EltVT = VT.getVectorElementType(); |
6361 | MVT SrcVT = Src.getSimpleValueType(); |
6362 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
6363 | unsigned EltSize = EltVT.getSizeInBits(); |
6364 | unsigned SrcEltSize = SrcEltVT.getSizeInBits(); |
6365 | assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && |
6366 | "Unexpected vector element types" ); |
6367 | |
6368 | bool IsInt2FP = SrcEltVT.isInteger(); |
6369 | // Widening conversions |
6370 | if (EltSize > (2 * SrcEltSize)) { |
6371 | if (IsInt2FP) { |
6372 | // Do a regular integer sign/zero extension then convert to float. |
6373 | MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSize / 2), |
6374 | EC: VT.getVectorElementCount()); |
6375 | unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP || |
6376 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP) |
6377 | ? ISD::ZERO_EXTEND |
6378 | : ISD::SIGN_EXTEND; |
6379 | SDValue Ext = DAG.getNode(Opcode: ExtOpcode, DL, VT: IVecVT, Operand: Src); |
6380 | if (IsStrict) |
6381 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(), |
6382 | N1: Op.getOperand(i: 0), N2: Ext); |
6383 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: Ext); |
6384 | } |
6385 | // FP2Int |
6386 | assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering" ); |
6387 | // Do one doubling fp_extend then complete the operation by converting |
6388 | // to int. |
6389 | MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
6390 | if (IsStrict) { |
6391 | auto [FExt, Chain] = |
6392 | DAG.getStrictFPExtendOrRound(Op: Src, Chain: Op.getOperand(i: 0), DL, VT: InterimFVT); |
6393 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(), N1: Chain, N2: FExt); |
6394 | } |
6395 | SDValue FExt = DAG.getFPExtendOrRound(Op: Src, DL, VT: InterimFVT); |
6396 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: FExt); |
6397 | } |
6398 | |
6399 | // Narrowing conversions |
6400 | if (SrcEltSize > (2 * EltSize)) { |
6401 | if (IsInt2FP) { |
6402 | // One narrowing int_to_fp, then an fp_round. |
6403 | assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering" ); |
6404 | MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
6405 | if (IsStrict) { |
6406 | SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, |
6407 | DAG.getVTList(InterimFVT, MVT::Other), |
6408 | Op.getOperand(0), Src); |
6409 | SDValue Chain = Int2FP.getValue(R: 1); |
6410 | return DAG.getStrictFPExtendOrRound(Op: Int2FP, Chain, DL, VT).first; |
6411 | } |
6412 | SDValue Int2FP = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: InterimFVT, Operand: Src); |
6413 | return DAG.getFPExtendOrRound(Op: Int2FP, DL, VT); |
6414 | } |
6415 | // FP2Int |
6416 | // One narrowing fp_to_int, then truncate the integer. If the float isn't |
6417 | // representable by the integer, the result is poison. |
6418 | MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2), |
6419 | EC: VT.getVectorElementCount()); |
6420 | if (IsStrict) { |
6421 | SDValue FP2Int = |
6422 | DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other), |
6423 | Op.getOperand(0), Src); |
6424 | SDValue Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int); |
6425 | return DAG.getMergeValues(Ops: {Res, FP2Int.getValue(R: 1)}, dl: DL); |
6426 | } |
6427 | SDValue FP2Int = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: IVecVT, Operand: Src); |
6428 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int); |
6429 | } |
6430 | |
    // Scalable vectors can exit here. Patterns will handle equally-sized
    // conversions and the halving/doubling ones.
6433 | if (!VT.isFixedLengthVector()) |
6434 | return Op; |
6435 | |
6436 | // For fixed-length vectors we lower to a custom "VL" node. |
6437 | unsigned RVVOpc = 0; |
6438 | switch (Op.getOpcode()) { |
6439 | default: |
6440 | llvm_unreachable("Impossible opcode" ); |
6441 | case ISD::FP_TO_SINT: |
6442 | RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; |
6443 | break; |
6444 | case ISD::FP_TO_UINT: |
6445 | RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; |
6446 | break; |
6447 | case ISD::SINT_TO_FP: |
6448 | RVVOpc = RISCVISD::SINT_TO_FP_VL; |
6449 | break; |
6450 | case ISD::UINT_TO_FP: |
6451 | RVVOpc = RISCVISD::UINT_TO_FP_VL; |
6452 | break; |
6453 | case ISD::STRICT_FP_TO_SINT: |
6454 | RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL; |
6455 | break; |
6456 | case ISD::STRICT_FP_TO_UINT: |
6457 | RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL; |
6458 | break; |
6459 | case ISD::STRICT_SINT_TO_FP: |
6460 | RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL; |
6461 | break; |
6462 | case ISD::STRICT_UINT_TO_FP: |
6463 | RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL; |
6464 | break; |
6465 | } |
6466 | |
6467 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
6468 | MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
    assert(ContainerVT.getVectorElementCount() ==
               SrcContainerVT.getVectorElementCount() &&
           "Expected same element count" );
6471 | |
6472 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
6473 | |
6474 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
6475 | if (IsStrict) { |
6476 | Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), |
6477 | Op.getOperand(0), Src, Mask, VL); |
6478 | SDValue SubVec = convertFromScalableVector(VT, V: Src, DAG, Subtarget); |
6479 | return DAG.getMergeValues(Ops: {SubVec, Src.getValue(R: 1)}, dl: DL); |
6480 | } |
6481 | Src = DAG.getNode(Opcode: RVVOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
6482 | return convertFromScalableVector(VT, V: Src, DAG, Subtarget); |
6483 | } |
6484 | case ISD::FP_TO_SINT_SAT: |
6485 | case ISD::FP_TO_UINT_SAT: |
6486 | return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); |
6487 | case ISD::FP_TO_BF16: { |
6488 | // Custom lower to ensure the libcall return is passed in an FPR on hard |
6489 | // float ABIs. |
6490 | assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization" ); |
6491 | SDLoc DL(Op); |
6492 | MakeLibCallOptions CallOptions; |
6493 | RTLIB::Libcall LC = |
6494 | RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); |
6495 | SDValue Res = |
6496 | makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; |
6497 | if (Subtarget.is64Bit() && !RV64LegalI32) |
6498 | return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); |
6499 | return DAG.getBitcast(MVT::i32, Res); |
6500 | } |
6501 | case ISD::BF16_TO_FP: { |
6502 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization" ); |
6503 | MVT VT = Op.getSimpleValueType(); |
6504 | SDLoc DL(Op); |
6505 | Op = DAG.getNode( |
6506 | Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0), |
6507 | N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL)); |
6508 | SDValue Res = Subtarget.is64Bit() |
6509 | ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op) |
6510 | : DAG.getBitcast(MVT::f32, Op); |
6511 | // fp_extend if the target VT is bigger than f32. |
6512 | if (VT != MVT::f32) |
6513 | return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res); |
6514 | return Res; |
6515 | } |
6516 | case ISD::FP_TO_FP16: { |
6517 | // Custom lower to ensure the libcall return is passed in an FPR on hard |
6518 | // float ABIs. |
6519 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation" ); |
6520 | SDLoc DL(Op); |
6521 | MakeLibCallOptions CallOptions; |
6522 | RTLIB::Libcall LC = |
6523 | RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16); |
6524 | SDValue Res = |
6525 | makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; |
6526 | if (Subtarget.is64Bit() && !RV64LegalI32) |
6527 | return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); |
6528 | return DAG.getBitcast(MVT::i32, Res); |
6529 | } |
6530 | case ISD::FP16_TO_FP: { |
6531 | // Custom lower to ensure the libcall argument is passed in an FPR on hard |
6532 | // float ABIs. |
6533 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation" ); |
6534 | SDLoc DL(Op); |
6535 | MakeLibCallOptions CallOptions; |
6536 | SDValue Arg = Subtarget.is64Bit() |
6537 | ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, |
6538 | Op.getOperand(0)) |
6539 | : DAG.getBitcast(MVT::f32, Op.getOperand(0)); |
6540 | SDValue Res = |
6541 | makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL) |
6542 | .first; |
6543 | return Res; |
6544 | } |
6545 | case ISD::FTRUNC: |
6546 | case ISD::FCEIL: |
6547 | case ISD::FFLOOR: |
6548 | case ISD::FNEARBYINT: |
6549 | case ISD::FRINT: |
6550 | case ISD::FROUND: |
6551 | case ISD::FROUNDEVEN: |
6552 | return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
6553 | case ISD::LRINT: |
6554 | case ISD::LLRINT: |
6555 | return lowerVectorXRINT(Op, DAG, Subtarget); |
6556 | case ISD::VECREDUCE_ADD: |
6557 | case ISD::VECREDUCE_UMAX: |
6558 | case ISD::VECREDUCE_SMAX: |
6559 | case ISD::VECREDUCE_UMIN: |
6560 | case ISD::VECREDUCE_SMIN: |
6561 | return lowerVECREDUCE(Op, DAG); |
6562 | case ISD::VECREDUCE_AND: |
6563 | case ISD::VECREDUCE_OR: |
6564 | case ISD::VECREDUCE_XOR: |
6565 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
6566 | return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); |
6567 | return lowerVECREDUCE(Op, DAG); |
6568 | case ISD::VECREDUCE_FADD: |
6569 | case ISD::VECREDUCE_SEQ_FADD: |
6570 | case ISD::VECREDUCE_FMIN: |
6571 | case ISD::VECREDUCE_FMAX: |
6572 | case ISD::VECREDUCE_FMAXIMUM: |
6573 | case ISD::VECREDUCE_FMINIMUM: |
6574 | return lowerFPVECREDUCE(Op, DAG); |
6575 | case ISD::VP_REDUCE_ADD: |
6576 | case ISD::VP_REDUCE_UMAX: |
6577 | case ISD::VP_REDUCE_SMAX: |
6578 | case ISD::VP_REDUCE_UMIN: |
6579 | case ISD::VP_REDUCE_SMIN: |
6580 | case ISD::VP_REDUCE_FADD: |
6581 | case ISD::VP_REDUCE_SEQ_FADD: |
6582 | case ISD::VP_REDUCE_FMIN: |
6583 | case ISD::VP_REDUCE_FMAX: |
6584 | if (Op.getOperand(1).getValueType() == MVT::nxv32f16 && |
6585 | (Subtarget.hasVInstructionsF16Minimal() && |
6586 | !Subtarget.hasVInstructionsF16())) |
6587 | return SplitVectorReductionOp(Op, DAG); |
6588 | return lowerVPREDUCE(Op, DAG); |
6589 | case ISD::VP_REDUCE_AND: |
6590 | case ISD::VP_REDUCE_OR: |
6591 | case ISD::VP_REDUCE_XOR: |
6592 | if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) |
6593 | return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); |
6594 | return lowerVPREDUCE(Op, DAG); |
6595 | case ISD::UNDEF: { |
6596 | MVT ContainerVT = getContainerForFixedLengthVector(VT: Op.getSimpleValueType()); |
6597 | return convertFromScalableVector(VT: Op.getSimpleValueType(), |
6598 | V: DAG.getUNDEF(VT: ContainerVT), DAG, Subtarget); |
6599 | } |
6600 | case ISD::INSERT_SUBVECTOR: |
6601 | return lowerINSERT_SUBVECTOR(Op, DAG); |
6602 | case ISD::EXTRACT_SUBVECTOR: |
6603 | return lowerEXTRACT_SUBVECTOR(Op, DAG); |
6604 | case ISD::VECTOR_DEINTERLEAVE: |
6605 | return lowerVECTOR_DEINTERLEAVE(Op, DAG); |
6606 | case ISD::VECTOR_INTERLEAVE: |
6607 | return lowerVECTOR_INTERLEAVE(Op, DAG); |
6608 | case ISD::STEP_VECTOR: |
6609 | return lowerSTEP_VECTOR(Op, DAG); |
6610 | case ISD::VECTOR_REVERSE: |
6611 | return lowerVECTOR_REVERSE(Op, DAG); |
6612 | case ISD::VECTOR_SPLICE: |
6613 | return lowerVECTOR_SPLICE(Op, DAG); |
6614 | case ISD::BUILD_VECTOR: |
6615 | return lowerBUILD_VECTOR(Op, DAG, Subtarget); |
6616 | case ISD::SPLAT_VECTOR: |
6617 | if (Op.getValueType().getScalarType() == MVT::f16 && |
6618 | (Subtarget.hasVInstructionsF16Minimal() && |
6619 | !Subtarget.hasVInstructionsF16())) { |
6620 | if (Op.getValueType() == MVT::nxv32f16) |
6621 | return SplitVectorOp(Op, DAG); |
6622 | SDLoc DL(Op); |
6623 | SDValue NewScalar = |
6624 | DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); |
6625 | SDValue NewSplat = DAG.getNode( |
6626 | ISD::SPLAT_VECTOR, DL, |
6627 | MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()), |
6628 | NewScalar); |
6629 | return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NewSplat, |
6630 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
6631 | } |
6632 | if (Op.getValueType().getVectorElementType() == MVT::i1) |
6633 | return lowerVectorMaskSplat(Op, DAG); |
6634 | return SDValue(); |
6635 | case ISD::VECTOR_SHUFFLE: |
6636 | return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); |
6637 | case ISD::CONCAT_VECTORS: { |
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than the default expansion, which goes through the stack.
6640 | SDLoc DL(Op); |
6641 | MVT VT = Op.getSimpleValueType(); |
6642 | MVT ContainerVT = VT; |
6643 | if (VT.isFixedLengthVector()) |
6644 | ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget); |
6645 | |
6646 | // Recursively split concat_vectors with more than 2 operands: |
6647 | // |
6648 | // concat_vector op1, op2, op3, op4 |
6649 | // -> |
6650 | // concat_vector (concat_vector op1, op2), (concat_vector op3, op4) |
6651 | // |
6652 | // This reduces the length of the chain of vslideups and allows us to |
6653 | // perform the vslideups at a smaller LMUL, limited to MF2. |
6654 | if (Op.getNumOperands() > 2 && |
6655 | ContainerVT.bitsGE(VT: getLMUL1VT(VT: ContainerVT))) { |
6656 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
6657 | assert(isPowerOf2_32(Op.getNumOperands())); |
6658 | size_t HalfNumOps = Op.getNumOperands() / 2; |
6659 | SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT, |
6660 | Ops: Op->ops().take_front(N: HalfNumOps)); |
6661 | SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT, |
6662 | Ops: Op->ops().drop_front(N: HalfNumOps)); |
6663 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi); |
6664 | } |
6665 | |
6666 | unsigned NumOpElts = |
6667 | Op.getOperand(i: 0).getSimpleValueType().getVectorMinNumElements(); |
6668 | SDValue Vec = DAG.getUNDEF(VT); |
6669 | for (const auto &OpIdx : enumerate(First: Op->ops())) { |
6670 | SDValue SubVec = OpIdx.value(); |
6671 | // Don't insert undef subvectors. |
6672 | if (SubVec.isUndef()) |
6673 | continue; |
6674 | Vec = |
6675 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Vec, N2: SubVec, |
6676 | N3: DAG.getVectorIdxConstant(Val: OpIdx.index() * NumOpElts, DL)); |
6677 | } |
6678 | return Vec; |
6679 | } |
6680 | case ISD::LOAD: |
6681 | if (auto V = expandUnalignedRVVLoad(Op, DAG)) |
6682 | return V; |
6683 | if (Op.getValueType().isFixedLengthVector()) |
6684 | return lowerFixedLengthVectorLoadToRVV(Op, DAG); |
6685 | return Op; |
6686 | case ISD::STORE: |
6687 | if (auto V = expandUnalignedRVVStore(Op, DAG)) |
6688 | return V; |
6689 | if (Op.getOperand(i: 1).getValueType().isFixedLengthVector()) |
6690 | return lowerFixedLengthVectorStoreToRVV(Op, DAG); |
6691 | return Op; |
6692 | case ISD::MLOAD: |
6693 | case ISD::VP_LOAD: |
6694 | return lowerMaskedLoad(Op, DAG); |
6695 | case ISD::MSTORE: |
6696 | case ISD::VP_STORE: |
6697 | return lowerMaskedStore(Op, DAG); |
6698 | case ISD::SELECT_CC: { |
6699 | // This occurs because we custom legalize SETGT and SETUGT for setcc. That |
6700 | // causes LegalizeDAG to think we need to custom legalize select_cc. Expand |
6701 | // into separate SETCC+SELECT just like LegalizeDAG. |
6702 | SDValue Tmp1 = Op.getOperand(i: 0); |
6703 | SDValue Tmp2 = Op.getOperand(i: 1); |
6704 | SDValue True = Op.getOperand(i: 2); |
6705 | SDValue False = Op.getOperand(i: 3); |
6706 | EVT VT = Op.getValueType(); |
6707 | SDValue CC = Op.getOperand(i: 4); |
6708 | EVT CmpVT = Tmp1.getValueType(); |
6709 | EVT CCVT = |
6710 | getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: CmpVT); |
6711 | SDLoc DL(Op); |
6712 | SDValue Cond = |
6713 | DAG.getNode(Opcode: ISD::SETCC, DL, VT: CCVT, N1: Tmp1, N2: Tmp2, N3: CC, Flags: Op->getFlags()); |
6714 | return DAG.getSelect(DL, VT, Cond, LHS: True, RHS: False); |
6715 | } |
6716 | case ISD::SETCC: { |
6717 | MVT OpVT = Op.getOperand(i: 0).getSimpleValueType(); |
6718 | if (OpVT.isScalarInteger()) { |
6719 | MVT VT = Op.getSimpleValueType(); |
6720 | SDValue LHS = Op.getOperand(i: 0); |
6721 | SDValue RHS = Op.getOperand(i: 1); |
6722 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get(); |
6723 | assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && |
6724 | "Unexpected CondCode" ); |
6725 | |
6726 | SDLoc DL(Op); |
6727 | |
6728 | // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can |
6729 | // convert this to the equivalent of (set(u)ge X, C+1) by using |
6730 | // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant |
6731 | // in a register. |
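      // For example, (setgt X, 5) becomes (xori (slti X, 6), 1).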
6732 | if (isa<ConstantSDNode>(Val: RHS)) { |
6733 | int64_t Imm = cast<ConstantSDNode>(Val&: RHS)->getSExtValue(); |
6734 | if (Imm != 0 && isInt<12>(x: (uint64_t)Imm + 1)) { |
6735 | // If this is an unsigned compare and the constant is -1, incrementing |
6736 | // the constant would change behavior. The result should be false. |
6737 | if (CCVal == ISD::SETUGT && Imm == -1) |
6738 | return DAG.getConstant(Val: 0, DL, VT); |
6739 | // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. |
6740 | CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal); |
6741 | SDValue SetCC = DAG.getSetCC( |
6742 | DL, VT, LHS, RHS: DAG.getConstant(Val: Imm + 1, DL, VT: OpVT), Cond: CCVal); |
6743 | return DAG.getLogicalNOT(DL, Val: SetCC, VT); |
6744 | } |
6745 | } |
6746 | |
      // Not a constant we could handle; swap the operands and condition code
      // to SETLT/SETULT.
6749 | CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal); |
6750 | return DAG.getSetCC(DL, VT, LHS: RHS, RHS: LHS, Cond: CCVal); |
6751 | } |
6752 | |
6753 | if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && |
6754 | (Subtarget.hasVInstructionsF16Minimal() && |
6755 | !Subtarget.hasVInstructionsF16())) |
6756 | return SplitVectorOp(Op, DAG); |
6757 | |
6758 | return lowerFixedLengthVectorSetccToRVV(Op, DAG); |
6759 | } |
6760 | case ISD::ADD: |
6761 | case ISD::SUB: |
6762 | case ISD::MUL: |
6763 | case ISD::MULHS: |
6764 | case ISD::MULHU: |
6765 | case ISD::AND: |
6766 | case ISD::OR: |
6767 | case ISD::XOR: |
6768 | case ISD::SDIV: |
6769 | case ISD::SREM: |
6770 | case ISD::UDIV: |
6771 | case ISD::UREM: |
6772 | case ISD::BSWAP: |
6773 | case ISD::CTPOP: |
6774 | return lowerToScalableOp(Op, DAG); |
6775 | case ISD::SHL: |
6776 | case ISD::SRA: |
6777 | case ISD::SRL: |
6778 | if (Op.getSimpleValueType().isFixedLengthVector()) |
6779 | return lowerToScalableOp(Op, DAG); |
6780 | // This can be called for an i32 shift amount that needs to be promoted. |
6781 | assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && |
6782 | "Unexpected custom legalisation" ); |
6783 | return SDValue(); |
6784 | case ISD::FADD: |
6785 | case ISD::FSUB: |
6786 | case ISD::FMUL: |
6787 | case ISD::FDIV: |
6788 | case ISD::FNEG: |
6789 | case ISD::FABS: |
6790 | case ISD::FSQRT: |
6791 | case ISD::FMA: |
6792 | case ISD::FMINNUM: |
6793 | case ISD::FMAXNUM: |
6794 | if (Op.getValueType() == MVT::nxv32f16 && |
6795 | (Subtarget.hasVInstructionsF16Minimal() && |
6796 | !Subtarget.hasVInstructionsF16())) |
6797 | return SplitVectorOp(Op, DAG); |
6798 | [[fallthrough]]; |
6799 | case ISD::AVGFLOORU: |
6800 | case ISD::AVGCEILU: |
6801 | case ISD::SMIN: |
6802 | case ISD::SMAX: |
6803 | case ISD::UMIN: |
6804 | case ISD::UMAX: |
6805 | return lowerToScalableOp(Op, DAG); |
6806 | case ISD::UADDSAT: |
6807 | case ISD::USUBSAT: |
6808 | if (!Op.getValueType().isVector()) |
6809 | return lowerUADDSAT_USUBSAT(Op, DAG); |
6810 | return lowerToScalableOp(Op, DAG); |
6811 | case ISD::SADDSAT: |
6812 | case ISD::SSUBSAT: |
6813 | if (!Op.getValueType().isVector()) |
6814 | return lowerSADDSAT_SSUBSAT(Op, DAG); |
6815 | return lowerToScalableOp(Op, DAG); |
6816 | case ISD::ABDS: |
6817 | case ISD::ABDU: { |
6818 | SDLoc dl(Op); |
6819 | EVT VT = Op->getValueType(ResNo: 0); |
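    // Freeze both operands: each is used twice in the expansion below, and
    // two uses of an unfrozen undef/poison value would not be guaranteed to
    // observe the same value.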
6820 | SDValue LHS = DAG.getFreeze(V: Op->getOperand(Num: 0)); |
6821 | SDValue RHS = DAG.getFreeze(V: Op->getOperand(Num: 1)); |
6822 | bool IsSigned = Op->getOpcode() == ISD::ABDS; |
6823 | |
6824 | // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs)) |
6825 | // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs)) |
6826 | unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX; |
6827 | unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN; |
6828 | SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS); |
6829 | SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS); |
6830 | return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min); |
6831 | } |
6832 | case ISD::ABS: |
6833 | case ISD::VP_ABS: |
6834 | return lowerABS(Op, DAG); |
6835 | case ISD::CTLZ: |
6836 | case ISD::CTLZ_ZERO_UNDEF: |
6837 | case ISD::CTTZ: |
6838 | case ISD::CTTZ_ZERO_UNDEF: |
6839 | if (Subtarget.hasStdExtZvbb()) |
6840 | return lowerToScalableOp(Op, DAG); |
6841 | assert(Op.getOpcode() != ISD::CTTZ); |
6842 | return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); |
6843 | case ISD::VSELECT: |
6844 | return lowerFixedLengthVectorSelectToRVV(Op, DAG); |
6845 | case ISD::FCOPYSIGN: |
6846 | if (Op.getValueType() == MVT::nxv32f16 && |
6847 | (Subtarget.hasVInstructionsF16Minimal() && |
6848 | !Subtarget.hasVInstructionsF16())) |
6849 | return SplitVectorOp(Op, DAG); |
6850 | return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); |
6851 | case ISD::STRICT_FADD: |
6852 | case ISD::STRICT_FSUB: |
6853 | case ISD::STRICT_FMUL: |
6854 | case ISD::STRICT_FDIV: |
6855 | case ISD::STRICT_FSQRT: |
6856 | case ISD::STRICT_FMA: |
6857 | if (Op.getValueType() == MVT::nxv32f16 && |
6858 | (Subtarget.hasVInstructionsF16Minimal() && |
6859 | !Subtarget.hasVInstructionsF16())) |
6860 | return SplitStrictFPVectorOp(Op, DAG); |
6861 | return lowerToScalableOp(Op, DAG); |
6862 | case ISD::STRICT_FSETCC: |
6863 | case ISD::STRICT_FSETCCS: |
6864 | return lowerVectorStrictFSetcc(Op, DAG); |
6865 | case ISD::STRICT_FCEIL: |
6866 | case ISD::STRICT_FRINT: |
6867 | case ISD::STRICT_FFLOOR: |
6868 | case ISD::STRICT_FTRUNC: |
6869 | case ISD::STRICT_FNEARBYINT: |
6870 | case ISD::STRICT_FROUND: |
6871 | case ISD::STRICT_FROUNDEVEN: |
6872 | return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
6873 | case ISD::MGATHER: |
6874 | case ISD::VP_GATHER: |
6875 | return lowerMaskedGather(Op, DAG); |
6876 | case ISD::MSCATTER: |
6877 | case ISD::VP_SCATTER: |
6878 | return lowerMaskedScatter(Op, DAG); |
6879 | case ISD::GET_ROUNDING: |
6880 | return lowerGET_ROUNDING(Op, DAG); |
6881 | case ISD::SET_ROUNDING: |
6882 | return lowerSET_ROUNDING(Op, DAG); |
6883 | case ISD::EH_DWARF_CFA: |
6884 | return lowerEH_DWARF_CFA(Op, DAG); |
6885 | case ISD::VP_SELECT: |
6886 | case ISD::VP_MERGE: |
6887 | case ISD::VP_ADD: |
6888 | case ISD::VP_SUB: |
6889 | case ISD::VP_MUL: |
6890 | case ISD::VP_SDIV: |
6891 | case ISD::VP_UDIV: |
6892 | case ISD::VP_SREM: |
6893 | case ISD::VP_UREM: |
6894 | case ISD::VP_UADDSAT: |
6895 | case ISD::VP_USUBSAT: |
6896 | case ISD::VP_SADDSAT: |
6897 | case ISD::VP_SSUBSAT: |
6898 | case ISD::VP_LRINT: |
6899 | case ISD::VP_LLRINT: |
6900 | return lowerVPOp(Op, DAG); |
6901 | case ISD::VP_AND: |
6902 | case ISD::VP_OR: |
6903 | case ISD::VP_XOR: |
6904 | return lowerLogicVPOp(Op, DAG); |
6905 | case ISD::VP_FADD: |
6906 | case ISD::VP_FSUB: |
6907 | case ISD::VP_FMUL: |
6908 | case ISD::VP_FDIV: |
6909 | case ISD::VP_FNEG: |
6910 | case ISD::VP_FABS: |
6911 | case ISD::VP_SQRT: |
6912 | case ISD::VP_FMA: |
6913 | case ISD::VP_FMINNUM: |
6914 | case ISD::VP_FMAXNUM: |
6915 | case ISD::VP_FCOPYSIGN: |
6916 | if (Op.getValueType() == MVT::nxv32f16 && |
6917 | (Subtarget.hasVInstructionsF16Minimal() && |
6918 | !Subtarget.hasVInstructionsF16())) |
6919 | return SplitVPOp(Op, DAG); |
6920 | [[fallthrough]]; |
6921 | case ISD::VP_ASHR: |
6922 | case ISD::VP_LSHR: |
6923 | case ISD::VP_SHL: |
6924 | return lowerVPOp(Op, DAG); |
6925 | case ISD::VP_IS_FPCLASS: |
6926 | return LowerIS_FPCLASS(Op, DAG); |
6927 | case ISD::VP_SIGN_EXTEND: |
6928 | case ISD::VP_ZERO_EXTEND: |
6929 | if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) |
6930 | return lowerVPExtMaskOp(Op, DAG); |
6931 | return lowerVPOp(Op, DAG); |
6932 | case ISD::VP_TRUNCATE: |
6933 | return lowerVectorTruncLike(Op, DAG); |
6934 | case ISD::VP_FP_EXTEND: |
6935 | case ISD::VP_FP_ROUND: |
6936 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
6937 | case ISD::VP_SINT_TO_FP: |
6938 | case ISD::VP_UINT_TO_FP: |
6939 | if (Op.getValueType().isVector() && |
6940 | Op.getValueType().getScalarType() == MVT::f16 && |
6941 | (Subtarget.hasVInstructionsF16Minimal() && |
6942 | !Subtarget.hasVInstructionsF16())) { |
6943 | if (Op.getValueType() == MVT::nxv32f16) |
6944 | return SplitVPOp(Op, DAG); |
6945 | // int -> f32 |
6946 | SDLoc DL(Op); |
6947 | MVT NVT = |
6948 | MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); |
6949 | auto NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops()); |
6950 | // f32 -> f16 |
6951 | return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC, |
6952 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
6953 | } |
6954 | [[fallthrough]]; |
6955 | case ISD::VP_FP_TO_SINT: |
6956 | case ISD::VP_FP_TO_UINT: |
6957 | if (SDValue Op1 = Op.getOperand(i: 0); |
6958 | Op1.getValueType().isVector() && |
6959 | Op1.getValueType().getScalarType() == MVT::f16 && |
6960 | (Subtarget.hasVInstructionsF16Minimal() && |
6961 | !Subtarget.hasVInstructionsF16())) { |
6962 | if (Op1.getValueType() == MVT::nxv32f16) |
6963 | return SplitVPOp(Op, DAG); |
6964 | // f16 -> f32 |
6965 | SDLoc DL(Op); |
6966 | MVT NVT = MVT::getVectorVT(MVT::f32, |
6967 | Op1.getValueType().getVectorElementCount()); |
6968 | SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1); |
6969 | // f32 -> int |
6970 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
6971 | Ops: {WidenVec, Op.getOperand(i: 1), Op.getOperand(i: 2)}); |
6972 | } |
6973 | return lowerVPFPIntConvOp(Op, DAG); |
6974 | case ISD::VP_SETCC: |
6975 | if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && |
6976 | (Subtarget.hasVInstructionsF16Minimal() && |
6977 | !Subtarget.hasVInstructionsF16())) |
6978 | return SplitVPOp(Op, DAG); |
6979 | if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) |
6980 | return lowerVPSetCCMaskOp(Op, DAG); |
6981 | [[fallthrough]]; |
6982 | case ISD::VP_SMIN: |
6983 | case ISD::VP_SMAX: |
6984 | case ISD::VP_UMIN: |
6985 | case ISD::VP_UMAX: |
6986 | case ISD::VP_BITREVERSE: |
6987 | case ISD::VP_BSWAP: |
6988 | return lowerVPOp(Op, DAG); |
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6999 | case ISD::VP_CTPOP: |
7000 | return lowerVPOp(Op, DAG); |
7001 | case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: |
7002 | return lowerVPStridedLoad(Op, DAG); |
7003 | case ISD::EXPERIMENTAL_VP_STRIDED_STORE: |
7004 | return lowerVPStridedStore(Op, DAG); |
7005 | case ISD::VP_FCEIL: |
7006 | case ISD::VP_FFLOOR: |
7007 | case ISD::VP_FRINT: |
7008 | case ISD::VP_FNEARBYINT: |
7009 | case ISD::VP_FROUND: |
7010 | case ISD::VP_FROUNDEVEN: |
7011 | case ISD::VP_FROUNDTOZERO: |
7012 | if (Op.getValueType() == MVT::nxv32f16 && |
7013 | (Subtarget.hasVInstructionsF16Minimal() && |
7014 | !Subtarget.hasVInstructionsF16())) |
7015 | return SplitVPOp(Op, DAG); |
7016 | return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
7017 | case ISD::VP_FMAXIMUM: |
7018 | case ISD::VP_FMINIMUM: |
7019 | if (Op.getValueType() == MVT::nxv32f16 && |
7020 | (Subtarget.hasVInstructionsF16Minimal() && |
7021 | !Subtarget.hasVInstructionsF16())) |
7022 | return SplitVPOp(Op, DAG); |
7023 | return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); |
7024 | case ISD::EXPERIMENTAL_VP_SPLICE: |
7025 | return lowerVPSpliceExperimental(Op, DAG); |
7026 | case ISD::EXPERIMENTAL_VP_REVERSE: |
7027 | return lowerVPReverseExperimental(Op, DAG); |
7028 | } |
7029 | } |
7030 | |
7031 | static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, |
7032 | SelectionDAG &DAG, unsigned Flags) { |
7033 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
7034 | } |
7035 | |
7036 | static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty, |
7037 | SelectionDAG &DAG, unsigned Flags) { |
7038 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
7039 | TargetFlags: Flags); |
7040 | } |
7041 | |
7042 | static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, |
7043 | SelectionDAG &DAG, unsigned Flags) { |
7044 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
7045 | Offset: N->getOffset(), TargetFlags: Flags); |
7046 | } |
7047 | |
7048 | static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty, |
7049 | SelectionDAG &DAG, unsigned Flags) { |
7050 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
7051 | } |
7052 | |
7053 | template <class NodeTy> |
7054 | SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
7055 | bool IsLocal, bool IsExternWeak) const { |
7056 | SDLoc DL(N); |
7057 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7058 | |
  // When HWASAN is used and tagging of global variables is enabled, they
  // should be accessed via the GOT, since the tagged address of a global is
  // incompatible with existing code models. This also applies to non-PIC
  // mode.
7063 | if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { |
7064 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
7065 | if (IsLocal && !Subtarget.allowTaggedGlobals()) |
7066 | // Use PC-relative addressing to access the symbol. This generates the |
7067 | // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) |
7068 | // %pcrel_lo(auipc)). |
7069 | return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr); |
7070 | |
7071 | // Use PC-relative addressing to access the GOT for this symbol, then load |
7072 | // the address from the GOT. This generates the pattern (PseudoLGA sym), |
7073 | // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). |
7074 | SDValue Load = |
7075 | SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); |
7076 | MachineFunction &MF = DAG.getMachineFunction(); |
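    // The memory operand marks this as an invariant, dereferenceable load
    // from the GOT, allowing later passes to hoist or CSE it.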
7077 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
7078 | PtrInfo: MachinePointerInfo::getGOT(MF), |
7079 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
7080 | MachineMemOperand::MOInvariant, |
7081 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
7082 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
7083 | return Load; |
7084 | } |
7085 | |
7086 | switch (getTargetMachine().getCodeModel()) { |
7087 | default: |
7088 | report_fatal_error(reason: "Unsupported code model for lowering" ); |
7089 | case CodeModel::Small: { |
7090 | // Generate a sequence for accessing addresses within the first 2 GiB of |
7091 | // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). |
7092 | SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); |
7093 | SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); |
7094 | SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi); |
7095 | return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNHi, N2: AddrLo); |
7096 | } |
7097 | case CodeModel::Medium: { |
7098 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
7099 | if (IsExternWeak) { |
7100 | // An extern weak symbol may be undefined, i.e. have value 0, which may |
7101 | // not be within 2GiB of PC, so use GOT-indirect addressing to access the |
7102 | // symbol. This generates the pattern (PseudoLGA sym), which expands to |
7103 | // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). |
7104 | SDValue Load = |
7105 | SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); |
7106 | MachineFunction &MF = DAG.getMachineFunction(); |
7107 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
7108 | PtrInfo: MachinePointerInfo::getGOT(MF), |
7109 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
7110 | MachineMemOperand::MOInvariant, |
7111 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
7112 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
7113 | return Load; |
7114 | } |
7115 | |
7116 | // Generate a sequence for accessing addresses within any 2GiB range within |
7117 | // the address space. This generates the pattern (PseudoLLA sym), which |
7118 | // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). |
7119 | return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr); |
7120 | } |
7121 | } |
7122 | } |
7123 | |
7124 | SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, |
7125 | SelectionDAG &DAG) const { |
7126 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
7127 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
7128 | const GlobalValue *GV = N->getGlobal(); |
7129 | return getAddr(N, DAG, IsLocal: GV->isDSOLocal(), IsExternWeak: GV->hasExternalWeakLinkage()); |
7130 | } |
7131 | |
7132 | SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, |
7133 | SelectionDAG &DAG) const { |
7134 | BlockAddressSDNode *N = cast<BlockAddressSDNode>(Val&: Op); |
7135 | |
7136 | return getAddr(N, DAG); |
7137 | } |
7138 | |
7139 | SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, |
7140 | SelectionDAG &DAG) const { |
7141 | ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Val&: Op); |
7142 | |
7143 | return getAddr(N, DAG); |
7144 | } |
7145 | |
7146 | SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, |
7147 | SelectionDAG &DAG) const { |
7148 | JumpTableSDNode *N = cast<JumpTableSDNode>(Val&: Op); |
7149 | |
7150 | return getAddr(N, DAG); |
7151 | } |
7152 | |
7153 | SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
7154 | SelectionDAG &DAG, |
7155 | bool UseGOT) const { |
7156 | SDLoc DL(N); |
7157 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7158 | const GlobalValue *GV = N->getGlobal(); |
7159 | MVT XLenVT = Subtarget.getXLenVT(); |
7160 | |
7161 | if (UseGOT) { |
7162 | // Use PC-relative addressing to access the GOT for this TLS symbol, then |
7163 | // load the address from the GOT and add the thread pointer. This generates |
7164 | // the pattern (PseudoLA_TLS_IE sym), which expands to |
7165 | // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). |
7166 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
7167 | SDValue Load = |
7168 | SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); |
7169 | MachineFunction &MF = DAG.getMachineFunction(); |
7170 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
7171 | PtrInfo: MachinePointerInfo::getGOT(MF), |
7172 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
7173 | MachineMemOperand::MOInvariant, |
7174 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
7175 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
7176 | |
7177 | // Add the thread pointer. |
7178 | SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); |
7179 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Load, N2: TPReg); |
7180 | } |
7181 | |
7182 | // Generate a sequence for accessing the address relative to the thread |
7183 | // pointer, with the appropriate adjustment for the thread pointer offset. |
7184 | // This generates the pattern |
7185 | // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) |
7186 | SDValue AddrHi = |
7187 | DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_HI); |
7188 | SDValue AddrAdd = |
7189 | DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_ADD); |
7190 | SDValue AddrLo = |
7191 | DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_LO); |
7192 | |
7193 | SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi); |
7194 | SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); |
7195 | SDValue MNAdd = |
7196 | DAG.getNode(Opcode: RISCVISD::ADD_TPREL, DL, VT: Ty, N1: MNHi, N2: TPReg, N3: AddrAdd); |
7197 | return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNAdd, N2: AddrLo); |
7198 | } |
7199 | |
7200 | SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
7201 | SelectionDAG &DAG) const { |
7202 | SDLoc DL(N); |
7203 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7204 | IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits()); |
7205 | const GlobalValue *GV = N->getGlobal(); |
7206 | |
7207 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
7208 | // This generates the pattern (PseudoLA_TLS_GD sym), which expands to |
7209 | // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). |
7210 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
7211 | SDValue Load = |
7212 | SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); |
7213 | |
7214 | // Prepare argument list to generate call. |
7215 | ArgListTy Args; |
7216 | ArgListEntry Entry; |
7217 | Entry.Node = Load; |
7218 | Entry.Ty = CallTy; |
7219 | Args.push_back(x: Entry); |
7220 | |
7221 | // Setup call to __tls_get_addr. |
7222 | TargetLowering::CallLoweringInfo CLI(DAG); |
7223 | CLI.setDebugLoc(DL) |
7224 | .setChain(DAG.getEntryNode()) |
7225 | .setLibCallee(CC: CallingConv::C, ResultType: CallTy, |
7226 | Target: DAG.getExternalSymbol(Sym: "__tls_get_addr" , VT: Ty), |
7227 | ArgsList: std::move(Args)); |
7228 | |
7229 | return LowerCallTo(CLI).first; |
7230 | } |
7231 | |
7232 | SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, |
7233 | SelectionDAG &DAG) const { |
7234 | SDLoc DL(N); |
7235 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7236 | const GlobalValue *GV = N->getGlobal(); |
7237 | |
7238 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
7239 | // This generates the pattern (PseudoLA_TLSDESC sym), which expands to |
7240 | // |
7241 | // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol) |
7242 | // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label) |
7243 | // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label) |
7244 | // jalr t0, tY // R_RISCV_TLSDESC_CALL(label) |
7245 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
7246 | return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0); |
7247 | } |
7248 | |
7249 | SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
7250 | SelectionDAG &DAG) const { |
7251 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
7252 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
7253 | |
7254 | if (DAG.getTarget().useEmulatedTLS()) |
7255 | return LowerToTLSEmulatedModel(GA: N, DAG); |
7256 | |
7257 | TLSModel::Model Model = getTargetMachine().getTLSModel(GV: N->getGlobal()); |
7258 | |
7259 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
7260 | CallingConv::GHC) |
7261 | report_fatal_error(reason: "In GHC calling convention TLS is not supported" ); |
7262 | |
7263 | SDValue Addr; |
7264 | switch (Model) { |
7265 | case TLSModel::LocalExec: |
7266 | Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); |
7267 | break; |
7268 | case TLSModel::InitialExec: |
7269 | Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); |
7270 | break; |
7271 | case TLSModel::LocalDynamic: |
7272 | case TLSModel::GeneralDynamic: |
7273 | Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG) |
7274 | : getDynamicTLSAddr(N, DAG); |
7275 | break; |
7276 | } |
7277 | |
7278 | return Addr; |
7279 | } |
7280 | |
7281 | // Return true if Val is equal to (setcc LHS, RHS, CC). |
7282 | // Return false if Val is the inverse of (setcc LHS, RHS, CC). |
7283 | // Otherwise, return std::nullopt. |
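// For example, given Val == (setcc a, b, setlt): matchSetCC(a, b, setlt, Val)
// returns true, matchSetCC(a, b, setge, Val) returns false, and comparisons
// against unrelated operands return std::nullopt.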
7284 | static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, |
7285 | ISD::CondCode CC, SDValue Val) { |
7286 | assert(Val->getOpcode() == ISD::SETCC); |
7287 | SDValue LHS2 = Val.getOperand(i: 0); |
7288 | SDValue RHS2 = Val.getOperand(i: 1); |
7289 | ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get(); |
7290 | |
7291 | if (LHS == LHS2 && RHS == RHS2) { |
7292 | if (CC == CC2) |
7293 | return true; |
7294 | if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType())) |
7295 | return false; |
7296 | } else if (LHS == RHS2 && RHS == LHS2) { |
7297 | CC2 = ISD::getSetCCSwappedOperands(Operation: CC2); |
7298 | if (CC == CC2) |
7299 | return true; |
7300 | if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType())) |
7301 | return false; |
7302 | } |
7303 | |
7304 | return std::nullopt; |
7305 | } |
7306 | |
7307 | static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, |
7308 | const RISCVSubtarget &Subtarget) { |
7309 | SDValue CondV = N->getOperand(Num: 0); |
7310 | SDValue TrueV = N->getOperand(Num: 1); |
7311 | SDValue FalseV = N->getOperand(Num: 2); |
7312 | MVT VT = N->getSimpleValueType(ResNo: 0); |
7313 | SDLoc DL(N); |
7314 | |
7315 | if (!Subtarget.hasConditionalMoveFusion()) { |
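    // The condition is a 0/1 value here, so -c is either 0 or all ones and
    // c-1 is either all ones or 0; each fold below is therefore a plain
    // bitwise identity.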
7316 | // (select c, -1, y) -> -c | y |
7317 | if (isAllOnesConstant(V: TrueV)) { |
7318 | SDValue Neg = DAG.getNegative(Val: CondV, DL, VT); |
7319 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV)); |
7320 | } |
7321 | // (select c, y, -1) -> (c-1) | y |
7322 | if (isAllOnesConstant(V: FalseV)) { |
7323 | SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, |
7324 | N2: DAG.getAllOnesConstant(DL, VT)); |
7325 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV)); |
7326 | } |
7327 | |
7328 | // (select c, 0, y) -> (c-1) & y |
7329 | if (isNullConstant(V: TrueV)) { |
7330 | SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, |
7331 | N2: DAG.getAllOnesConstant(DL, VT)); |
7332 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV)); |
7333 | } |
7334 | // (select c, y, 0) -> -c & y |
7335 | if (isNullConstant(V: FalseV)) { |
7336 | SDValue Neg = DAG.getNegative(Val: CondV, DL, VT); |
7337 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV)); |
7338 | } |
7339 | } |
7340 | |
7341 | // select c, ~x, x --> xor -c, x |
7342 | if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) { |
7343 | const APInt &TrueVal = TrueV->getAsAPIntVal(); |
7344 | const APInt &FalseVal = FalseV->getAsAPIntVal(); |
7345 | if (~TrueVal == FalseVal) { |
7346 | SDValue Neg = DAG.getNegative(Val: CondV, DL, VT); |
7347 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV); |
7348 | } |
7349 | } |
7350 | |
7351 | // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops |
7352 | // when both truev and falsev are also setcc. |
7353 | if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC && |
7354 | FalseV.getOpcode() == ISD::SETCC) { |
7355 | SDValue LHS = CondV.getOperand(i: 0); |
7356 | SDValue RHS = CondV.getOperand(i: 1); |
7357 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get(); |
7358 | |
7359 | // (select x, x, y) -> x | y |
7360 | // (select !x, x, y) -> x & y |
7361 | if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) { |
7362 | return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV, |
7363 | N2: DAG.getFreeze(V: FalseV)); |
7364 | } |
7365 | // (select x, y, x) -> x & y |
7366 | // (select !x, y, x) -> x | y |
7367 | if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) { |
7368 | return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT, |
7369 | N1: DAG.getFreeze(V: TrueV), N2: FalseV); |
7370 | } |
7371 | } |
7372 | |
7373 | return SDValue(); |
7374 | } |
7375 | |
7376 | // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants |
7377 | // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. |
// For now we only consider the transformation profitable if `binOp(c0, c1)`
// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
7380 | // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize |
7381 | // than `c0`? |
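// For example, (add (select cond, x, -3), 3) becomes
// (select cond, (add x, 3), 0), and the all-zeros arm then allows the select
// itself to be lowered to an AND.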
7382 | static SDValue |
7383 | foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, |
7384 | const RISCVSubtarget &Subtarget) { |
7385 | if (Subtarget.hasShortForwardBranchOpt()) |
7386 | return SDValue(); |
7387 | |
7388 | unsigned SelOpNo = 0; |
7389 | SDValue Sel = BO->getOperand(Num: 0); |
7390 | if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { |
7391 | SelOpNo = 1; |
7392 | Sel = BO->getOperand(Num: 1); |
7393 | } |
7394 | |
7395 | if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) |
7396 | return SDValue(); |
7397 | |
7398 | unsigned ConstSelOpNo = 1; |
7399 | unsigned OtherSelOpNo = 2; |
  if (!isa<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) {
7401 | ConstSelOpNo = 2; |
7402 | OtherSelOpNo = 1; |
7403 | } |
7404 | SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo); |
7405 | ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp); |
7406 | if (!ConstSelOpNode || ConstSelOpNode->isOpaque()) |
7407 | return SDValue(); |
7408 | |
7409 | SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1); |
7410 | ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp); |
7411 | if (!ConstBinOpNode || ConstBinOpNode->isOpaque()) |
7412 | return SDValue(); |
7413 | |
7414 | SDLoc DL(Sel); |
7415 | EVT VT = BO->getValueType(ResNo: 0); |
7416 | |
7417 | SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp}; |
7418 | if (SelOpNo == 1) |
7419 | std::swap(a&: NewConstOps[0], b&: NewConstOps[1]); |
7420 | |
7421 | SDValue NewConstOp = |
7422 | DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps); |
7423 | if (!NewConstOp) |
7424 | return SDValue(); |
7425 | |
7426 | const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); |
7427 | if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) |
7428 | return SDValue(); |
7429 | |
7430 | SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo); |
7431 | SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp}; |
7432 | if (SelOpNo == 1) |
7433 | std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]); |
7434 | SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps); |
7435 | |
7436 | SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp; |
7437 | SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp; |
7438 | return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF); |
7439 | } |
7440 | |
7441 | SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
7442 | SDValue CondV = Op.getOperand(i: 0); |
7443 | SDValue TrueV = Op.getOperand(i: 1); |
7444 | SDValue FalseV = Op.getOperand(i: 2); |
7445 | SDLoc DL(Op); |
7446 | MVT VT = Op.getSimpleValueType(); |
7447 | MVT XLenVT = Subtarget.getXLenVT(); |
7448 | |
7449 | // Lower vector SELECTs to VSELECTs by splatting the condition. |
7450 | if (VT.isVector()) { |
7451 | MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); |
7452 | SDValue CondSplat = DAG.getSplat(VT: SplatCondVT, DL, Op: CondV); |
7453 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: CondSplat, N2: TrueV, N3: FalseV); |
7454 | } |
7455 | |
7456 | // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ |
7457 | // nodes to implement the SELECT. Performing the lowering here allows for |
7458 | // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless |
7459 | // sequence or RISCVISD::SELECT_CC node (branch-based select). |
7460 | if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && |
7461 | VT.isScalarInteger()) { |
7462 | // (select c, t, 0) -> (czero_eqz t, c) |
7463 | if (isNullConstant(V: FalseV)) |
7464 | return DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV); |
7465 | // (select c, 0, f) -> (czero_nez f, c) |
7466 | if (isNullConstant(V: TrueV)) |
7467 | return DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV); |
7468 | |
7469 | // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c)) |
7470 | if (TrueV.getOpcode() == ISD::AND && |
7471 | (TrueV.getOperand(i: 0) == FalseV || TrueV.getOperand(i: 1) == FalseV)) |
7472 | return DAG.getNode( |
7473 | Opcode: ISD::OR, DL, VT, N1: TrueV, |
7474 | N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV)); |
7475 | // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x)) |
7476 | if (FalseV.getOpcode() == ISD::AND && |
7477 | (FalseV.getOperand(i: 0) == TrueV || FalseV.getOperand(i: 1) == TrueV)) |
7478 | return DAG.getNode( |
7479 | Opcode: ISD::OR, DL, VT, N1: FalseV, |
7480 | N2: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV)); |
7481 | |
7482 | // Try some other optimizations before falling back to generic lowering. |
7483 | if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget)) |
7484 | return V; |
7485 | |
7486 | // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1) |
7487 | // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2) |
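    // For example, (select c, 3, 7) becomes (add (czero_nez 4, c), 3): when c
    // is nonzero the CZERO produces 0 and the result is 3, otherwise it
    // produces 4 and the result is 7.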
7488 | if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) { |
7489 | const APInt &TrueVal = TrueV->getAsAPIntVal(); |
7490 | const APInt &FalseVal = FalseV->getAsAPIntVal(); |
7491 | const int TrueValCost = RISCVMatInt::getIntMatCost( |
7492 | TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); |
7493 | const int FalseValCost = RISCVMatInt::getIntMatCost( |
7494 | FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); |
7495 | bool IsCZERO_NEZ = TrueValCost <= FalseValCost; |
7496 | SDValue LHSVal = DAG.getConstant( |
7497 | Val: IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT); |
7498 | SDValue RHSVal = |
7499 | DAG.getConstant(Val: IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT); |
7500 | SDValue CMOV = |
7501 | DAG.getNode(Opcode: IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ, |
7502 | DL, VT, N1: LHSVal, N2: CondV); |
7503 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CMOV, N2: RHSVal); |
7504 | } |
7505 | |
7506 | // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) |
7507 | // Unless we have the short forward branch optimization. |
7508 | if (!Subtarget.hasConditionalMoveFusion()) |
7509 | return DAG.getNode( |
7510 | Opcode: ISD::OR, DL, VT, |
7511 | N1: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV), |
7512 | N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV)); |
7513 | } |
7514 | |
7515 | if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget)) |
7516 | return V; |
7517 | |
7518 | if (Op.hasOneUse()) { |
7519 | unsigned UseOpc = Op->use_begin()->getOpcode(); |
7520 | if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) { |
7521 | SDNode *BinOp = *Op->use_begin(); |
7522 | if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: *Op->use_begin(), |
7523 | DAG, Subtarget)) { |
7524 | DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel); |
7525 | return lowerSELECT(Op: NewSel, DAG); |
7526 | } |
7527 | } |
7528 | } |
7529 | |
7530 | // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc)) |
7531 | // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1))) |
7532 | const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(Val&: TrueV); |
7533 | const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(Val&: FalseV); |
7534 | if (FPTV && FPFV) { |
7535 | if (FPTV->isExactlyValue(V: 1.0) && FPFV->isExactlyValue(V: 0.0)) |
7536 | return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: CondV); |
7537 | if (FPTV->isExactlyValue(V: 0.0) && FPFV->isExactlyValue(V: 1.0)) { |
7538 | SDValue XOR = DAG.getNode(Opcode: ISD::XOR, DL, VT: XLenVT, N1: CondV, |
7539 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
7540 | return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: XOR); |
7541 | } |
7542 | } |
7543 | |
7544 | // If the condition is not an integer SETCC which operates on XLenVT, we need |
7545 | // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.: |
7546 | // (select condv, truev, falsev) |
7547 | // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) |
7548 | if (CondV.getOpcode() != ISD::SETCC || |
7549 | CondV.getOperand(i: 0).getSimpleValueType() != XLenVT) { |
7550 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
7551 | SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE); |
7552 | |
7553 | SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; |
7554 | |
7555 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops); |
7556 | } |
7557 | |
7558 | // If the CondV is the output of a SETCC node which operates on XLenVT inputs, |
7559 | // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take |
7560 | // advantage of the integer compare+branch instructions. i.e.: |
7561 | // (select (setcc lhs, rhs, cc), truev, falsev) |
7562 | // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) |
7563 | SDValue LHS = CondV.getOperand(i: 0); |
7564 | SDValue RHS = CondV.getOperand(i: 1); |
7565 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get(); |
7566 | |
  // Special case for a select of 2 constants that have a difference of 1.
7568 | // Normally this is done by DAGCombine, but if the select is introduced by |
7569 | // type legalization or op legalization, we miss it. Restricting to SETLT |
7570 | // case for now because that is what signed saturating add/sub need. |
7571 | // FIXME: We don't need the condition to be SETLT or even a SETCC, |
7572 | // but we would probably want to swap the true/false values if the condition |
7573 | // is SETGE/SETLE to avoid an XORI. |
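  // For example, (select (setlt a, b), 6, 5) becomes
  // (add (setlt a, b), 5) and (select (setlt a, b), 4, 5) becomes
  // (sub 5, (setlt a, b)), using the 0/1 result of the setcc directly.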
7574 | if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) && |
7575 | CCVal == ISD::SETLT) { |
7576 | const APInt &TrueVal = TrueV->getAsAPIntVal(); |
7577 | const APInt &FalseVal = FalseV->getAsAPIntVal(); |
7578 | if (TrueVal - 1 == FalseVal) |
7579 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV); |
7580 | if (TrueVal + 1 == FalseVal) |
7581 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV); |
7582 | } |
7583 | |
7584 | translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG); |
7585 | // 1 < x ? x : 1 -> 0 < x ? x : 1 |
7586 | if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) && |
7587 | RHS == TrueV && LHS == FalseV) { |
7588 | LHS = DAG.getConstant(Val: 0, DL, VT); |
7589 | // 0 <u x is the same as x != 0. |
7590 | if (CCVal == ISD::SETULT) { |
7591 | std::swap(a&: LHS, b&: RHS); |
7592 | CCVal = ISD::SETNE; |
7593 | } |
7594 | } |
7595 | |
7596 | // x <s -1 ? x : -1 -> x <s 0 ? x : -1 |
7597 | if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV && |
7598 | RHS == FalseV) { |
7599 | RHS = DAG.getConstant(Val: 0, DL, VT); |
7600 | } |
7601 | |
7602 | SDValue TargetCC = DAG.getCondCode(Cond: CCVal); |
7603 | |
7604 | if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) { |
7605 | // (select (setcc lhs, rhs, CC), constant, falsev) |
7606 | // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) |
7607 | std::swap(a&: TrueV, b&: FalseV); |
7608 | TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType())); |
7609 | } |
7610 | |
7611 | SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; |
7612 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops); |
7613 | } |
7614 | |
7615 | SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { |
7616 | SDValue CondV = Op.getOperand(i: 1); |
7617 | SDLoc DL(Op); |
7618 | MVT XLenVT = Subtarget.getXLenVT(); |
7619 | |
7620 | if (CondV.getOpcode() == ISD::SETCC && |
7621 | CondV.getOperand(i: 0).getValueType() == XLenVT) { |
7622 | SDValue LHS = CondV.getOperand(i: 0); |
7623 | SDValue RHS = CondV.getOperand(i: 1); |
7624 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get(); |
7625 | |
7626 | translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG); |
7627 | |
7628 | SDValue TargetCC = DAG.getCondCode(Cond: CCVal); |
7629 | return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0), |
7630 | N2: LHS, N3: RHS, N4: TargetCC, N5: Op.getOperand(i: 2)); |
7631 | } |
7632 | |
7633 | return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0), |
7634 | N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: XLenVT), |
7635 | N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2)); |
7636 | } |
7637 | |
7638 | SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { |
7639 | MachineFunction &MF = DAG.getMachineFunction(); |
7640 | RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); |
7641 | |
7642 | SDLoc DL(Op); |
7643 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
7644 | VT: getPointerTy(DL: MF.getDataLayout())); |
7645 | |
7646 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
7647 | // memory location argument. |
7648 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
7649 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
7650 | PtrInfo: MachinePointerInfo(SV)); |
7651 | } |
7652 | |
7653 | SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, |
7654 | SelectionDAG &DAG) const { |
7655 | const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
7656 | MachineFunction &MF = DAG.getMachineFunction(); |
7657 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7658 | MFI.setFrameAddressIsTaken(true); |
7659 | Register FrameReg = RI.getFrameRegister(MF); |
7660 | int XLenInBytes = Subtarget.getXLen() / 8; |
7661 | |
7662 | EVT VT = Op.getValueType(); |
7663 | SDLoc DL(Op); |
7664 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
7665 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
7666 | while (Depth--) { |
7667 | int Offset = -(XLenInBytes * 2); |
7668 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
7669 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
7670 | FrameAddr = |
7671 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
7672 | } |
7673 | return FrameAddr; |
7674 | } |
7675 | |
7676 | SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, |
7677 | SelectionDAG &DAG) const { |
7678 | const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
7679 | MachineFunction &MF = DAG.getMachineFunction(); |
7680 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7681 | MFI.setReturnAddressIsTaken(true); |
7682 | MVT XLenVT = Subtarget.getXLenVT(); |
7683 | int XLenInBytes = Subtarget.getXLen() / 8; |
7684 | |
7685 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
7686 | return SDValue(); |
7687 | |
7688 | EVT VT = Op.getValueType(); |
7689 | SDLoc DL(Op); |
7690 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
7691 | if (Depth) { |
7692 | int Off = -XLenInBytes; |
7693 | SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); |
7694 | SDValue Offset = DAG.getConstant(Val: Off, DL, VT); |
7695 | return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), |
7696 | Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset), |
7697 | PtrInfo: MachinePointerInfo()); |
7698 | } |
7699 | |
7700 | // Return the value of the return address register, marking it an implicit |
7701 | // live-in. |
7702 | Register Reg = MF.addLiveIn(PReg: RI.getRARegister(), RC: getRegClassFor(VT: XLenVT)); |
7703 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg, VT: XLenVT); |
7704 | } |
7705 | |
7706 | SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, |
7707 | SelectionDAG &DAG) const { |
7708 | SDLoc DL(Op); |
7709 | SDValue Lo = Op.getOperand(i: 0); |
7710 | SDValue Hi = Op.getOperand(i: 1); |
7711 | SDValue Shamt = Op.getOperand(i: 2); |
7712 | EVT VT = Lo.getValueType(); |
7713 | |
7714 | // if Shamt-XLEN < 0: // Shamt < XLEN |
7715 | // Lo = Lo << Shamt |
7716 | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) |
7717 | // else: |
7718 | // Lo = 0 |
7719 | // Hi = Lo << (Shamt-XLEN) |
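  //
  // For example, with XLEN=32 and Shamt=8 this computes Lo = Lo << 8 and
  // Hi = (Hi << 8) | (Lo >>u 24). The two-step right shift of Lo keeps the
  // shift amount within [0, XLEN-1] even when Shamt is 0.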
7720 | |
7721 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
7722 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
7723 | SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT); |
7724 | SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT); |
7725 | SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen); |
7726 | SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt); |
7727 | |
7728 | SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt); |
7729 | SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One); |
7730 | SDValue ShiftRightLo = |
7731 | DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: XLenMinus1Shamt); |
7732 | SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt); |
7733 | SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo); |
7734 | SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusXLen); |
7735 | |
7736 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT); |
7737 | |
7738 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero); |
7739 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
7740 | |
7741 | SDValue Parts[2] = {Lo, Hi}; |
7742 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
7743 | } |
7744 | |
7745 | SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, |
7746 | bool IsSRA) const { |
7747 | SDLoc DL(Op); |
7748 | SDValue Lo = Op.getOperand(i: 0); |
7749 | SDValue Hi = Op.getOperand(i: 1); |
7750 | SDValue Shamt = Op.getOperand(i: 2); |
7751 | EVT VT = Lo.getValueType(); |
7752 | |
7753 | // SRA expansion: |
7754 | // if Shamt-XLEN < 0: // Shamt < XLEN |
  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
7756 | // Hi = Hi >>s Shamt |
7757 | // else: |
7758 | // Lo = Hi >>s (Shamt-XLEN); |
7759 | // Hi = Hi >>s (XLEN-1) |
7760 | // |
7761 | // SRL expansion: |
7762 | // if Shamt-XLEN < 0: // Shamt < XLEN |
  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
7764 | // Hi = Hi >>u Shamt |
7765 | // else: |
7766 | // Lo = Hi >>u (Shamt-XLEN); |
7767 | // Hi = 0; |
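  //
  // For example, with XLEN=32 and Shamt=8 the SRL case computes
  // Lo = (Lo >>u 8) | (Hi << 24) and Hi = Hi >>u 8. As in the shift-left
  // expansion, the two-step left shift of Hi keeps the shift amount within
  // [0, XLEN-1] even when Shamt is 0.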
7768 | |
7769 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
7770 | |
7771 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
7772 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
7773 | SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT); |
7774 | SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT); |
7775 | SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen); |
7776 | SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt); |
7777 | |
7778 | SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt); |
7779 | SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One); |
7780 | SDValue ShiftLeftHi = |
7781 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: XLenMinus1Shamt); |
7782 | SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi); |
7783 | SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt); |
7784 | SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusXLen); |
7785 | SDValue HiFalse = |
7786 | IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: XLenMinus1) : Zero; |
7787 | |
7788 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT); |
7789 | |
7790 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse); |
7791 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
7792 | |
7793 | SDValue Parts[2] = {Lo, Hi}; |
7794 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
7795 | } |
7796 | |
7797 | // Lower splats of i1 types to SETCC. For each mask vector type, we have a |
7798 | // legal equivalently-sized i8 type, so we can use that as a go-between. |
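//
// For example, splatting an i1 scalar %x into vXi1 becomes roughly:
//   %b = and %x, 1
//   %v = splat_vector vXi8 %b
//   vXi1 = setcc ne %v, (splat_vector vXi8 0)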
7799 | SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, |
7800 | SelectionDAG &DAG) const { |
7801 | SDLoc DL(Op); |
7802 | MVT VT = Op.getSimpleValueType(); |
7803 | SDValue SplatVal = Op.getOperand(i: 0); |
7804 | // All-zeros or all-ones splats are handled specially. |
7805 | if (ISD::isConstantSplatVectorAllOnes(N: Op.getNode())) { |
7806 | SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second; |
7807 | return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT, Operand: VL); |
7808 | } |
7809 | if (ISD::isConstantSplatVectorAllZeros(N: Op.getNode())) { |
7810 | SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second; |
7811 | return DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT, Operand: VL); |
7812 | } |
7813 | MVT InterVT = VT.changeVectorElementType(MVT::i8); |
7814 | SplatVal = DAG.getNode(Opcode: ISD::AND, DL, VT: SplatVal.getValueType(), N1: SplatVal, |
7815 | N2: DAG.getConstant(Val: 1, DL, VT: SplatVal.getValueType())); |
7816 | SDValue LHS = DAG.getSplatVector(VT: InterVT, DL, Op: SplatVal); |
7817 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: InterVT); |
7818 | return DAG.getSetCC(DL, VT, LHS, RHS: Zero, Cond: ISD::SETNE); |
7819 | } |
7820 | |
7821 | // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is |
7822 | // illegal (currently only vXi64 RV32). |
7823 | // FIXME: We could also catch non-constant sign-extended i32 values and lower |
7824 | // them to VMV_V_X_VL. |
7825 | SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, |
7826 | SelectionDAG &DAG) const { |
7827 | SDLoc DL(Op); |
7828 | MVT VecVT = Op.getSimpleValueType(); |
7829 | assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && |
7830 | "Unexpected SPLAT_VECTOR_PARTS lowering" ); |
7831 | |
7832 | assert(Op.getNumOperands() == 2 && "Unexpected number of operands!" ); |
7833 | SDValue Lo = Op.getOperand(i: 0); |
7834 | SDValue Hi = Op.getOperand(i: 1); |
7835 | |
7836 | MVT ContainerVT = VecVT; |
7837 | if (VecVT.isFixedLengthVector()) |
7838 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
7839 | |
7840 | auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; |
7841 | |
7842 | SDValue Res = |
7843 | splatPartsI64WithVL(DL, VT: ContainerVT, Passthru: SDValue(), Lo, Hi, VL, DAG); |
7844 | |
7845 | if (VecVT.isFixedLengthVector()) |
7846 | Res = convertFromScalableVector(VT: VecVT, V: Res, DAG, Subtarget); |
7847 | |
7848 | return Res; |
7849 | } |
7850 | |
7851 | // Custom-lower extensions from mask vectors by using a vselect either with 1 |
7852 | // for zero/any-extension or -1 for sign-extension: |
7853 | // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) |
7854 | // Note that any-extension is lowered identically to zero-extension. |
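// For fixed-length vectors, the select is ultimately emitted as a VMERGE_VL
// between two VMV_V_X_VL splats (of ExtTrueVal and 0) on the scalable
// container type.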
7855 | SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, |
7856 | int64_t ExtTrueVal) const { |
7857 | SDLoc DL(Op); |
7858 | MVT VecVT = Op.getSimpleValueType(); |
7859 | SDValue Src = Op.getOperand(i: 0); |
7860 | // Only custom-lower extensions from mask types |
7861 | assert(Src.getValueType().isVector() && |
7862 | Src.getValueType().getVectorElementType() == MVT::i1); |
7863 | |
7864 | if (VecVT.isScalableVector()) { |
7865 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: VecVT); |
7866 | SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: VecVT); |
7867 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecVT, N1: Src, N2: SplatTrueVal, N3: SplatZero); |
7868 | } |
7869 | |
7870 | MVT ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
7871 | MVT I1ContainerVT = |
7872 | MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
7873 | |
7874 | SDValue CC = convertToScalableVector(VT: I1ContainerVT, V: Src, DAG, Subtarget); |
7875 | |
7876 | SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; |
7877 | |
7878 | MVT XLenVT = Subtarget.getXLenVT(); |
7879 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
7880 | SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: XLenVT); |
7881 | |
7882 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
7883 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL); |
7884 | SplatTrueVal = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
7885 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatTrueVal, N3: VL); |
7886 | SDValue Select = |
7887 | DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: SplatTrueVal, |
7888 | N3: SplatZero, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
7889 | |
7890 | return convertFromScalableVector(VT: VecVT, V: Select, DAG, Subtarget); |
7891 | } |
7892 | |
7893 | SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( |
7894 | SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { |
7895 | MVT ExtVT = Op.getSimpleValueType(); |
7896 | // Only custom-lower extensions from fixed-length vector types. |
7897 | if (!ExtVT.isFixedLengthVector()) |
7898 | return Op; |
7899 | MVT VT = Op.getOperand(i: 0).getSimpleValueType(); |
7900 | // Grab the canonical container type for the extended type. Infer the smaller |
7901 | // type from that to ensure the same number of vector elements, as we know |
7902 | // the LMUL will be sufficient to hold the smaller type. |
7903 | MVT ContainerExtVT = getContainerForFixedLengthVector(VT: ExtVT); |
  // Build the container type for the source manually so it has the same
  // number of vector elements as the extended container type.
7906 | MVT ContainerVT = MVT::getVectorVT(VT: VT.getVectorElementType(), |
7907 | EC: ContainerExtVT.getVectorElementCount()); |
7908 | |
7909 | SDValue Op1 = |
7910 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget); |
7911 | |
7912 | SDLoc DL(Op); |
7913 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
7914 | |
7915 | SDValue Ext = DAG.getNode(Opcode: ExtendOpc, DL, VT: ContainerExtVT, N1: Op1, N2: Mask, N3: VL); |
7916 | |
7917 | return convertFromScalableVector(VT: ExtVT, V: Ext, DAG, Subtarget); |
7918 | } |
7919 | |
7920 | // Custom-lower truncations from vectors to mask vectors by using a mask and a |
7921 | // setcc operation: |
7922 | // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) |
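// In other words, only the low bit of each source lane survives; e.g.
// (v4i1 = trunc v4i32 %v) masks each element with 1 and compares the result
// against zero.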
7923 | SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, |
7924 | SelectionDAG &DAG) const { |
7925 | bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; |
7926 | SDLoc DL(Op); |
7927 | EVT MaskVT = Op.getValueType(); |
7928 | // Only expect to custom-lower truncations to mask types |
7929 | assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && |
7930 | "Unexpected type for vector mask lowering" ); |
7931 | SDValue Src = Op.getOperand(i: 0); |
7932 | MVT VecVT = Src.getSimpleValueType(); |
7933 | SDValue Mask, VL; |
7934 | if (IsVPTrunc) { |
7935 | Mask = Op.getOperand(i: 1); |
7936 | VL = Op.getOperand(i: 2); |
7937 | } |
7938 | // If this is a fixed vector, we need to convert it to a scalable vector. |
7939 | MVT ContainerVT = VecVT; |
7940 | |
7941 | if (VecVT.isFixedLengthVector()) { |
7942 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
7943 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
7944 | if (IsVPTrunc) { |
7945 | MVT MaskContainerVT = |
7946 | getContainerForFixedLengthVector(VT: Mask.getSimpleValueType()); |
7947 | Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget); |
7948 | } |
7949 | } |
7950 | |
7951 | if (!IsVPTrunc) { |
7952 | std::tie(args&: Mask, args&: VL) = |
7953 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
7954 | } |
7955 | |
7956 | SDValue SplatOne = DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT()); |
7957 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()); |
7958 | |
7959 | SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
7960 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatOne, N3: VL); |
7961 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
7962 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL); |
7963 | |
7964 | MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); |
7965 | SDValue Trunc = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerVT, N1: Src, N2: SplatOne, |
7966 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
7967 | Trunc = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskContainerVT, |
7968 | Ops: {Trunc, SplatZero, DAG.getCondCode(Cond: ISD::SETNE), |
7969 | DAG.getUNDEF(VT: MaskContainerVT), Mask, VL}); |
7970 | if (MaskVT.isFixedLengthVector()) |
7971 | Trunc = convertFromScalableVector(VT: MaskVT, V: Trunc, DAG, Subtarget); |
7972 | return Trunc; |
7973 | } |
7974 | |
7975 | SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, |
7976 | SelectionDAG &DAG) const { |
7977 | bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; |
7978 | SDLoc DL(Op); |
7979 | |
7980 | MVT VT = Op.getSimpleValueType(); |
7981 | // Only custom-lower vector truncates |
7982 | assert(VT.isVector() && "Unexpected type for vector truncate lowering" ); |
7983 | |
7984 | // Truncates to mask types are handled differently |
7985 | if (VT.getVectorElementType() == MVT::i1) |
7986 | return lowerVectorMaskTruncLike(Op, DAG); |
7987 | |
7988 | // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary |
7989 | // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which |
7990 | // truncate by one power of two at a time. |
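  // For example, truncating nxv2i64 to nxv2i8 emits three such nodes:
  // i64->i32, i32->i16 and i16->i8.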
7991 | MVT DstEltVT = VT.getVectorElementType(); |
7992 | |
7993 | SDValue Src = Op.getOperand(i: 0); |
7994 | MVT SrcVT = Src.getSimpleValueType(); |
7995 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
7996 | |
7997 | assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && |
7998 | isPowerOf2_64(SrcEltVT.getSizeInBits()) && |
7999 | "Unexpected vector truncate lowering" ); |
8000 | |
8001 | MVT ContainerVT = SrcVT; |
8002 | SDValue Mask, VL; |
8003 | if (IsVPTrunc) { |
8004 | Mask = Op.getOperand(i: 1); |
8005 | VL = Op.getOperand(i: 2); |
8006 | } |
8007 | if (SrcVT.isFixedLengthVector()) { |
8008 | ContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
8009 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
8010 | if (IsVPTrunc) { |
8011 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
8012 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
8013 | } |
8014 | } |
8015 | |
8016 | SDValue Result = Src; |
8017 | if (!IsVPTrunc) { |
8018 | std::tie(args&: Mask, args&: VL) = |
8019 | getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
8020 | } |
8021 | |
8022 | LLVMContext &Context = *DAG.getContext(); |
8023 | const ElementCount Count = ContainerVT.getVectorElementCount(); |
8024 | do { |
8025 | SrcEltVT = MVT::getIntegerVT(BitWidth: SrcEltVT.getSizeInBits() / 2); |
8026 | EVT ResultVT = EVT::getVectorVT(Context, VT: SrcEltVT, EC: Count); |
8027 | Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: ResultVT, N1: Result, |
8028 | N2: Mask, N3: VL); |
8029 | } while (SrcEltVT != DstEltVT); |
8030 | |
8031 | if (SrcVT.isFixedLengthVector()) |
8032 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
8033 | |
8034 | return Result; |
8035 | } |
8036 | |
8037 | SDValue |
8038 | RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op, |
8039 | SelectionDAG &DAG) const { |
8040 | SDLoc DL(Op); |
8041 | SDValue Chain = Op.getOperand(i: 0); |
8042 | SDValue Src = Op.getOperand(i: 1); |
8043 | MVT VT = Op.getSimpleValueType(); |
8044 | MVT SrcVT = Src.getSimpleValueType(); |
8045 | MVT ContainerVT = VT; |
8046 | if (VT.isFixedLengthVector()) { |
8047 | MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
8048 | ContainerVT = |
8049 | SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType()); |
8050 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
8051 | } |
8052 | |
8053 | auto [Mask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
8054 | |
  // RVV can only widen/truncate fp to types double/half the size of the
  // source.
8056 | if ((VT.getVectorElementType() == MVT::f64 && |
8057 | SrcVT.getVectorElementType() == MVT::f16) || |
8058 | (VT.getVectorElementType() == MVT::f16 && |
8059 | SrcVT.getVectorElementType() == MVT::f64)) { |
8060 | // For double rounding, the intermediate rounding should be round-to-odd. |
8061 | unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND |
8062 | ? RISCVISD::STRICT_FP_EXTEND_VL |
8063 | : RISCVISD::STRICT_VFNCVT_ROD_VL; |
8064 | MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); |
8065 | Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other), |
8066 | Chain, Src, Mask, VL); |
8067 | Chain = Src.getValue(R: 1); |
8068 | } |
8069 | |
8070 | unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND |
8071 | ? RISCVISD::STRICT_FP_EXTEND_VL |
8072 | : RISCVISD::STRICT_FP_ROUND_VL; |
8073 | SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), |
8074 | Chain, Src, Mask, VL); |
8075 | if (VT.isFixedLengthVector()) { |
    // StrictFP operations have two result values. Their lowered result should
    // have the same result count.
8078 | SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
8079 | Res = DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL); |
8080 | } |
8081 | return Res; |
8082 | } |
8083 | |
8084 | SDValue |
8085 | RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, |
8086 | SelectionDAG &DAG) const { |
8087 | bool IsVP = |
8088 | Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; |
8089 | bool IsExtend = |
8090 | Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; |
  // RVV can only truncate fp to types half the size of the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
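  // For example, an f64->f16 round is emitted as a vfncvt.rod.f.f.w
  // (f64->f32, round-to-odd) followed by a plain vfncvt.f.f.w (f32->f16);
  // rounding to odd in the intermediate step avoids double-rounding errors.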
8094 | SDLoc DL(Op); |
8095 | MVT VT = Op.getSimpleValueType(); |
8096 | |
8097 | assert(VT.isVector() && "Unexpected type for vector truncate lowering" ); |
8098 | |
8099 | SDValue Src = Op.getOperand(i: 0); |
8100 | MVT SrcVT = Src.getSimpleValueType(); |
8101 | |
8102 | bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || |
8103 | SrcVT.getVectorElementType() != MVT::f16); |
8104 | bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || |
8105 | SrcVT.getVectorElementType() != MVT::f64); |
8106 | |
8107 | bool IsDirectConv = IsDirectExtend || IsDirectTrunc; |
8108 | |
8109 | // Prepare any fixed-length vector operands. |
8110 | MVT ContainerVT = VT; |
8111 | SDValue Mask, VL; |
8112 | if (IsVP) { |
8113 | Mask = Op.getOperand(i: 1); |
8114 | VL = Op.getOperand(i: 2); |
8115 | } |
8116 | if (VT.isFixedLengthVector()) { |
8117 | MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
8118 | ContainerVT = |
8119 | SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType()); |
8120 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
8121 | if (IsVP) { |
8122 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
8123 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
8124 | } |
8125 | } |
8126 | |
8127 | if (!IsVP) |
8128 | std::tie(args&: Mask, args&: VL) = |
8129 | getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
8130 | |
8131 | unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; |
8132 | |
8133 | if (IsDirectConv) { |
8134 | Src = DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
8135 | if (VT.isFixedLengthVector()) |
8136 | Src = convertFromScalableVector(VT, V: Src, DAG, Subtarget); |
8137 | return Src; |
8138 | } |
8139 | |
8140 | unsigned InterConvOpc = |
8141 | IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; |
8142 | |
8143 | MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); |
8144 | SDValue IntermediateConv = |
8145 | DAG.getNode(Opcode: InterConvOpc, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL); |
8146 | SDValue Result = |
8147 | DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: IntermediateConv, N2: Mask, N3: VL); |
8148 | if (VT.isFixedLengthVector()) |
8149 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
8150 | return Result; |
8151 | } |
8152 | |
8153 | // Given a scalable vector type and an index into it, returns the type for the |
8154 | // smallest subvector that the index fits in. This can be used to reduce LMUL |
8155 | // for operations like vslidedown. |
8156 | // |
8157 | // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32. |
8158 | static std::optional<MVT> |
8159 | getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, |
8160 | const RISCVSubtarget &Subtarget) { |
8161 | assert(VecVT.isScalableVector()); |
8162 | const unsigned EltSize = VecVT.getScalarSizeInBits(); |
8163 | const unsigned VectorBitsMin = Subtarget.getRealMinVLen(); |
8164 | const unsigned MinVLMAX = VectorBitsMin / EltSize; |
8165 | MVT SmallerVT; |
8166 | if (MaxIdx < MinVLMAX) |
8167 | SmallerVT = getLMUL1VT(VT: VecVT); |
8168 | else if (MaxIdx < MinVLMAX * 2) |
8169 | SmallerVT = getLMUL1VT(VT: VecVT).getDoubleNumVectorElementsVT(); |
8170 | else if (MaxIdx < MinVLMAX * 4) |
8171 | SmallerVT = getLMUL1VT(VT: VecVT) |
8172 | .getDoubleNumVectorElementsVT() |
8173 | .getDoubleNumVectorElementsVT(); |
8174 | if (!SmallerVT.isValid() || !VecVT.bitsGT(VT: SmallerVT)) |
8175 | return std::nullopt; |
8176 | return SmallerVT; |
8177 | } |
8178 | |
8179 | // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the |
8180 | // first position of a vector, and that vector is slid up to the insert index. |
8181 | // By limiting the active vector length to index+1 and merging with the |
8182 | // original vector (with an undisturbed tail policy for elements >= VL), we |
8183 | // achieve the desired result of leaving all elements untouched except the one |
8184 | // at VL-1, which is replaced with the desired value. |
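// For example, inserting a value at index 2 of a v4i32 vector becomes,
// roughly, a vmv.s.x of the scalar into element 0 of a temporary followed
// by a vslideup with offset 2 and VL=3 under a tail-undisturbed policy,
// leaving elements 3 and up untouched.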
8185 | SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
8186 | SelectionDAG &DAG) const { |
8187 | SDLoc DL(Op); |
8188 | MVT VecVT = Op.getSimpleValueType(); |
8189 | SDValue Vec = Op.getOperand(i: 0); |
8190 | SDValue Val = Op.getOperand(i: 1); |
8191 | SDValue Idx = Op.getOperand(i: 2); |
8192 | |
8193 | if (VecVT.getVectorElementType() == MVT::i1) { |
8194 | // FIXME: For now we just promote to an i8 vector and insert into that, |
8195 | // but this is probably not optimal. |
8196 | MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); |
8197 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec); |
8198 | Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: WideVT, N1: Vec, N2: Val, N3: Idx); |
8199 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Vec); |
8200 | } |
8201 | |
8202 | MVT ContainerVT = VecVT; |
8203 | // If the operand is a fixed-length vector, convert to a scalable one. |
8204 | if (VecVT.isFixedLengthVector()) { |
8205 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8206 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
8207 | } |
8208 | |
8209 | // If we know the index we're going to insert at, we can shrink Vec so that |
8210 | // we're performing the scalar inserts and slideup on a smaller LMUL. |
8211 | MVT OrigContainerVT = ContainerVT; |
8212 | SDValue OrigVec = Vec; |
8213 | SDValue AlignedIdx; |
8214 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx)) { |
8215 | const unsigned OrigIdx = IdxC->getZExtValue(); |
8216 | // Do we know an upper bound on LMUL? |
8217 | if (auto ShrunkVT = getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: OrigIdx, |
8218 | DL, DAG, Subtarget)) { |
8219 | ContainerVT = *ShrunkVT; |
8220 | AlignedIdx = DAG.getVectorIdxConstant(Val: 0, DL); |
8221 | } |
8222 | |
8223 | // If we're compiling for an exact VLEN value, we can always perform |
8224 | // the insert in m1 as we can determine the register corresponding to |
8225 | // the index in the register group. |
8226 | const MVT M1VT = getLMUL1VT(VT: ContainerVT); |
8227 | if (auto VLEN = Subtarget.getRealVLen(); |
8228 | VLEN && ContainerVT.bitsGT(VT: M1VT)) { |
8229 | EVT ElemVT = VecVT.getVectorElementType(); |
8230 | unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits(); |
8231 | unsigned RemIdx = OrigIdx % ElemsPerVReg; |
8232 | unsigned SubRegIdx = OrigIdx / ElemsPerVReg; |
      unsigned ExtractIdx =
8234 | SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); |
8235 | AlignedIdx = DAG.getVectorIdxConstant(Val: ExtractIdx, DL); |
8236 | Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL); |
8237 | ContainerVT = M1VT; |
8238 | } |
8239 | |
8240 | if (AlignedIdx) |
8241 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, |
8242 | N2: AlignedIdx); |
8243 | } |
8244 | |
8245 | MVT XLenVT = Subtarget.getXLenVT(); |
8246 | |
8247 | bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; |
8248 | // Even i64-element vectors on RV32 can be lowered without scalar |
8249 | // legalization if the most-significant 32 bits of the value are not affected |
8250 | // by the sign-extension of the lower 32 bits. |
8251 | // TODO: We could also catch sign extensions of a 32-bit value. |
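  // For example, the i64 constant -7 can be inserted on RV32 with a 32-bit
  // scalar move, since sign-extending its low 32 bits reproduces the full
  // 64-bit value.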
8252 | if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { |
8253 | const auto *CVal = cast<ConstantSDNode>(Val); |
8254 | if (isInt<32>(x: CVal->getSExtValue())) { |
8255 | IsLegalInsert = true; |
8256 | Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); |
8257 | } |
8258 | } |
8259 | |
8260 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
8261 | |
8262 | SDValue ValInVec; |
8263 | |
8264 | if (IsLegalInsert) { |
8265 | unsigned Opc = |
8266 | VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; |
8267 | if (isNullConstant(V: Idx)) { |
8268 | if (!VecVT.isFloatingPoint()) |
8269 | Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Val); |
8270 | Vec = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: Vec, N2: Val, N3: VL); |
8271 | |
8272 | if (AlignedIdx) |
8273 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec, |
8274 | N2: Vec, N3: AlignedIdx); |
8275 | if (!VecVT.isFixedLengthVector()) |
8276 | return Vec; |
8277 | return convertFromScalableVector(VT: VecVT, V: Vec, DAG, Subtarget); |
8278 | } |
8279 | ValInVec = lowerScalarInsert(Scalar: Val, VL, VT: ContainerVT, DL, DAG, Subtarget); |
8280 | } else { |
8281 | // On RV32, i64-element vectors must be specially handled to place the |
8282 | // value at element 0, by using two vslide1down instructions in sequence on |
8283 | // the i32 split lo/hi value. Use an equivalently-sized i32 vector for |
8284 | // this. |
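    // For example, with VL=2 on the i32 view: the first vslide1down inserts
    // ValLo at i32 element 1, the second slides it down to element 0 and
    // inserts ValHi at element 1, forming the i64 value in element 0.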
8285 | SDValue ValLo, ValHi; |
8286 | std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32); |
8287 | MVT I32ContainerVT = |
8288 | MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); |
8289 | SDValue I32Mask = |
8290 | getDefaultScalableVLOps(VecVT: I32ContainerVT, DL, DAG, Subtarget).first; |
8291 | // Limit the active VL to two. |
8292 | SDValue InsertI64VL = DAG.getConstant(Val: 2, DL, VT: XLenVT); |
8293 | // If the Idx is 0 we can insert directly into the vector. |
8294 | if (isNullConstant(V: Idx)) { |
      // First slide in the lo value, then the hi value above it. We use
      // slide1down to avoid the register group overlap constraint of
      // vslide1up.
8297 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8298 | N1: Vec, N2: Vec, N3: ValLo, N4: I32Mask, N5: InsertI64VL); |
8299 | // If the source vector is undef don't pass along the tail elements from |
8300 | // the previous slide1down. |
8301 | SDValue Tail = Vec.isUndef() ? Vec : ValInVec; |
8302 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8303 | N1: Tail, N2: ValInVec, N3: ValHi, N4: I32Mask, N5: InsertI64VL); |
8304 | // Bitcast back to the right container type. |
8305 | ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec); |
8306 | |
8307 | if (AlignedIdx) |
8308 | ValInVec = |
8309 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec, |
8310 | N2: ValInVec, N3: AlignedIdx); |
8311 | if (!VecVT.isFixedLengthVector()) |
8312 | return ValInVec; |
8313 | return convertFromScalableVector(VT: VecVT, V: ValInVec, DAG, Subtarget); |
8314 | } |
8315 | |
    // First slide in the lo value, then the hi value above it. We use
    // slide1down to avoid the register group overlap constraint of
    // vslide1up.
8318 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8319 | N1: DAG.getUNDEF(VT: I32ContainerVT), |
8320 | N2: DAG.getUNDEF(VT: I32ContainerVT), N3: ValLo, |
8321 | N4: I32Mask, N5: InsertI64VL); |
8322 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8323 | N1: DAG.getUNDEF(VT: I32ContainerVT), N2: ValInVec, N3: ValHi, |
8324 | N4: I32Mask, N5: InsertI64VL); |
8325 | // Bitcast back to the right container type. |
8326 | ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec); |
8327 | } |
8328 | |
8329 | // Now that the value is in a vector, slide it into position. |
8330 | SDValue InsertVL = |
8331 | DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: Idx, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
8332 | |
8333 | // Use tail agnostic policy if Idx is the last index of Vec. |
8334 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
8335 | if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Val: Idx) && |
8336 | Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements()) |
8337 | Policy = RISCVII::TAIL_AGNOSTIC; |
8338 | SDValue Slideup = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: ValInVec, |
8339 | Offset: Idx, Mask, VL: InsertVL, Policy); |
8340 | |
8341 | if (AlignedIdx) |
8342 | Slideup = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec, |
8343 | N2: Slideup, N3: AlignedIdx); |
8344 | if (!VecVT.isFixedLengthVector()) |
8345 | return Slideup; |
8346 | return convertFromScalableVector(VT: VecVT, V: Slideup, DAG, Subtarget); |
8347 | } |
8348 | |
8349 | // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then |
8350 | // extract the first element: (extractelt (slidedown vec, idx), 0). For integer |
8351 | // types this is done using VMV_X_S to allow us to glean information about the |
8352 | // sign bits of the result. |
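// For example, (extractelt v8i32 %v, 5) can lower to a vslidedown by 5 with
// VL=1 followed by a vmv.x.s of element 0.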
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
8355 | SDLoc DL(Op); |
8356 | SDValue Idx = Op.getOperand(i: 1); |
8357 | SDValue Vec = Op.getOperand(i: 0); |
8358 | EVT EltVT = Op.getValueType(); |
8359 | MVT VecVT = Vec.getSimpleValueType(); |
8360 | MVT XLenVT = Subtarget.getXLenVT(); |
8361 | |
8362 | if (VecVT.getVectorElementType() == MVT::i1) { |
8363 | // Use vfirst.m to extract the first bit. |
8364 | if (isNullConstant(V: Idx)) { |
8365 | MVT ContainerVT = VecVT; |
8366 | if (VecVT.isFixedLengthVector()) { |
8367 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8368 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
8369 | } |
8370 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
8371 | SDValue Vfirst = |
8372 | DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
8373 | SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: Vfirst, |
8374 | RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ); |
8375 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res); |
8376 | } |
8377 | if (VecVT.isFixedLengthVector()) { |
8378 | unsigned NumElts = VecVT.getVectorNumElements(); |
8379 | if (NumElts >= 8) { |
8380 | MVT WideEltVT; |
8381 | unsigned WidenVecLen; |
        SDValue ExtractElementIdx;
        SDValue ExtractBitIdx;
8384 | unsigned MaxEEW = Subtarget.getELen(); |
8385 | MVT LargestEltVT = MVT::getIntegerVT( |
8386 | BitWidth: std::min(a: MaxEEW, b: unsigned(XLenVT.getSizeInBits()))); |
8387 | if (NumElts <= LargestEltVT.getSizeInBits()) { |
8388 | assert(isPowerOf2_32(NumElts) && |
8389 | "the number of elements should be power of 2" ); |
8390 | WideEltVT = MVT::getIntegerVT(BitWidth: NumElts); |
8391 | WidenVecLen = 1; |
8392 | ExtractElementIdx = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
8393 | ExtractBitIdx = Idx; |
8394 | } else { |
8395 | WideEltVT = LargestEltVT; |
8396 | WidenVecLen = NumElts / WideEltVT.getSizeInBits(); |
8397 | // extract element index = index / element width |
8398 | ExtractElementIdx = DAG.getNode( |
8399 | Opcode: ISD::SRL, DL, VT: XLenVT, N1: Idx, |
8400 | N2: DAG.getConstant(Val: Log2_64(Value: WideEltVT.getSizeInBits()), DL, VT: XLenVT)); |
8401 | // mask bit index = index % element width |
8402 | ExtractBitIdx = DAG.getNode( |
8403 | Opcode: ISD::AND, DL, VT: XLenVT, N1: Idx, |
8404 | N2: DAG.getConstant(Val: WideEltVT.getSizeInBits() - 1, DL, VT: XLenVT)); |
8405 | } |
8406 | MVT WideVT = MVT::getVectorVT(VT: WideEltVT, NumElements: WidenVecLen); |
8407 | Vec = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Vec); |
        SDValue ExtractElt =
            DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: XLenVT, N1: Vec,
                        N2: ExtractElementIdx);
8410 | // Extract the bit from GPR. |
8411 | SDValue ShiftRight = |
8412 | DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: ExtractElt, N2: ExtractBitIdx); |
8413 | SDValue Res = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: ShiftRight, |
8414 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
8415 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res); |
8416 | } |
8417 | } |
8418 | // Otherwise, promote to an i8 vector and extract from that. |
8419 | MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); |
8420 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec); |
8421 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec, N2: Idx); |
8422 | } |
8423 | |
8424 | // If this is a fixed vector, we need to convert it to a scalable vector. |
8425 | MVT ContainerVT = VecVT; |
8426 | if (VecVT.isFixedLengthVector()) { |
8427 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8428 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
8429 | } |
8430 | |
8431 | // If we're compiling for an exact VLEN value and we have a known |
8432 | // constant index, we can always perform the extract in m1 (or |
8433 | // smaller) as we can determine the register corresponding to |
8434 | // the index in the register group. |
8435 | const auto VLen = Subtarget.getRealVLen(); |
8436 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx); |
8437 | IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) { |
8438 | MVT M1VT = getLMUL1VT(VT: ContainerVT); |
8439 | unsigned OrigIdx = IdxC->getZExtValue(); |
8440 | EVT ElemVT = VecVT.getVectorElementType(); |
8441 | unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); |
8442 | unsigned RemIdx = OrigIdx % ElemsPerVReg; |
8443 | unsigned SubRegIdx = OrigIdx / ElemsPerVReg; |
    unsigned ExtractIdx =
8445 | SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); |
8446 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec, |
8447 | N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL)); |
8448 | Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL); |
8449 | ContainerVT = M1VT; |
8450 | } |
8451 | |
8452 | // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which |
8453 | // contains our index. |
8454 | std::optional<uint64_t> MaxIdx; |
8455 | if (VecVT.isFixedLengthVector()) |
8456 | MaxIdx = VecVT.getVectorNumElements() - 1; |
8457 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx)) |
8458 | MaxIdx = IdxC->getZExtValue(); |
8459 | if (MaxIdx) { |
8460 | if (auto SmallerVT = |
8461 | getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: *MaxIdx, DL, DAG, Subtarget)) { |
8462 | ContainerVT = *SmallerVT; |
8463 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, |
8464 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
8465 | } |
8466 | } |
8467 | |
8468 | // If after narrowing, the required slide is still greater than LMUL2, |
8469 | // fallback to generic expansion and go through the stack. This is done |
8470 | // for a subtle reason: extracting *all* elements out of a vector is |
8471 | // widely expected to be linear in vector size, but because vslidedown |
8472 | // is linear in LMUL, performing N extracts using vslidedown becomes |
8473 | // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack |
8474 | // seems to have the same problem (the store is linear in LMUL), but the |
8475 | // generic expansion *memoizes* the store, and thus for many extracts of |
8476 | // the same vector we end up with one store and a bunch of loads. |
8477 | // TODO: We don't have the same code for insert_vector_elt because we |
8478 | // have BUILD_VECTOR and handle the degenerate case there. Should we |
8479 | // consider adding an inverse BUILD_VECTOR node? |
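  // For example, extracting all N elements one at a time performs N slides
  // whose cost each scales with LMUL (and thus with N), for quadratic total
  // work; the memoized stack expansion instead pays for one store and N
  // scalar loads.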
8480 | MVT LMUL2VT = getLMUL1VT(VT: ContainerVT).getDoubleNumVectorElementsVT(); |
8481 | if (ContainerVT.bitsGT(VT: LMUL2VT) && VecVT.isFixedLengthVector()) |
8482 | return SDValue(); |
8483 | |
8484 | // If the index is 0, the vector is already in the right position. |
8485 | if (!isNullConstant(V: Idx)) { |
8486 | // Use a VL of 1 to avoid processing more elements than we need. |
8487 | auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget); |
8488 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, |
8489 | Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL); |
8490 | } |
8491 | |
8492 | if (!EltVT.isInteger()) { |
8493 | // Floating-point extracts are handled in TableGen. |
8494 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec, |
8495 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
8496 | } |
8497 | |
8498 | SDValue Elt0 = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec); |
8499 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Elt0); |
8500 | } |
8501 | |
8502 | // Some RVV intrinsics may claim that they want an integer operand to be |
8503 | // promoted or expanded. |
8504 | static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, |
8505 | const RISCVSubtarget &Subtarget) { |
8506 | assert((Op.getOpcode() == ISD::INTRINSIC_VOID || |
8507 | Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
8508 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && |
8509 | "Unexpected opcode" ); |
8510 | |
8511 | if (!Subtarget.hasVInstructions()) |
8512 | return SDValue(); |
8513 | |
8514 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || |
8515 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
8516 | unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0); |
8517 | |
8518 | SDLoc DL(Op); |
8519 | |
8520 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
8521 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); |
8522 | if (!II || !II->hasScalarOperand()) |
8523 | return SDValue(); |
8524 | |
8525 | unsigned SplatOp = II->ScalarOperand + 1 + HasChain; |
8526 | assert(SplatOp < Op.getNumOperands()); |
8527 | |
8528 | SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); |
8529 | SDValue &ScalarOp = Operands[SplatOp]; |
8530 | MVT OpVT = ScalarOp.getSimpleValueType(); |
8531 | MVT XLenVT = Subtarget.getXLenVT(); |
8532 | |
8533 | // If this isn't a scalar, or its type is XLenVT we're done. |
8534 | if (!OpVT.isScalarInteger() || OpVT == XLenVT) |
8535 | return SDValue(); |
8536 | |
8537 | // Simplest case is that the operand needs to be promoted to XLenVT. |
8538 | if (OpVT.bitsLT(VT: XLenVT)) { |
8539 | // If the operand is a constant, sign extend to increase our chances |
8540 | // of being able to use a .vi instruction. ANY_EXTEND would become a |
    // zero extend and the simm5 check in isel would fail.
8542 | // FIXME: Should we ignore the upper bits in isel instead? |
8543 | unsigned ExtOpc = |
8544 | isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
8545 | ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp); |
8546 | return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands); |
8547 | } |
8548 | |
8549 | // Use the previous operand to get the vXi64 VT. The result might be a mask |
8550 | // VT for compares. Using the previous operand assumes that the previous |
8551 | // operand will never have a smaller element size than a scalar operand and |
8552 | // that a widening operation never uses SEW=64. |
8553 | // NOTE: If this fails the below assert, we can probably just find the |
8554 | // element count from any operand or result and use it to construct the VT. |
8555 | assert(II->ScalarOperand > 0 && "Unexpected splat operand!" ); |
8556 | MVT VT = Op.getOperand(i: SplatOp - 1).getSimpleValueType(); |
8557 | |
8558 | // The more complex case is when the scalar is larger than XLenVT. |
8559 | assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && |
8560 | VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!" ); |
8561 | |
8562 | // If this is a sign-extended 32-bit value, we can truncate it and rely on the |
8563 | // instruction to sign-extend since SEW>XLEN. |
8564 | if (DAG.ComputeNumSignBits(Op: ScalarOp) > 32) { |
8565 | ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); |
8566 | return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands); |
8567 | } |
8568 | |
8569 | switch (IntNo) { |
8570 | case Intrinsic::riscv_vslide1up: |
8571 | case Intrinsic::riscv_vslide1down: |
8572 | case Intrinsic::riscv_vslide1up_mask: |
8573 | case Intrinsic::riscv_vslide1down_mask: { |
8574 | // We need to special case these when the scalar is larger than XLen. |
8575 | unsigned NumOps = Op.getNumOperands(); |
8576 | bool IsMasked = NumOps == 7; |
8577 | |
8578 | // Convert the vector source to the equivalent nxvXi32 vector. |
8579 | MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); |
8580 | SDValue Vec = DAG.getBitcast(VT: I32VT, V: Operands[2]); |
8581 | SDValue ScalarLo, ScalarHi; |
8582 | std::tie(ScalarLo, ScalarHi) = |
8583 | DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32); |
8584 | |
8585 | // Double the VL since we halved SEW. |
8586 | SDValue AVL = getVLOperand(Op); |
8587 | SDValue I32VL; |
8588 | |
8589 | // Optimize for constant AVL |
8590 | if (isa<ConstantSDNode>(Val: AVL)) { |
8591 | const auto [MinVLMAX, MaxVLMAX] = |
8592 | RISCVTargetLowering::computeVLMAXBounds(VecVT: VT, Subtarget); |
8593 | |
8594 | uint64_t AVLInt = AVL->getAsZExtVal(); |
8595 | if (AVLInt <= MinVLMAX) { |
8596 | I32VL = DAG.getConstant(Val: 2 * AVLInt, DL, VT: XLenVT); |
8597 | } else if (AVLInt >= 2 * MaxVLMAX) { |
8598 | // Just set vl to VLMAX in this situation |
8599 | RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT: I32VT); |
8600 | SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT); |
8601 | unsigned Sew = RISCVVType::encodeSEW(SEW: I32VT.getScalarSizeInBits()); |
8602 | SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT); |
8603 | SDValue SETVLMAX = DAG.getTargetConstant( |
8604 | Intrinsic::riscv_vsetvlimax, DL, MVT::i32); |
8605 | I32VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVLMAX, N2: SEW, |
8606 | N3: LMUL); |
8607 | } else { |
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // depends on the hardware implementation, so let the vsetvli
        // sequence below handle it.
8611 | } |
8612 | } |
8613 | if (!I32VL) { |
8614 | RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); |
8615 | SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT); |
8616 | unsigned Sew = RISCVVType::encodeSEW(SEW: VT.getScalarSizeInBits()); |
8617 | SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT); |
8618 | SDValue SETVL = |
8619 | DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32); |
      // Use a vsetvli instruction to get the actually-used length, which
      // depends on the hardware implementation.
8622 | SDValue VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVL, N2: AVL, |
8623 | N3: SEW, N4: LMUL); |
8624 | I32VL = |
8625 | DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: VL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
8626 | } |
8627 | |
8628 | SDValue I32Mask = getAllOnesMask(VecVT: I32VT, VL: I32VL, DL, DAG); |
8629 | |
8630 | // Shift the two scalar parts in using SEW=32 slide1up/slide1down |
8631 | // instructions. |
8632 | SDValue Passthru; |
8633 | if (IsMasked) |
8634 | Passthru = DAG.getUNDEF(VT: I32VT); |
8635 | else |
8636 | Passthru = DAG.getBitcast(VT: I32VT, V: Operands[1]); |
8637 | |
8638 | if (IntNo == Intrinsic::riscv_vslide1up || |
8639 | IntNo == Intrinsic::riscv_vslide1up_mask) { |
8640 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8641 | N3: ScalarHi, N4: I32Mask, N5: I32VL); |
8642 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8643 | N3: ScalarLo, N4: I32Mask, N5: I32VL); |
8644 | } else { |
8645 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8646 | N3: ScalarLo, N4: I32Mask, N5: I32VL); |
8647 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8648 | N3: ScalarHi, N4: I32Mask, N5: I32VL); |
8649 | } |
8650 | |
8651 | // Convert back to nxvXi64. |
8652 | Vec = DAG.getBitcast(VT, V: Vec); |
8653 | |
8654 | if (!IsMasked) |
8655 | return Vec; |
8656 | // Apply mask after the operation. |
8657 | SDValue Mask = Operands[NumOps - 3]; |
8658 | SDValue MaskedOff = Operands[1]; |
8659 | // Assume Policy operand is the last operand. |
8660 | uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal(); |
8661 | // We don't need to select maskedoff if it's undef. |
8662 | if (MaskedOff.isUndef()) |
8663 | return Vec; |
8664 | // TAMU |
8665 | if (Policy == RISCVII::TAIL_AGNOSTIC) |
8666 | return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff, |
8667 | N4: DAG.getUNDEF(VT), N5: AVL); |
8668 | // TUMA or TUMU: Currently we always emit the tumu policy regardless of tuma.
8669 | // It's fine because vmerge does not care about the mask policy.
8670 | return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff, |
8671 | N4: MaskedOff, N5: AVL); |
8672 | } |
8673 | } |
8674 | |
8675 | // We need to convert the scalar to a splat vector. |
8676 | SDValue VL = getVLOperand(Op); |
8677 | assert(VL.getValueType() == XLenVT); |
8678 | ScalarOp = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar: ScalarOp, VL, DAG); |
8679 | return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands); |
8680 | } |
8681 | |
8682 | // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support |
8683 | // scalable vector llvm.get.vector.length for now. |
8684 | // |
8685 | // We need to convert from a scalable VF to a vsetvli with VLMax equal to |
8686 | // (vscale * VF). The vscale and VF are independent of element width. We use |
8687 | // SEW=8 for the vsetvli because it is the only element width that supports all |
8688 | // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8689 | // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8690 | // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different |
8691 | // SEW and LMUL are better for the surrounding vector instructions. |
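// For example (assuming RVVBitsPerBlock == 64): LMUL=1 at SEW=8 corresponds
// to VF == 64/8 == 8, so a request for VF=2 encodes the fractional LMUL mf4
// (8/2 == 4) and VF=32 encodes LMUL m4 (32/8 == 4); in both cases the
// vsetvli's VLMAX is VLEN * LMUL / SEW == vscale * VF.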
8692 | static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, |
8693 | const RISCVSubtarget &Subtarget) { |
8694 | MVT XLenVT = Subtarget.getXLenVT(); |
8695 | |
8696 | // The smallest LMUL is only valid for the smallest element width. |
8697 | const unsigned ElementWidth = 8; |
8698 | |
8699 | // Determine the VF that corresponds to LMUL 1 for ElementWidth. |
8700 | unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth; |
8701 | // We don't support VF==1 with ELEN==32. |
8702 | [[maybe_unused]] unsigned MinVF = |
8703 | RISCV::RVVBitsPerBlock / Subtarget.getELen(); |
8704 | |
8705 | [[maybe_unused]] unsigned VF = N->getConstantOperandVal(Num: 2); |
8706 | assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) && |
8707 | "Unexpected VF" ); |
8708 | |
8709 | bool Fractional = VF < LMul1VF; |
8710 | unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF; |
8711 | unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMUL: LMulVal, Fractional); |
8712 | unsigned VSEW = RISCVVType::encodeSEW(SEW: ElementWidth); |
8713 | |
8714 | SDLoc DL(N); |
8715 | |
8716 | SDValue LMul = DAG.getTargetConstant(Val: VLMUL, DL, VT: XLenVT); |
8717 | SDValue Sew = DAG.getTargetConstant(Val: VSEW, DL, VT: XLenVT); |
8718 | |
8719 | SDValue AVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 1)); |
8720 | |
8721 | SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); |
8722 | SDValue Res = |
8723 | DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: ID, N2: AVL, N3: Sew, N4: LMul); |
8724 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res); |
8725 | } |
8726 | |
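// Lower llvm.experimental.cttz.elts to vfirst.m. vfirst returns the index of
// the first set mask bit, or -1 if none is set; e.g. for a <4 x i1> mask
// 0b0100 it returns 2. When the intrinsic's zero-is-poison flag (operand 2)
// is clear, the -1 result is canonicalized to the element count below.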
8727 | static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, |
8728 | const RISCVSubtarget &Subtarget) { |
8729 | SDValue Op0 = N->getOperand(Num: 1); |
8730 | MVT OpVT = Op0.getSimpleValueType(); |
8731 | MVT ContainerVT = OpVT; |
8732 | if (OpVT.isFixedLengthVector()) { |
8733 | ContainerVT = getContainerForFixedLengthVector(DAG, VT: OpVT, Subtarget); |
8734 | Op0 = convertToScalableVector(VT: ContainerVT, V: Op0, DAG, Subtarget); |
8735 | } |
8736 | MVT XLenVT = Subtarget.getXLenVT(); |
8737 | SDLoc DL(N); |
8738 | auto [Mask, VL] = getDefaultVLOps(VecVT: OpVT, ContainerVT, DL, DAG, Subtarget); |
8739 | SDValue Res = DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Op0, N2: Mask, N3: VL); |
8740 | if (isOneConstant(V: N->getOperand(Num: 2))) |
8741 | return Res; |
8742 | |
8743 | // Convert -1 to VL. |
8744 | SDValue Setcc = |
8745 | DAG.getSetCC(DL, VT: XLenVT, LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETLT); |
8746 | VL = DAG.getElementCount(DL, VT: XLenVT, EC: OpVT.getVectorElementCount()); |
8747 | return DAG.getSelect(DL, VT: XLenVT, Cond: Setcc, LHS: VL, RHS: Res); |
8748 | } |
8749 | |
8750 | static inline void promoteVCIXScalar(const SDValue &Op, |
8751 | SmallVectorImpl<SDValue> &Operands, |
8752 | SelectionDAG &DAG) { |
8753 | const RISCVSubtarget &Subtarget = |
8754 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
8755 | |
8756 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || |
8757 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
8758 | unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0); |
8759 | SDLoc DL(Op); |
8760 | |
8761 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
8762 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); |
8763 | if (!II || !II->hasScalarOperand()) |
8764 | return; |
8765 | |
8766 | unsigned SplatOp = II->ScalarOperand + 1; |
8767 | assert(SplatOp < Op.getNumOperands()); |
8768 | |
8769 | SDValue &ScalarOp = Operands[SplatOp]; |
8770 | MVT OpVT = ScalarOp.getSimpleValueType(); |
8771 | MVT XLenVT = Subtarget.getXLenVT(); |
8772 | |
8773 | // The code below is partially copied from lowerVectorIntrinsicScalars. |
8774 | // If this isn't a scalar, or its type is XLenVT, we're done.
8775 | if (!OpVT.isScalarInteger() || OpVT == XLenVT) |
8776 | return; |
8777 | |
8778 | // Manually promote the scalar operand: sign-extend constants (preserving their signed immediate value), any-extend everything else.
8779 | if (OpVT.bitsLT(VT: XLenVT)) { |
8780 | unsigned ExtOpc = |
8781 | isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
8782 | ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp); |
8783 | } |
8784 | |
8785 | return; |
8786 | } |
8787 | |
8788 | static void processVCIXOperands(SDValue &OrigOp, |
8789 | SmallVectorImpl<SDValue> &Operands, |
8790 | SelectionDAG &DAG) { |
8791 | promoteVCIXScalar(Op: OrigOp, Operands, DAG); |
8792 | const RISCVSubtarget &Subtarget = |
8793 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
8794 | for (SDValue &V : Operands) { |
8795 | EVT ValType = V.getValueType(); |
8796 | if (ValType.isVector() && ValType.isFloatingPoint()) { |
8797 | MVT InterimIVT = |
8798 | MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ValType.getScalarSizeInBits()), |
8799 | EC: ValType.getVectorElementCount()); |
8800 | V = DAG.getBitcast(VT: InterimIVT, V); |
8801 | } |
8802 | if (ValType.isFixedLengthVector()) { |
8803 | MVT OpContainerVT = getContainerForFixedLengthVector( |
8804 | DAG, VT: V.getSimpleValueType(), Subtarget); |
8805 | V = convertToScalableVector(VT: OpContainerVT, V, DAG, Subtarget); |
8806 | } |
8807 | } |
8808 | } |
8809 | |
8810 | // LMUL * VLEN should be greater than or equal to EGS * SEW |
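// For example, with a minimum VLEN of 128: nxv4i32 has a known-min size of
// 128 bits, i.e. LMUL=2, so LMUL * VLEN == 256 >= 4 * 32 and EGS=4 is valid;
// nxv1i32 (LMUL=1/2) yields only 64 bits, which cannot hold a 4-element
// group of SEW=32.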
8811 | static inline bool isValidEGW(int EGS, EVT VT, |
8812 | const RISCVSubtarget &Subtarget) { |
8813 | return (Subtarget.getRealMinVLen() * |
8814 | VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >= |
8815 | EGS * VT.getScalarSizeInBits(); |
8816 | } |
8817 | |
8818 | SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
8819 | SelectionDAG &DAG) const { |
8820 | unsigned IntNo = Op.getConstantOperandVal(i: 0); |
8821 | SDLoc DL(Op); |
8822 | MVT XLenVT = Subtarget.getXLenVT(); |
8823 | |
8824 | switch (IntNo) { |
8825 | default: |
8826 | break; // Don't custom lower most intrinsics. |
8827 | case Intrinsic::thread_pointer: { |
8828 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
8829 | return DAG.getRegister(RISCV::X4, PtrVT); |
8830 | } |
8831 | case Intrinsic::riscv_orc_b: |
8832 | case Intrinsic::riscv_brev8: |
8833 | case Intrinsic::riscv_sha256sig0: |
8834 | case Intrinsic::riscv_sha256sig1: |
8835 | case Intrinsic::riscv_sha256sum0: |
8836 | case Intrinsic::riscv_sha256sum1: |
8837 | case Intrinsic::riscv_sm3p0: |
8838 | case Intrinsic::riscv_sm3p1: { |
8839 | unsigned Opc; |
8840 | switch (IntNo) { |
8841 | case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; |
8842 | case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; |
8843 | case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; |
8844 | case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; |
8845 | case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; |
8846 | case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; |
8847 | case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; |
8848 | case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; |
8849 | } |
8850 | |
8851 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8852 | SDValue NewOp = |
8853 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8854 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); |
8855 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8856 | } |
8857 | |
8858 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1)); |
8859 | } |
8860 | case Intrinsic::riscv_sm4ks: |
8861 | case Intrinsic::riscv_sm4ed: { |
8862 | unsigned Opc = |
8863 | IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; |
8864 | |
8865 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8866 | SDValue NewOp0 = |
8867 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8868 | SDValue NewOp1 = |
8869 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); |
8870 | SDValue Res = |
8871 | DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3)); |
8872 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8873 | } |
8874 | |
8875 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), |
8876 | N3: Op.getOperand(i: 3)); |
8877 | } |
8878 | case Intrinsic::riscv_zip: |
8879 | case Intrinsic::riscv_unzip: { |
8880 | unsigned Opc = |
8881 | IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; |
8882 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1)); |
8883 | } |
8884 | case Intrinsic::riscv_mopr: { |
8885 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8886 | SDValue NewOp = |
8887 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8888 | SDValue Res = DAG.getNode( |
8889 | RISCVISD::MOPR, DL, MVT::i64, NewOp, |
8890 | DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64)); |
8891 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8892 | } |
8893 | return DAG.getNode(Opcode: RISCVISD::MOPR, DL, VT: XLenVT, N1: Op.getOperand(i: 1), |
8894 | N2: Op.getOperand(i: 2)); |
8895 | } |
8896 | |
8897 | case Intrinsic::riscv_moprr: { |
8898 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8899 | SDValue NewOp0 = |
8900 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8901 | SDValue NewOp1 = |
8902 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); |
8903 | SDValue Res = DAG.getNode( |
8904 | RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1, |
8905 | DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64)); |
8906 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8907 | } |
8908 | return DAG.getNode(Opcode: RISCVISD::MOPRR, DL, VT: XLenVT, N1: Op.getOperand(i: 1), |
8909 | N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
8910 | } |
8911 | case Intrinsic::riscv_clmul: |
8912 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8913 | SDValue NewOp0 = |
8914 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8915 | SDValue NewOp1 = |
8916 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); |
8917 | SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); |
8918 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8919 | } |
8920 | return DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: XLenVT, N1: Op.getOperand(i: 1), |
8921 | N2: Op.getOperand(i: 2)); |
8922 | case Intrinsic::riscv_clmulh: |
8923 | case Intrinsic::riscv_clmulr: { |
8924 | unsigned Opc = |
8925 | IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR; |
8926 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8927 | SDValue NewOp0 = |
8928 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8929 | SDValue NewOp1 = |
8930 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); |
8931 | NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, |
8932 | DAG.getConstant(32, DL, MVT::i64)); |
8933 | NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, |
8934 | DAG.getConstant(32, DL, MVT::i64)); |
8935 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); |
8936 | Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, |
8937 | DAG.getConstant(32, DL, MVT::i64)); |
8938 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8939 | } |
8940 | |
8941 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
8942 | } |
8943 | case Intrinsic::experimental_get_vector_length: |
8944 | return lowerGetVectorLength(N: Op.getNode(), DAG, Subtarget); |
8945 | case Intrinsic::experimental_cttz_elts: |
8946 | return lowerCttzElts(N: Op.getNode(), DAG, Subtarget); |
8947 | case Intrinsic::riscv_vmv_x_s: { |
8948 | SDValue Res = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Op.getOperand(i: 1)); |
8949 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: Res); |
8950 | } |
8951 | case Intrinsic::riscv_vfmv_f_s: |
8952 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Op.getValueType(), |
8953 | N1: Op.getOperand(i: 1), N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
8954 | case Intrinsic::riscv_vmv_v_x: |
8955 | return lowerScalarSplat(Passthru: Op.getOperand(i: 1), Scalar: Op.getOperand(i: 2), |
8956 | VL: Op.getOperand(i: 3), VT: Op.getSimpleValueType(), DL, DAG, |
8957 | Subtarget); |
8958 | case Intrinsic::riscv_vfmv_v_f: |
8959 | return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: Op.getValueType(), |
8960 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
8961 | case Intrinsic::riscv_vmv_s_x: { |
8962 | SDValue Scalar = Op.getOperand(i: 2); |
8963 | |
8964 | if (Scalar.getValueType().bitsLE(VT: XLenVT)) { |
8965 | Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Scalar); |
8966 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: Op.getValueType(), |
8967 | N1: Op.getOperand(i: 1), N2: Scalar, N3: Op.getOperand(i: 3)); |
8968 | } |
8969 | |
8970 | assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!" ); |
8971 | |
8972 | // This is an i64 value that lives in two scalar registers. We have to |
8973 | // insert this in a convoluted way. First we build a vXi64 splat containing
8974 | // the two values that we assemble using some bit math. Next we'll use |
8975 | // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask |
8976 | // to merge element 0 from our splat into the source vector. |
8977 | // FIXME: This is probably not the best way to do this, but it is |
8978 | // consistent with INSERT_VECTOR_ELT lowering so it is a good starting |
8979 | // point. |
8980 | // sw lo, (a0) |
8981 | // sw hi, 4(a0) |
8982 | // vlse vX, (a0) |
8983 | // |
8984 | // vid.v vVid |
8985 | // vmseq.vx mMask, vVid, 0 |
8986 | // vmerge.vvm vDest, vSrc, vVal, mMask |
8987 | MVT VT = Op.getSimpleValueType(); |
8988 | SDValue Vec = Op.getOperand(i: 1); |
8989 | SDValue VL = getVLOperand(Op); |
8990 | |
8991 | SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar, VL, DAG); |
8992 | if (Op.getOperand(i: 1).isUndef()) |
8993 | return SplattedVal; |
8994 | SDValue SplattedIdx = |
8995 | DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), |
8996 | DAG.getConstant(0, DL, MVT::i32), VL); |
8997 | |
8998 | MVT MaskVT = getMaskTypeFor(VecVT: VT); |
8999 | SDValue Mask = getAllOnesMask(VecVT: VT, VL, DL, DAG); |
9000 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL); |
9001 | SDValue SelectCond = |
9002 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT, |
9003 | Ops: {VID, SplattedIdx, DAG.getCondCode(Cond: ISD::SETEQ), |
9004 | DAG.getUNDEF(VT: MaskVT), Mask, VL}); |
9005 | return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: SelectCond, N2: SplattedVal, |
9006 | N3: Vec, N4: DAG.getUNDEF(VT), N5: VL); |
9007 | } |
9008 | case Intrinsic::riscv_vfmv_s_f: |
9009 | return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT: Op.getSimpleValueType(), |
9010 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
9011 | // EGS * EEW >= 128 bits |
9012 | case Intrinsic::riscv_vaesdf_vv: |
9013 | case Intrinsic::riscv_vaesdf_vs: |
9014 | case Intrinsic::riscv_vaesdm_vv: |
9015 | case Intrinsic::riscv_vaesdm_vs: |
9016 | case Intrinsic::riscv_vaesef_vv: |
9017 | case Intrinsic::riscv_vaesef_vs: |
9018 | case Intrinsic::riscv_vaesem_vv: |
9019 | case Intrinsic::riscv_vaesem_vs: |
9020 | case Intrinsic::riscv_vaeskf1: |
9021 | case Intrinsic::riscv_vaeskf2: |
9022 | case Intrinsic::riscv_vaesz_vs: |
9023 | case Intrinsic::riscv_vsm4k: |
9024 | case Intrinsic::riscv_vsm4r_vv: |
9025 | case Intrinsic::riscv_vsm4r_vs: { |
9026 | if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) || |
9027 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) || |
9028 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget)) |
9029 | report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW." ); |
9030 | return Op; |
9031 | } |
9032 | // EGS * EEW >= 256 bits |
9033 | case Intrinsic::riscv_vsm3c: |
9034 | case Intrinsic::riscv_vsm3me: { |
9035 | if (!isValidEGW(EGS: 8, VT: Op.getSimpleValueType(), Subtarget) || |
9036 | !isValidEGW(EGS: 8, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget)) |
9037 | report_fatal_error(reason: "EGW should be greater than or equal to 8 * SEW." ); |
9038 | return Op; |
9039 | } |
9040 | // zvknha(SEW=32)/zvknhb(SEW=[32|64]) |
9041 | case Intrinsic::riscv_vsha2ch: |
9042 | case Intrinsic::riscv_vsha2cl: |
9043 | case Intrinsic::riscv_vsha2ms: { |
9044 | if (Op->getSimpleValueType(ResNo: 0).getScalarSizeInBits() == 64 && |
9045 | !Subtarget.hasStdExtZvknhb()) |
9046 | report_fatal_error(reason: "SEW=64 needs Zvknhb to be enabled." ); |
9047 | if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) || |
9048 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) || |
9049 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget)) |
9050 | report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW." ); |
9051 | return Op; |
9052 | } |
9053 | case Intrinsic::riscv_sf_vc_v_x: |
9054 | case Intrinsic::riscv_sf_vc_v_i: |
9055 | case Intrinsic::riscv_sf_vc_v_xv: |
9056 | case Intrinsic::riscv_sf_vc_v_iv: |
9057 | case Intrinsic::riscv_sf_vc_v_vv: |
9058 | case Intrinsic::riscv_sf_vc_v_fv: |
9059 | case Intrinsic::riscv_sf_vc_v_xvv: |
9060 | case Intrinsic::riscv_sf_vc_v_ivv: |
9061 | case Intrinsic::riscv_sf_vc_v_vvv: |
9062 | case Intrinsic::riscv_sf_vc_v_fvv: |
9063 | case Intrinsic::riscv_sf_vc_v_xvw: |
9064 | case Intrinsic::riscv_sf_vc_v_ivw: |
9065 | case Intrinsic::riscv_sf_vc_v_vvw: |
9066 | case Intrinsic::riscv_sf_vc_v_fvw: { |
9067 | MVT VT = Op.getSimpleValueType(); |
9068 | |
9069 | SmallVector<SDValue> Operands{Op->op_values()}; |
9070 | processVCIXOperands(OrigOp&: Op, Operands, DAG); |
9071 | |
9072 | MVT RetVT = VT; |
9073 | if (VT.isFixedLengthVector()) |
9074 | RetVT = getContainerForFixedLengthVector(VT); |
9075 | else if (VT.isFloatingPoint()) |
9076 | RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()), |
9077 | EC: VT.getVectorElementCount()); |
9078 | |
9079 | SDValue NewNode = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: RetVT, Ops: Operands); |
9080 | |
9081 | if (VT.isFixedLengthVector()) |
9082 | NewNode = convertFromScalableVector(VT, V: NewNode, DAG, Subtarget); |
9083 | else if (VT.isFloatingPoint()) |
9084 | NewNode = DAG.getBitcast(VT, V: NewNode); |
9085 | |
9086 | if (Op == NewNode) |
9087 | break; |
9088 | |
9089 | return NewNode; |
9090 | } |
9091 | } |
9092 | |
9093 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
9094 | } |
9095 | |
9096 | static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, |
9097 | unsigned Type) { |
9098 | SDLoc DL(Op); |
9099 | SmallVector<SDValue> Operands{Op->op_values()}; |
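// Drop the intrinsic ID (operand 1); the VCIX ISD opcode passed in as Type
// now identifies the operation.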
9100 | Operands.erase(CI: Operands.begin() + 1); |
9101 | |
9102 | const RISCVSubtarget &Subtarget = |
9103 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
9104 | MVT VT = Op.getSimpleValueType(); |
9105 | MVT RetVT = VT; |
9106 | MVT FloatVT = VT; |
9107 | |
9108 | if (VT.isFloatingPoint()) { |
9109 | RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()), |
9110 | EC: VT.getVectorElementCount()); |
9111 | FloatVT = RetVT; |
9112 | } |
9113 | if (VT.isFixedLengthVector()) |
9114 | RetVT = getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT: RetVT, |
9115 | Subtarget); |
9116 | |
9117 | processVCIXOperands(OrigOp&: Op, Operands, DAG); |
9118 | |
9119 | SDVTList VTs = DAG.getVTList({RetVT, MVT::Other}); |
9120 | SDValue NewNode = DAG.getNode(Opcode: Type, DL, VTList: VTs, Ops: Operands); |
9121 | SDValue Chain = NewNode.getValue(R: 1); |
9122 | |
9123 | if (VT.isFixedLengthVector()) |
9124 | NewNode = convertFromScalableVector(VT: FloatVT, V: NewNode, DAG, Subtarget); |
9125 | if (VT.isFloatingPoint()) |
9126 | NewNode = DAG.getBitcast(VT, V: NewNode); |
9127 | |
9128 | NewNode = DAG.getMergeValues(Ops: {NewNode, Chain}, dl: DL); |
9129 | |
9130 | return NewNode; |
9131 | } |
9132 | |
9133 | static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, |
9134 | unsigned Type) { |
9135 | SmallVector<SDValue> Operands{Op->op_values()}; |
9136 | Operands.erase(CI: Operands.begin() + 1); |
9137 | processVCIXOperands(OrigOp&: Op, Operands, DAG); |
9138 | |
9139 | return DAG.getNode(Opcode: Type, DL: SDLoc(Op), VT: Op.getValueType(), Ops: Operands); |
9140 | } |
9141 | |
9142 | SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, |
9143 | SelectionDAG &DAG) const { |
9144 | unsigned IntNo = Op.getConstantOperandVal(i: 1); |
9145 | switch (IntNo) { |
9146 | default: |
9147 | break; |
9148 | case Intrinsic::riscv_masked_strided_load: { |
9149 | SDLoc DL(Op); |
9150 | MVT XLenVT = Subtarget.getXLenVT(); |
9151 | |
9152 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
9153 | // the selection of the masked intrinsics doesn't do this for us. |
9154 | SDValue Mask = Op.getOperand(i: 5); |
9155 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
9156 | |
9157 | MVT VT = Op->getSimpleValueType(ResNo: 0); |
9158 | MVT ContainerVT = VT; |
9159 | if (VT.isFixedLengthVector()) |
9160 | ContainerVT = getContainerForFixedLengthVector(VT); |
9161 | |
9162 | SDValue PassThru = Op.getOperand(i: 2); |
9163 | if (!IsUnmasked) { |
9164 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
9165 | if (VT.isFixedLengthVector()) { |
9166 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
9167 | PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget); |
9168 | } |
9169 | } |
9170 | |
9171 | auto *Load = cast<MemIntrinsicSDNode>(Val&: Op); |
9172 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
9173 | SDValue Ptr = Op.getOperand(i: 3); |
9174 | SDValue Stride = Op.getOperand(i: 4); |
9175 | SDValue Result, Chain; |
9176 | |
9177 | // A zero-stride load reads the same address for every element, so it can be
9178 | // lowered to a scalar load plus a splat. TODO: We currently restrict this to unmasked loads given the complexity of handling all-false masks.
9179 | MVT ScalarVT = ContainerVT.getVectorElementType(); |
9180 | if (IsUnmasked && isNullConstant(V: Stride) && ContainerVT.isInteger()) { |
9181 | SDValue ScalarLoad = |
9182 | DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT: XLenVT, Chain: Load->getChain(), Ptr, |
9183 | MemVT: ScalarVT, MMO: Load->getMemOperand()); |
9184 | Chain = ScalarLoad.getValue(R: 1); |
9185 | Result = lowerScalarSplat(Passthru: SDValue(), Scalar: ScalarLoad, VL, VT: ContainerVT, DL, DAG, |
9186 | Subtarget); |
9187 | } else if (IsUnmasked && isNullConstant(V: Stride) && isTypeLegal(VT: ScalarVT)) { |
9188 | SDValue ScalarLoad = DAG.getLoad(VT: ScalarVT, dl: DL, Chain: Load->getChain(), Ptr, |
9189 | MMO: Load->getMemOperand()); |
9190 | Chain = ScalarLoad.getValue(R: 1); |
9191 | Result = DAG.getSplat(VT: ContainerVT, DL, Op: ScalarLoad); |
9192 | } else { |
9193 | SDValue IntID = DAG.getTargetConstant( |
9194 | IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, |
9195 | XLenVT); |
9196 | |
9197 | SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; |
9198 | if (IsUnmasked) |
9199 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
9200 | else |
9201 | Ops.push_back(Elt: PassThru); |
9202 | Ops.push_back(Elt: Ptr); |
9203 | Ops.push_back(Elt: Stride); |
9204 | if (!IsUnmasked) |
9205 | Ops.push_back(Elt: Mask); |
9206 | Ops.push_back(Elt: VL); |
9207 | if (!IsUnmasked) { |
9208 | SDValue Policy = |
9209 | DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT); |
9210 | Ops.push_back(Elt: Policy); |
9211 | } |
9212 | |
9213 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
9214 | Result = |
9215 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
9216 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand()); |
9217 | Chain = Result.getValue(R: 1); |
9218 | } |
9219 | if (VT.isFixedLengthVector()) |
9220 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
9221 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
9222 | } |
9223 | case Intrinsic::riscv_seg2_load: |
9224 | case Intrinsic::riscv_seg3_load: |
9225 | case Intrinsic::riscv_seg4_load: |
9226 | case Intrinsic::riscv_seg5_load: |
9227 | case Intrinsic::riscv_seg6_load: |
9228 | case Intrinsic::riscv_seg7_load: |
9229 | case Intrinsic::riscv_seg8_load: { |
9230 | SDLoc DL(Op); |
9231 | static const Intrinsic::ID VlsegInts[7] = { |
9232 | Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, |
9233 | Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, |
9234 | Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, |
9235 | Intrinsic::riscv_vlseg8}; |
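// One result per segment field plus the chain, so NF = NumValues - 1.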
9236 | unsigned NF = Op->getNumValues() - 1; |
9237 | assert(NF >= 2 && NF <= 8 && "Unexpected seg number" ); |
9238 | MVT XLenVT = Subtarget.getXLenVT(); |
9239 | MVT VT = Op->getSimpleValueType(ResNo: 0); |
9240 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
9241 | |
9242 | SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, |
9243 | Subtarget); |
9244 | SDValue IntID = DAG.getTargetConstant(Val: VlsegInts[NF - 2], DL, VT: XLenVT); |
9245 | auto *Load = cast<MemIntrinsicSDNode>(Val&: Op); |
9246 | SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); |
9247 | ContainerVTs.push_back(MVT::Other); |
9248 | SDVTList VTs = DAG.getVTList(VTs: ContainerVTs); |
9249 | SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; |
9250 | Ops.insert(I: Ops.end(), NumToInsert: NF, Elt: DAG.getUNDEF(VT: ContainerVT)); |
9251 | Ops.push_back(Elt: Op.getOperand(i: 2)); |
9252 | Ops.push_back(Elt: VL); |
9253 | SDValue Result = |
9254 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
9255 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand()); |
9256 | SmallVector<SDValue, 9> Results; |
9257 | for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) |
9258 | Results.push_back(Elt: convertFromScalableVector(VT, V: Result.getValue(R: RetIdx), |
9259 | DAG, Subtarget)); |
9260 | Results.push_back(Elt: Result.getValue(R: NF)); |
9261 | return DAG.getMergeValues(Ops: Results, dl: DL); |
9262 | } |
9263 | case Intrinsic::riscv_sf_vc_v_x_se: |
9264 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_X_SE); |
9265 | case Intrinsic::riscv_sf_vc_v_i_se: |
9266 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_I_SE); |
9267 | case Intrinsic::riscv_sf_vc_v_xv_se: |
9268 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XV_SE); |
9269 | case Intrinsic::riscv_sf_vc_v_iv_se: |
9270 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IV_SE); |
9271 | case Intrinsic::riscv_sf_vc_v_vv_se: |
9272 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VV_SE); |
9273 | case Intrinsic::riscv_sf_vc_v_fv_se: |
9274 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FV_SE); |
9275 | case Intrinsic::riscv_sf_vc_v_xvv_se: |
9276 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVV_SE); |
9277 | case Intrinsic::riscv_sf_vc_v_ivv_se: |
9278 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVV_SE); |
9279 | case Intrinsic::riscv_sf_vc_v_vvv_se: |
9280 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVV_SE); |
9281 | case Intrinsic::riscv_sf_vc_v_fvv_se: |
9282 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVV_SE); |
9283 | case Intrinsic::riscv_sf_vc_v_xvw_se: |
9284 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVW_SE); |
9285 | case Intrinsic::riscv_sf_vc_v_ivw_se: |
9286 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVW_SE); |
9287 | case Intrinsic::riscv_sf_vc_v_vvw_se: |
9288 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVW_SE); |
9289 | case Intrinsic::riscv_sf_vc_v_fvw_se: |
9290 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVW_SE); |
9291 | } |
9292 | |
9293 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
9294 | } |
9295 | |
9296 | SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, |
9297 | SelectionDAG &DAG) const { |
9298 | unsigned IntNo = Op.getConstantOperandVal(i: 1); |
9299 | switch (IntNo) { |
9300 | default: |
9301 | break; |
9302 | case Intrinsic::riscv_masked_strided_store: { |
9303 | SDLoc DL(Op); |
9304 | MVT XLenVT = Subtarget.getXLenVT(); |
9305 | |
9306 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
9307 | // the selection of the masked intrinsics doesn't do this for us. |
9308 | SDValue Mask = Op.getOperand(i: 5); |
9309 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
9310 | |
9311 | SDValue Val = Op.getOperand(i: 2); |
9312 | MVT VT = Val.getSimpleValueType(); |
9313 | MVT ContainerVT = VT; |
9314 | if (VT.isFixedLengthVector()) { |
9315 | ContainerVT = getContainerForFixedLengthVector(VT); |
9316 | Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget); |
9317 | } |
9318 | if (!IsUnmasked) { |
9319 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
9320 | if (VT.isFixedLengthVector()) |
9321 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
9322 | } |
9323 | |
9324 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
9325 | |
9326 | SDValue IntID = DAG.getTargetConstant( |
9327 | IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, |
9328 | XLenVT); |
9329 | |
9330 | auto *Store = cast<MemIntrinsicSDNode>(Val&: Op); |
9331 | SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; |
9332 | Ops.push_back(Elt: Val); |
9333 | Ops.push_back(Elt: Op.getOperand(i: 3)); // Ptr |
9334 | Ops.push_back(Elt: Op.getOperand(i: 4)); // Stride |
9335 | if (!IsUnmasked) |
9336 | Ops.push_back(Elt: Mask); |
9337 | Ops.push_back(Elt: VL); |
9338 | |
9339 | return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: Store->getVTList(), |
9340 | Ops, MemVT: Store->getMemoryVT(), |
9341 | MMO: Store->getMemOperand()); |
9342 | } |
9343 | case Intrinsic::riscv_seg2_store: |
9344 | case Intrinsic::riscv_seg3_store: |
9345 | case Intrinsic::riscv_seg4_store: |
9346 | case Intrinsic::riscv_seg5_store: |
9347 | case Intrinsic::riscv_seg6_store: |
9348 | case Intrinsic::riscv_seg7_store: |
9349 | case Intrinsic::riscv_seg8_store: { |
9350 | SDLoc DL(Op); |
9351 | static const Intrinsic::ID VssegInts[] = { |
9352 | Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, |
9353 | Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, |
9354 | Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, |
9355 | Intrinsic::riscv_vsseg8}; |
9356 | // Operands are (chain, int_id, vec*, ptr, vl) |
9357 | unsigned NF = Op->getNumOperands() - 4; |
9358 | assert(NF >= 2 && NF <= 8 && "Unexpected seg number" ); |
9359 | MVT XLenVT = Subtarget.getXLenVT(); |
9360 | MVT VT = Op->getOperand(Num: 2).getSimpleValueType(); |
9361 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
9362 | |
9363 | SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, |
9364 | Subtarget); |
9365 | SDValue IntID = DAG.getTargetConstant(Val: VssegInts[NF - 2], DL, VT: XLenVT); |
9366 | SDValue Ptr = Op->getOperand(Num: NF + 2); |
9367 | |
9368 | auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Val&: Op); |
9369 | SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID}; |
9370 | for (unsigned i = 0; i < NF; i++) |
9371 | Ops.push_back(Elt: convertToScalableVector( |
9372 | VT: ContainerVT, V: FixedIntrinsic->getOperand(Num: 2 + i), DAG, Subtarget)); |
9373 | Ops.append(IL: {Ptr, VL}); |
9374 | |
9375 | return DAG.getMemIntrinsicNode( |
9376 | ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, |
9377 | FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); |
9378 | } |
9379 | case Intrinsic::riscv_sf_vc_xv_se: |
9380 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XV_SE); |
9381 | case Intrinsic::riscv_sf_vc_iv_se: |
9382 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IV_SE); |
9383 | case Intrinsic::riscv_sf_vc_vv_se: |
9384 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VV_SE); |
9385 | case Intrinsic::riscv_sf_vc_fv_se: |
9386 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FV_SE); |
9387 | case Intrinsic::riscv_sf_vc_xvv_se: |
9388 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVV_SE); |
9389 | case Intrinsic::riscv_sf_vc_ivv_se: |
9390 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVV_SE); |
9391 | case Intrinsic::riscv_sf_vc_vvv_se: |
9392 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVV_SE); |
9393 | case Intrinsic::riscv_sf_vc_fvv_se: |
9394 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVV_SE); |
9395 | case Intrinsic::riscv_sf_vc_xvw_se: |
9396 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVW_SE); |
9397 | case Intrinsic::riscv_sf_vc_ivw_se: |
9398 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVW_SE); |
9399 | case Intrinsic::riscv_sf_vc_vvw_se: |
9400 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVW_SE); |
9401 | case Intrinsic::riscv_sf_vc_fvw_se: |
9402 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVW_SE); |
9403 | } |
9404 | |
9405 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
9406 | } |
9407 | |
9408 | static unsigned getRVVReductionOp(unsigned ISDOpcode) { |
9409 | switch (ISDOpcode) { |
9410 | default: |
9411 | llvm_unreachable("Unhandled reduction" ); |
9412 | case ISD::VP_REDUCE_ADD: |
9413 | case ISD::VECREDUCE_ADD: |
9414 | return RISCVISD::VECREDUCE_ADD_VL; |
9415 | case ISD::VP_REDUCE_UMAX: |
9416 | case ISD::VECREDUCE_UMAX: |
9417 | return RISCVISD::VECREDUCE_UMAX_VL; |
9418 | case ISD::VP_REDUCE_SMAX: |
9419 | case ISD::VECREDUCE_SMAX: |
9420 | return RISCVISD::VECREDUCE_SMAX_VL; |
9421 | case ISD::VP_REDUCE_UMIN: |
9422 | case ISD::VECREDUCE_UMIN: |
9423 | return RISCVISD::VECREDUCE_UMIN_VL; |
9424 | case ISD::VP_REDUCE_SMIN: |
9425 | case ISD::VECREDUCE_SMIN: |
9426 | return RISCVISD::VECREDUCE_SMIN_VL; |
9427 | case ISD::VP_REDUCE_AND: |
9428 | case ISD::VECREDUCE_AND: |
9429 | return RISCVISD::VECREDUCE_AND_VL; |
9430 | case ISD::VP_REDUCE_OR: |
9431 | case ISD::VECREDUCE_OR: |
9432 | return RISCVISD::VECREDUCE_OR_VL; |
9433 | case ISD::VP_REDUCE_XOR: |
9434 | case ISD::VECREDUCE_XOR: |
9435 | return RISCVISD::VECREDUCE_XOR_VL; |
9436 | case ISD::VP_REDUCE_FADD: |
9437 | return RISCVISD::VECREDUCE_FADD_VL; |
9438 | case ISD::VP_REDUCE_SEQ_FADD: |
9439 | return RISCVISD::VECREDUCE_SEQ_FADD_VL; |
9440 | case ISD::VP_REDUCE_FMAX: |
9441 | return RISCVISD::VECREDUCE_FMAX_VL; |
9442 | case ISD::VP_REDUCE_FMIN: |
9443 | return RISCVISD::VECREDUCE_FMIN_VL; |
9444 | } |
9446 | }
9447 | |
9448 | SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, |
9449 | SelectionDAG &DAG, |
9450 | bool IsVP) const { |
9451 | SDLoc DL(Op); |
9452 | SDValue Vec = Op.getOperand(i: IsVP ? 1 : 0); |
9453 | MVT VecVT = Vec.getSimpleValueType(); |
9454 | assert((Op.getOpcode() == ISD::VECREDUCE_AND || |
9455 | Op.getOpcode() == ISD::VECREDUCE_OR || |
9456 | Op.getOpcode() == ISD::VECREDUCE_XOR || |
9457 | Op.getOpcode() == ISD::VP_REDUCE_AND || |
9458 | Op.getOpcode() == ISD::VP_REDUCE_OR || |
9459 | Op.getOpcode() == ISD::VP_REDUCE_XOR) && |
9460 | "Unexpected reduction lowering" ); |
9461 | |
9462 | MVT XLenVT = Subtarget.getXLenVT(); |
9463 | |
9464 | MVT ContainerVT = VecVT; |
9465 | if (VecVT.isFixedLengthVector()) { |
9466 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9467 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9468 | } |
9469 | |
9470 | SDValue Mask, VL; |
9471 | if (IsVP) { |
9472 | Mask = Op.getOperand(i: 2); |
9473 | VL = Op.getOperand(i: 3); |
9474 | } else { |
9475 | std::tie(args&: Mask, args&: VL) = |
9476 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9477 | } |
9478 | |
9479 | unsigned BaseOpc; |
9480 | ISD::CondCode CC; |
9481 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
9482 | |
9483 | switch (Op.getOpcode()) { |
9484 | default: |
9485 | llvm_unreachable("Unhandled reduction" ); |
9486 | case ISD::VECREDUCE_AND: |
9487 | case ISD::VP_REDUCE_AND: { |
9488 | // vcpop ~x == 0 |
9489 | SDValue TrueMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL); |
9490 | Vec = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Vec, N2: TrueMask, N3: VL); |
9491 | Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
9492 | CC = ISD::SETEQ; |
9493 | BaseOpc = ISD::AND; |
9494 | break; |
9495 | } |
9496 | case ISD::VECREDUCE_OR: |
9497 | case ISD::VP_REDUCE_OR: |
9498 | // vcpop x != 0 |
9499 | Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
9500 | CC = ISD::SETNE; |
9501 | BaseOpc = ISD::OR; |
9502 | break; |
9503 | case ISD::VECREDUCE_XOR: |
9504 | case ISD::VP_REDUCE_XOR: { |
9505 | // ((vcpop x) & 1) != 0 |
9506 | SDValue One = DAG.getConstant(Val: 1, DL, VT: XLenVT); |
9507 | Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
9508 | Vec = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Vec, N2: One); |
9509 | CC = ISD::SETNE; |
9510 | BaseOpc = ISD::XOR; |
9511 | break; |
9512 | } |
9513 | } |
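// Taken together, the emitted sequence for, e.g., a vecreduce.or of a v8i1
// mask is roughly (register names hypothetical):
//   vcpop.m a0, v0        # count the set mask bits
//   snez    a0, a0        # (vcpop x) != 0
// with AND first complementing the mask and testing vcpop == 0, and XOR
// testing the low bit of the count.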
9514 | |
9515 | SDValue SetCC = DAG.getSetCC(DL, VT: XLenVT, LHS: Vec, RHS: Zero, Cond: CC); |
9516 | SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: SetCC); |
9517 | |
9518 | if (!IsVP) |
9519 | return SetCC; |
9520 | |
9521 | // Now include the start value in the operation. |
9522 | // Note that we must return the start value when no elements are operated |
9523 | // upon. The vcpop instructions we've emitted in each case above will return |
9524 | // 0 for an inactive vector, and so we've already received the neutral value: |
9525 | // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we |
9526 | // can simply include the start value. |
9527 | return DAG.getNode(Opcode: BaseOpc, DL, VT: Op.getValueType(), N1: SetCC, N2: Op.getOperand(i: 0)); |
9528 | } |
9529 | |
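/// Return true if we can statically prove that the AVL is non-zero. An AVL
/// given as register X0 encodes VLMAX, which is never zero when vectors are
/// available.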
9530 | static bool isNonZeroAVL(SDValue AVL) { |
9531 | auto *RegisterAVL = dyn_cast<RegisterSDNode>(Val&: AVL); |
9532 | auto *ImmAVL = dyn_cast<ConstantSDNode>(Val&: AVL); |
9533 | return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || |
9534 | (ImmAVL && ImmAVL->getZExtValue() >= 1); |
9535 | } |
9536 | |
9537 | /// Helper to lower a reduction sequence of the form: |
9538 | /// scalar = reduce_op vec, scalar_start |
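/// For example (register names hypothetical), a vecreduce.add of a v4i32 in
/// v8 with the start value in a1 typically lowers to:
///   vsetivli zero, 4, e32, m1, ta, ma
///   vmv.s.x    v9, a1        # start value into element 0 of an LMUL=1 reg
///   vredsum.vs v9, v8, v9    # v9[0] = a1 + sum(v8[0..3])
///   vmv.x.s    a0, v9        # extract the scalar result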
9539 | static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, |
9540 | SDValue StartValue, SDValue Vec, SDValue Mask, |
9541 | SDValue VL, const SDLoc &DL, SelectionDAG &DAG, |
9542 | const RISCVSubtarget &Subtarget) { |
9543 | const MVT VecVT = Vec.getSimpleValueType(); |
9544 | const MVT M1VT = getLMUL1VT(VT: VecVT); |
9545 | const MVT XLenVT = Subtarget.getXLenVT(); |
9546 | const bool NonZeroAVL = isNonZeroAVL(AVL: VL); |
9547 | |
9548 | // The reduction needs an LMUL1 input; do the splat at LMUL1, or at the
9549 | // original VT if that is no larger than LMUL1 (e.g. a fractional LMUL).
9550 | auto InnerVT = VecVT.bitsLE(VT: M1VT) ? VecVT : M1VT; |
9551 | // We reuse the VL of the reduction to reduce vsetvli toggles if we can |
9552 | // prove it is non-zero. For the AVL=0 case, we need the scalar to |
9553 | // be the result of the reduction operation. |
9554 | auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(Val: 1, DL, VT: XLenVT); |
9555 | SDValue InitialValue = lowerScalarInsert(Scalar: StartValue, VL: InnerVL, VT: InnerVT, DL, |
9556 | DAG, Subtarget); |
9557 | if (M1VT != InnerVT) |
9558 | InitialValue = |
9559 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: M1VT, N1: DAG.getUNDEF(VT: M1VT), |
9560 | N2: InitialValue, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
9561 | SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(VT: M1VT) : InitialValue; |
9562 | SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT); |
9563 | SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy}; |
9564 | SDValue Reduction = DAG.getNode(Opcode: RVVOpcode, DL, VT: M1VT, Ops); |
9565 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Reduction, |
9566 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9567 | } |
9568 | |
9569 | SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, |
9570 | SelectionDAG &DAG) const { |
9571 | SDLoc DL(Op); |
9572 | SDValue Vec = Op.getOperand(i: 0); |
9573 | EVT VecEVT = Vec.getValueType(); |
9574 | |
9575 | unsigned BaseOpc = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode()); |
9576 | |
9577 | // Due to ordering in legalize types we may have a vector type that needs to |
9578 | // be split. Do that manually so we can get down to a legal type. |
9579 | while (getTypeAction(Context&: *DAG.getContext(), VT: VecEVT) == |
9580 | TargetLowering::TypeSplitVector) { |
9581 | auto [Lo, Hi] = DAG.SplitVector(N: Vec, DL); |
9582 | VecEVT = Lo.getValueType(); |
9583 | Vec = DAG.getNode(Opcode: BaseOpc, DL, VT: VecEVT, N1: Lo, N2: Hi); |
9584 | } |
9585 | |
9586 | // TODO: The type may need to be widened rather than split. Or widened before |
9587 | // it can be split. |
9588 | if (!isTypeLegal(VT: VecEVT)) |
9589 | return SDValue(); |
9590 | |
9591 | MVT VecVT = VecEVT.getSimpleVT(); |
9592 | MVT VecEltVT = VecVT.getVectorElementType(); |
9593 | unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode()); |
9594 | |
9595 | MVT ContainerVT = VecVT; |
9596 | if (VecVT.isFixedLengthVector()) { |
9597 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9598 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9599 | } |
9600 | |
9601 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9602 | |
9603 | SDValue StartV = DAG.getNeutralElement(Opcode: BaseOpc, DL, VT: VecEltVT, Flags: SDNodeFlags()); |
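// AND/OR/MIN/MAX are idempotent, so seeding the reduction with the first
// vector element is safe and avoids materializing the neutral element (e.g.
// all-ones for AND/UMIN) in a scalar register.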
9604 | switch (BaseOpc) { |
9605 | case ISD::AND: |
9606 | case ISD::OR: |
9607 | case ISD::UMAX: |
9608 | case ISD::UMIN: |
9609 | case ISD::SMAX: |
9610 | case ISD::SMIN: |
9611 | StartV = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: VecEltVT, N1: Vec, |
9612 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9613 | } |
9614 | return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: StartV, Vec, |
9615 | Mask, VL, DL, DAG, Subtarget); |
9616 | } |
9617 | |
9618 | // Given a reduction op, this function returns the matching reduction opcode, |
9619 | // the vector SDValue and the scalar SDValue required to lower this to a |
9620 | // RISCVISD node. |
9621 | static std::tuple<unsigned, SDValue, SDValue> |
9622 | getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, |
9623 | const RISCVSubtarget &Subtarget) { |
9624 | SDLoc DL(Op); |
9625 | auto Flags = Op->getFlags(); |
9626 | unsigned Opcode = Op.getOpcode(); |
9627 | switch (Opcode) { |
9628 | default: |
9629 | llvm_unreachable("Unhandled reduction" ); |
9630 | case ISD::VECREDUCE_FADD: { |
9631 | // Use positive zero if we can. It is cheaper to materialize. |
9632 | SDValue Zero = |
9633 | DAG.getConstantFP(Val: Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT: EltVT); |
9634 | return std::make_tuple(args: RISCVISD::VECREDUCE_FADD_VL, args: Op.getOperand(i: 0), args&: Zero); |
9635 | } |
9636 | case ISD::VECREDUCE_SEQ_FADD: |
9637 | return std::make_tuple(args: RISCVISD::VECREDUCE_SEQ_FADD_VL, args: Op.getOperand(i: 1), |
9638 | args: Op.getOperand(i: 0)); |
9639 | case ISD::VECREDUCE_FMINIMUM: |
9640 | case ISD::VECREDUCE_FMAXIMUM: |
9641 | case ISD::VECREDUCE_FMIN: |
9642 | case ISD::VECREDUCE_FMAX: { |
9643 | SDValue Front = |
9644 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Op.getOperand(i: 0), |
9645 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9646 | unsigned RVVOpc = |
9647 | (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM) |
9648 | ? RISCVISD::VECREDUCE_FMIN_VL |
9649 | : RISCVISD::VECREDUCE_FMAX_VL; |
9650 | return std::make_tuple(args&: RVVOpc, args: Op.getOperand(i: 0), args&: Front); |
9651 | } |
9652 | } |
9653 | } |
9654 | |
9655 | SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, |
9656 | SelectionDAG &DAG) const { |
9657 | SDLoc DL(Op); |
9658 | MVT VecEltVT = Op.getSimpleValueType(); |
9659 | |
9660 | unsigned RVVOpcode; |
9661 | SDValue VectorVal, ScalarVal; |
9662 | std::tie(args&: RVVOpcode, args&: VectorVal, args&: ScalarVal) = |
9663 | getRVVFPReductionOpAndOperands(Op, DAG, EltVT: VecEltVT, Subtarget); |
9664 | MVT VecVT = VectorVal.getSimpleValueType(); |
9665 | |
9666 | MVT ContainerVT = VecVT; |
9667 | if (VecVT.isFixedLengthVector()) { |
9668 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9669 | VectorVal = convertToScalableVector(VT: ContainerVT, V: VectorVal, DAG, Subtarget); |
9670 | } |
9671 | |
9672 | MVT ResVT = Op.getSimpleValueType(); |
9673 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9674 | SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, StartValue: ScalarVal, Vec: VectorVal, Mask, |
9675 | VL, DL, DAG, Subtarget); |
9676 | if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM && |
9677 | Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM) |
9678 | return Res; |
9679 | |
9680 | if (Op->getFlags().hasNoNaNs()) |
9681 | return Res; |
9682 | |
9683 | // Force the output to NaN if any element is NaN.
9684 | SDValue IsNan = |
9685 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
9686 | Ops: {VectorVal, VectorVal, DAG.getCondCode(Cond: ISD::SETNE), |
9687 | DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL}); |
9688 | MVT XLenVT = Subtarget.getXLenVT(); |
9689 | SDValue CPop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: IsNan, N2: Mask, N3: VL); |
9690 | SDValue NoNaNs = DAG.getSetCC(DL, VT: XLenVT, LHS: CPop, |
9691 | RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ); |
9692 | return DAG.getSelect( |
9693 | DL, VT: ResVT, Cond: NoNaNs, LHS: Res, |
9694 | RHS: DAG.getConstantFP(Val: APFloat::getNaN(Sem: DAG.EVTToAPFloatSemantics(VT: ResVT)), DL, |
9695 | VT: ResVT)); |
9696 | } |
9697 | |
9698 | SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, |
9699 | SelectionDAG &DAG) const { |
9700 | SDLoc DL(Op); |
9701 | SDValue Vec = Op.getOperand(i: 1); |
9702 | EVT VecEVT = Vec.getValueType(); |
9703 | |
9704 | // TODO: The type may need to be widened rather than split. Or widened before |
9705 | // it can be split. |
9706 | if (!isTypeLegal(VT: VecEVT)) |
9707 | return SDValue(); |
9708 | |
9709 | MVT VecVT = VecEVT.getSimpleVT(); |
9710 | unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode()); |
9711 | |
9712 | if (VecVT.isFixedLengthVector()) { |
9713 | auto ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9714 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9715 | } |
9716 | |
9717 | SDValue VL = Op.getOperand(i: 3); |
9718 | SDValue Mask = Op.getOperand(i: 2); |
9719 | return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: Op.getOperand(i: 0), |
9720 | Vec, Mask, VL, DL, DAG, Subtarget); |
9721 | } |
9722 | |
9723 | SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, |
9724 | SelectionDAG &DAG) const { |
9725 | SDValue Vec = Op.getOperand(i: 0); |
9726 | SDValue SubVec = Op.getOperand(i: 1); |
9727 | MVT VecVT = Vec.getSimpleValueType(); |
9728 | MVT SubVecVT = SubVec.getSimpleValueType(); |
9729 | |
9730 | SDLoc DL(Op); |
9731 | MVT XLenVT = Subtarget.getXLenVT(); |
9732 | unsigned OrigIdx = Op.getConstantOperandVal(i: 2); |
9733 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
9734 | |
9735 | // We don't have the ability to slide mask vectors up indexed by their i1 |
9736 | // elements; the smallest we can do is i8. Often we are able to bitcast to |
9737 | // equivalent i8 vectors. Note that when inserting a fixed-length vector |
9738 | // into a scalable one, we might not necessarily have enough scalable |
9739 | // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. |
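// For example, inserting nxv8i1 at index 8 into nxv32i1 can be re-expressed
// as inserting nxv1i8 at index 1 into nxv4i8.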
9740 | if (SubVecVT.getVectorElementType() == MVT::i1 && |
9741 | (OrigIdx != 0 || !Vec.isUndef())) { |
9742 | if (VecVT.getVectorMinNumElements() >= 8 && |
9743 | SubVecVT.getVectorMinNumElements() >= 8) { |
9744 | assert(OrigIdx % 8 == 0 && "Invalid index" ); |
9745 | assert(VecVT.getVectorMinNumElements() % 8 == 0 && |
9746 | SubVecVT.getVectorMinNumElements() % 8 == 0 && |
9747 | "Unexpected mask vector lowering" ); |
9748 | OrigIdx /= 8; |
9749 | SubVecVT = |
9750 | MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, |
9751 | SubVecVT.isScalableVector()); |
9752 | VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, |
9753 | VecVT.isScalableVector()); |
9754 | Vec = DAG.getBitcast(VT: VecVT, V: Vec); |
9755 | SubVec = DAG.getBitcast(VT: SubVecVT, V: SubVec); |
9756 | } else { |
9757 | // We can't slide this mask vector up indexed by its i1 elements. |
9758 | // This poses a problem when we wish to insert a scalable vector which |
9759 | // can't be re-expressed as a larger type. Just choose the slow path and |
9760 | // extend to a larger type, then truncate back down. |
9761 | MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); |
9762 | MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); |
9763 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec); |
9764 | SubVec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtSubVecVT, Operand: SubVec); |
9765 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ExtVecVT, N1: Vec, N2: SubVec, |
9766 | N3: Op.getOperand(i: 2)); |
9767 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtVecVT); |
9768 | return DAG.getSetCC(DL, VT: VecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE); |
9769 | } |
9770 | } |
9771 | |
9772 | // If the subvector is a fixed-length type, we cannot use subregister
9773 | // manipulation to simplify the codegen; we don't know which register of a |
9774 | // LMUL group contains the specific subvector as we only know the minimum |
9775 | // register size. Therefore we must slide the vector group up the full |
9776 | // amount. |
9777 | if (SubVecVT.isFixedLengthVector()) { |
9778 | if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) |
9779 | return Op; |
9780 | MVT ContainerVT = VecVT; |
9781 | if (VecVT.isFixedLengthVector()) { |
9782 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9783 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9784 | } |
9785 | |
9786 | if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { |
9787 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, |
9788 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec, |
9789 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
9790 | SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget); |
9791 | return DAG.getBitcast(VT: Op.getValueType(), V: SubVec); |
9792 | } |
9793 | |
9794 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, |
9795 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec, |
9796 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
9797 | SDValue Mask = |
9798 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; |
9799 | // Set the vector length to only the number of elements we care about. Note |
9800 | // that for slideup this includes the offset. |
9801 | unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); |
9802 | SDValue VL = getVLOp(NumElts: EndIndex, ContainerVT, DL, DAG, Subtarget); |
9803 | |
9804 | // Use tail agnostic policy if we're inserting over Vec's tail. |
9805 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
9806 | if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) |
9807 | Policy = RISCVII::TAIL_AGNOSTIC; |
9808 | |
9809 | // If we're inserting into the lowest elements, use a tail undisturbed |
9810 | // vmv.v.v. |
9811 | if (OrigIdx == 0) { |
9812 | SubVec = |
9813 | DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: Vec, N2: SubVec, N3: VL); |
9814 | } else { |
9815 | SDValue SlideupAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT); |
9816 | SubVec = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: SubVec, |
9817 | Offset: SlideupAmt, Mask, VL, Policy); |
9818 | } |
9819 | |
9820 | if (VecVT.isFixedLengthVector()) |
9821 | SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget); |
9822 | return DAG.getBitcast(VT: Op.getValueType(), V: SubVec); |
9823 | } |
9824 | |
9825 | unsigned SubRegIdx, RemIdx; |
9826 | std::tie(args&: SubRegIdx, args&: RemIdx) = |
9827 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
9828 | VecVT, SubVecVT, InsertExtractIdx: OrigIdx, TRI); |
9829 | |
9830 | RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(VT: SubVecVT); |
9831 | bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || |
9832 | SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || |
9833 | SubVecLMUL == RISCVII::VLMUL::LMUL_F8; |
9834 | |
9835 | // 1. If the Idx has been completely eliminated and this subvector's size is |
9836 | // a vector register or a multiple thereof, or the surrounding elements are |
9837 | // undef, then this is a subvector insert which naturally aligns to a vector |
9838 | // register. These can easily be handled using subregister manipulation. |
9839 | // 2. If the subvector is smaller than a vector register, then the insertion |
9840 | // must preserve the undisturbed elements of the register. We do this by |
9841 | // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type |
9842 | // (which resolves to a subregister copy), performing a VSLIDEUP to place the |
9843 | // subvector within the vector register, and an INSERT_SUBVECTOR of that |
9844 | // LMUL=1 type back into the larger vector (resolving to another subregister |
9845 | // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type |
9846 | // to avoid allocating a large register group to hold our subvector. |
9847 | if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) |
9848 | return Op; |
9849 | |
9850 | // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
9851 | // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy |
9852 | // (in our case undisturbed). This means we can set up a subvector insertion |
9853 | // where OFFSET is the insertion offset, and the VL is the OFFSET plus the |
9854 | // size of the subvector. |
9855 | MVT InterSubVT = VecVT; |
9856 | SDValue AlignedExtract = Vec;
9857 | unsigned AlignedIdx = OrigIdx - RemIdx; |
9858 | if (VecVT.bitsGT(VT: getLMUL1VT(VT: VecVT))) { |
9859 | InterSubVT = getLMUL1VT(VT: VecVT); |
9860 | // Extract a subvector equal to the nearest full vector register type. This |
9861 | // should resolve to a EXTRACT_SUBREG instruction. |
9862 | AlignedExtract = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: InterSubVT, N1: Vec, |
9863 | N2: DAG.getVectorIdxConstant(Val: AlignedIdx, DL)); |
9864 | } |
9865 | |
9866 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: InterSubVT, |
9867 | N1: DAG.getUNDEF(VT: InterSubVT), N2: SubVec, |
9868 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
9869 | |
9870 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); |
9871 | |
9872 | ElementCount EndIndex = |
9873 | ElementCount::getScalable(MinVal: RemIdx) + SubVecVT.getVectorElementCount(); |
9874 | VL = computeVLMax(VecVT: SubVecVT, DL, DAG); |
9875 | |
9876 | // Use tail agnostic policy if we're inserting over InterSubVT's tail. |
9877 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
9878 | if (EndIndex == InterSubVT.getVectorElementCount()) |
9879 | Policy = RISCVII::TAIL_AGNOSTIC; |
9880 | |
9881 | // If we're inserting into the lowest elements, use a tail undisturbed |
9882 | // vmv.v.v. |
9883 | if (RemIdx == 0) { |
9884 | SubVec = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: InterSubVT, N1: AlignedExtract, |
9885 | N2: SubVec, N3: VL); |
9886 | } else { |
9887 | SDValue SlideupAmt = |
9888 | DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), RemIdx)); |
9889 | |
9890 | // Construct the vector length corresponding to RemIdx + length(SubVecVT). |
9891 | VL = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: SlideupAmt, N2: VL); |
9892 | |
9893 | SubVec = getVSlideup(DAG, Subtarget, DL, VT: InterSubVT, Merge: AlignedExtract, Op: SubVec, |
9894 | Offset: SlideupAmt, Mask, VL, Policy); |
9895 | } |
9896 | |
9897 | // If required, insert this subvector back into the correct vector register. |
9898 | // This should resolve to an INSERT_SUBREG instruction. |
9899 | if (VecVT.bitsGT(VT: InterSubVT)) |
9900 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: Vec, N2: SubVec, |
9901 | N3: DAG.getVectorIdxConstant(Val: AlignedIdx, DL)); |
9902 | |
9903 | // We might have bitcast from a mask type: cast back to the original type if |
9904 | // required. |
9905 | return DAG.getBitcast(VT: Op.getSimpleValueType(), V: SubVec); |
9906 | } |
9907 | |
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
9910 | SDValue Vec = Op.getOperand(i: 0); |
9911 | MVT SubVecVT = Op.getSimpleValueType(); |
9912 | MVT VecVT = Vec.getSimpleValueType(); |
9913 | |
9914 | SDLoc DL(Op); |
9915 | MVT XLenVT = Subtarget.getXLenVT(); |
9916 | unsigned OrigIdx = Op.getConstantOperandVal(i: 1); |
9917 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
9918 | |
9919 | // We don't have the ability to slide mask vectors down indexed by their i1 |
9920 | // elements; the smallest we can do is i8. Often we are able to bitcast to |
9921 | // equivalent i8 vectors. Note that when extracting a fixed-length vector |
9922 | // from a scalable one, we might not necessarily have enough scalable |
9923 | // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. |
9924 | if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { |
9925 | if (VecVT.getVectorMinNumElements() >= 8 && |
9926 | SubVecVT.getVectorMinNumElements() >= 8) { |
9927 | assert(OrigIdx % 8 == 0 && "Invalid index" ); |
9928 | assert(VecVT.getVectorMinNumElements() % 8 == 0 && |
9929 | SubVecVT.getVectorMinNumElements() % 8 == 0 && |
9930 | "Unexpected mask vector lowering" ); |
9931 | OrigIdx /= 8; |
9932 | SubVecVT = |
9933 | MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, |
9934 | SubVecVT.isScalableVector()); |
9935 | VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, |
9936 | VecVT.isScalableVector()); |
9937 | Vec = DAG.getBitcast(VT: VecVT, V: Vec); |
9938 | } else { |
9939 | // We can't slide this mask vector down, indexed by its i1 elements. |
9940 | // This poses a problem when we wish to extract a scalable vector which |
9941 | // can't be re-expressed as a larger type. Just choose the slow path and |
9942 | // extend to a larger type, then truncate back down. |
9943 | // TODO: We could probably improve this when extracting certain fixed |
9944 | // from fixed, where we can extract as i8 and shift the correct element |
9945 | // right to reach the desired subvector? |
9946 | MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); |
9947 | MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); |
9948 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec); |
9949 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ExtSubVecVT, N1: Vec, |
9950 | N2: Op.getOperand(i: 1)); |
9951 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtSubVecVT); |
9952 | return DAG.getSetCC(DL, VT: SubVecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE); |
9953 | } |
9954 | } |
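// As a concrete sketch of the bitcast path above: extracting nxv8i1 at i1
// index 8 from nxv64i1 is re-expressed as extracting nxv1i8 at index 1 from
// nxv8i8, after which the normal (non-mask) lowering below applies.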
9955 | |
9956 | // With an index of 0 this is a cast-like subvector, which can be performed |
9957 | // with subregister operations. |
9958 | if (OrigIdx == 0) |
9959 | return Op; |
9960 | |
9961 | const auto VLen = Subtarget.getRealVLen(); |
9962 | |
// If the subvector is a fixed-length type and we don't know VLEN
// exactly, we cannot use subregister manipulation to simplify the codegen; we
// don't know which register of an LMUL group contains the specific subvector
9966 | // as we only know the minimum register size. Therefore we must slide the |
9967 | // vector group down the full amount. |
9968 | if (SubVecVT.isFixedLengthVector() && !VLen) { |
9969 | MVT ContainerVT = VecVT; |
9970 | if (VecVT.isFixedLengthVector()) { |
9971 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9972 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9973 | } |
9974 | |
9975 | // Shrink down Vec so we're performing the slidedown on a smaller LMUL. |
9976 | unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1; |
9977 | if (auto ShrunkVT = |
9978 | getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: LastIdx, DL, DAG, Subtarget)) { |
9979 | ContainerVT = *ShrunkVT; |
9980 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, |
9981 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9982 | } |
9983 | |
9984 | SDValue Mask = |
9985 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; |
9986 | // Set the vector length to only the number of elements we care about. This |
9987 | // avoids sliding down elements we're going to discard straight away. |
9988 | SDValue VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG, |
9989 | Subtarget); |
9990 | SDValue SlidedownAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT); |
9991 | SDValue Slidedown = |
9992 | getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, |
9993 | Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: SlidedownAmt, Mask, VL); |
9994 | // Now we can use a cast-like subvector extract to get the result. |
9995 | Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown, |
9996 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9997 | return DAG.getBitcast(VT: Op.getValueType(), V: Slidedown); |
9998 | } |
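// E.g. (a sketch) extracting v2i32 at index 6 from v8i32 without exact VLEN
// knowledge: the container is slid down by 6 with VL=2, and the two live
// elements are then extracted from position 0.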
9999 | |
10000 | if (VecVT.isFixedLengthVector()) { |
10001 | VecVT = getContainerForFixedLengthVector(VT: VecVT); |
10002 | Vec = convertToScalableVector(VT: VecVT, V: Vec, DAG, Subtarget); |
10003 | } |
10004 | |
10005 | MVT ContainerSubVecVT = SubVecVT; |
10006 | if (SubVecVT.isFixedLengthVector()) |
10007 | ContainerSubVecVT = getContainerForFixedLengthVector(VT: SubVecVT); |
10008 | |
10009 | unsigned SubRegIdx; |
10010 | ElementCount RemIdx; |
10011 | // extract_subvector scales the index by vscale if the subvector is scalable, |
10012 | // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if |
10013 | // we have a fixed length subvector, we need to adjust the index by 1/vscale. |
10014 | if (SubVecVT.isFixedLengthVector()) { |
10015 | assert(VLen); |
10016 | unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; |
10017 | auto Decompose = |
10018 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
10019 | VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx / Vscale, TRI); |
10020 | SubRegIdx = Decompose.first; |
10021 | RemIdx = ElementCount::getFixed(MinVal: (Decompose.second * Vscale) + |
10022 | (OrigIdx % Vscale)); |
10023 | } else { |
10024 | auto Decompose = |
10025 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
10026 | VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx, TRI); |
10027 | SubRegIdx = Decompose.first; |
10028 | RemIdx = ElementCount::getScalable(MinVal: Decompose.second); |
10029 | } |
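// For example (a sketch, assuming an exact VLEN of 128, i.e. vscale=2):
// extracting v2i64 at index 5 from nxv4i64 decomposes at scalable index
// 5/2 = 2, selecting the third LMUL=1 register, with a remaining fixed
// offset of 5%2 = 1 element to slide down within that register.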
10030 | |
10031 | // If the Idx has been completely eliminated then this is a subvector extract |
10032 | // which naturally aligns to a vector register. These can easily be handled |
10033 | // using subregister manipulation. |
10034 | if (RemIdx.isZero()) { |
10035 | if (SubVecVT.isFixedLengthVector()) { |
10036 | Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: ContainerSubVecVT, Operand: Vec); |
10037 | return convertFromScalableVector(VT: SubVecVT, V: Vec, DAG, Subtarget); |
10038 | } |
10039 | return Op; |
10040 | } |
10041 | |
10042 | // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT |
10043 | // was > M1 then the index would need to be a multiple of VLMAX, and so would |
10044 | // divide exactly. |
10045 | assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second || |
10046 | getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1); |
10047 | |
10048 | // If the vector type is an LMUL-group type, extract a subvector equal to the |
10049 | // nearest full vector register type. |
10050 | MVT InterSubVT = VecVT; |
10051 | if (VecVT.bitsGT(VT: getLMUL1VT(VT: VecVT))) { |
10052 | // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and |
10053 | // we should have successfully decomposed the extract into a subregister. |
10054 | assert(SubRegIdx != RISCV::NoSubRegister); |
10055 | InterSubVT = getLMUL1VT(VT: VecVT); |
10056 | Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: InterSubVT, Operand: Vec); |
10057 | } |
10058 | |
10059 | // Slide this vector register down by the desired number of elements in order |
10060 | // to place the desired subvector starting at element 0. |
10061 | SDValue SlidedownAmt = DAG.getElementCount(DL, VT: XLenVT, EC: RemIdx); |
10062 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: InterSubVT, DL, DAG, Subtarget); |
10063 | if (SubVecVT.isFixedLengthVector()) |
10064 | VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT: InterSubVT, DL, DAG, |
10065 | Subtarget); |
10066 | SDValue Slidedown = |
10067 | getVSlidedown(DAG, Subtarget, DL, VT: InterSubVT, Merge: DAG.getUNDEF(VT: InterSubVT), |
10068 | Op: Vec, Offset: SlidedownAmt, Mask, VL); |
10069 | |
10070 | // Now the vector is in the right position, extract our final subvector. This |
10071 | // should resolve to a COPY. |
10072 | Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown, |
10073 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10074 | |
10075 | // We might have bitcast from a mask type: cast back to the original type if |
10076 | // required. |
10077 | return DAG.getBitcast(VT: Op.getSimpleValueType(), V: Slidedown); |
10078 | } |
10079 | |
// Widen a vector operation's operands to i8, then truncate its results back
// to the original type, typically i1. All operand and result types must be
// the same.
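// For example (a sketch): an operation on nxv4i1 values is instead performed
// on zero-extended nxv4i8 operands, and each i1 result is recovered with a
// setcc-not-equal-zero against a zero splat.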
10082 | static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, |
10083 | SelectionDAG &DAG) { |
10084 | MVT VT = N.getSimpleValueType(); |
10085 | MVT WideVT = VT.changeVectorElementType(MVT::i8); |
10086 | SmallVector<SDValue, 4> WideOps; |
10087 | for (SDValue Op : N->ops()) { |
10088 | assert(Op.getSimpleValueType() == VT && |
10089 | "Operands and result must be same type" ); |
10090 | WideOps.push_back(Elt: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Op)); |
10091 | } |
10092 | |
10093 | unsigned NumVals = N->getNumValues(); |
10094 | |
10095 | SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>( |
10096 | NumVals, N.getValueType().changeVectorElementType(MVT::i8))); |
10097 | SDValue WideN = DAG.getNode(Opcode: N.getOpcode(), DL, VTList: VTs, Ops: WideOps); |
10098 | SmallVector<SDValue, 4> TruncVals; |
10099 | for (unsigned I = 0; I < NumVals; I++) { |
10100 | TruncVals.push_back( |
10101 | Elt: DAG.getSetCC(DL, VT: N->getSimpleValueType(ResNo: I), LHS: WideN.getValue(R: I), |
10102 | RHS: DAG.getConstant(Val: 0, DL, VT: WideVT), Cond: ISD::SETNE)); |
10103 | } |
10104 | |
10105 | if (TruncVals.size() > 1) |
10106 | return DAG.getMergeValues(Ops: TruncVals, dl: DL); |
10107 | return TruncVals.front(); |
10108 | } |
10109 | |
10110 | SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, |
10111 | SelectionDAG &DAG) const { |
10112 | SDLoc DL(Op); |
10113 | MVT VecVT = Op.getSimpleValueType(); |
10114 | |
10115 | assert(VecVT.isScalableVector() && |
10116 | "vector_interleave on non-scalable vector!" ); |
10117 | |
10118 | // 1 bit element vectors need to be widened to e8 |
10119 | if (VecVT.getVectorElementType() == MVT::i1) |
10120 | return widenVectorOpsToi8(N: Op, DL, DAG); |
10121 | |
10122 | // If the VT is LMUL=8, we need to split and reassemble. |
10123 | if (VecVT.getSizeInBits().getKnownMinValue() == |
10124 | (8 * RISCV::RVVBitsPerBlock)) { |
10125 | auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0); |
10126 | auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1); |
10127 | EVT SplitVT = Op0Lo.getValueType(); |
10128 | |
10129 | SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL, |
10130 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op0Hi); |
10131 | SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL, |
10132 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op1Lo, N2: Op1Hi); |
10133 | |
10134 | SDValue Even = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, |
10135 | N1: ResLo.getValue(R: 0), N2: ResHi.getValue(R: 0)); |
10136 | SDValue Odd = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, N1: ResLo.getValue(R: 1), |
10137 | N2: ResHi.getValue(R: 1)); |
10138 | return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL); |
10139 | } |
10140 | |
10141 | // Concatenate the two vectors as one vector to deinterleave |
10142 | MVT ConcatVT = |
10143 | MVT::getVectorVT(VT: VecVT.getVectorElementType(), |
10144 | EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
10145 | SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT, |
10146 | N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1)); |
10147 | |
// We want to operate on all lanes, so get the mask and VL for it
10149 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: ConcatVT, DL, DAG, Subtarget); |
10150 | SDValue Passthru = DAG.getUNDEF(VT: ConcatVT); |
10151 | |
10152 | // We can deinterleave through vnsrl.wi if the element type is smaller than |
10153 | // ELEN |
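// The vnsrl trick (a sketch): reinterpret each adjacent pair of elements as
// a single element of twice the SEW; a narrowing shift right by 0 keeps the
// even (low) halves while a shift by SEW keeps the odd (high) halves, e.g.
// a shift of 16 recovers the odd elements of an e16 vector.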
10154 | if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { |
10155 | SDValue Even = |
10156 | getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: true, Subtarget, DAG); |
10157 | SDValue Odd = |
10158 | getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: false, Subtarget, DAG); |
10159 | return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL); |
10160 | } |
10161 | |
10162 | // For the indices, use the same SEW to avoid an extra vsetvli |
10163 | MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); |
10164 | // Create a vector of even indices {0, 2, 4, ...} |
10165 | SDValue EvenIdx = |
10166 | DAG.getStepVector(DL, ResVT: IdxVT, StepVal: APInt(IdxVT.getScalarSizeInBits(), 2)); |
10167 | // Create a vector of odd indices {1, 3, 5, ... } |
10168 | SDValue OddIdx = |
10169 | DAG.getNode(Opcode: ISD::ADD, DL, VT: IdxVT, N1: EvenIdx, N2: DAG.getConstant(Val: 1, DL, VT: IdxVT)); |
10170 | |
10171 | // Gather the even and odd elements into two separate vectors |
10172 | SDValue EvenWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT, |
10173 | N1: Concat, N2: EvenIdx, N3: Passthru, N4: Mask, N5: VL); |
10174 | SDValue OddWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT, |
10175 | N1: Concat, N2: OddIdx, N3: Passthru, N4: Mask, N5: VL); |
10176 | |
10177 | // Extract the result half of the gather for even and odd |
10178 | SDValue Even = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: EvenWide, |
10179 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10180 | SDValue Odd = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: OddWide, |
10181 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10182 | |
10183 | return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL); |
10184 | } |
10185 | |
10186 | SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, |
10187 | SelectionDAG &DAG) const { |
10188 | SDLoc DL(Op); |
10189 | MVT VecVT = Op.getSimpleValueType(); |
10190 | |
10191 | assert(VecVT.isScalableVector() && |
10192 | "vector_interleave on non-scalable vector!" ); |
10193 | |
10194 | // i1 vectors need to be widened to i8 |
10195 | if (VecVT.getVectorElementType() == MVT::i1) |
10196 | return widenVectorOpsToi8(N: Op, DL, DAG); |
10197 | |
10198 | MVT XLenVT = Subtarget.getXLenVT(); |
10199 | SDValue VL = DAG.getRegister(RISCV::X0, XLenVT); |
10200 | |
10201 | // If the VT is LMUL=8, we need to split and reassemble. |
if (VecVT.getSizeInBits().getKnownMinValue() ==
(8 * RISCV::RVVBitsPerBlock)) {
10203 | auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0); |
10204 | auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1); |
10205 | EVT SplitVT = Op0Lo.getValueType(); |
10206 | |
10207 | SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL, |
10208 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op1Lo); |
10209 | SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL, |
10210 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Hi, N2: Op1Hi); |
10211 | |
10212 | SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, |
10213 | N1: ResLo.getValue(R: 0), N2: ResLo.getValue(R: 1)); |
10214 | SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, |
10215 | N1: ResHi.getValue(R: 0), N2: ResHi.getValue(R: 1)); |
10216 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL); |
10217 | } |
10218 | |
10219 | SDValue Interleaved; |
10220 | |
10221 | // If the element type is smaller than ELEN, then we can interleave with |
10222 | // vwaddu.vv and vwmaccu.vx |
10223 | if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { |
10224 | Interleaved = getWideningInterleave(EvenV: Op.getOperand(i: 0), OddV: Op.getOperand(i: 1), DL, |
10225 | DAG, Subtarget); |
10226 | } else { |
10227 | // Otherwise, fallback to using vrgathere16.vv |
10228 | MVT ConcatVT = |
10229 | MVT::getVectorVT(VT: VecVT.getVectorElementType(), |
10230 | EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
10231 | SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT, |
10232 | N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1)); |
10233 | |
10234 | MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16); |
10235 | |
10236 | // 0 1 2 3 4 5 6 7 ... |
10237 | SDValue StepVec = DAG.getStepVector(DL, ResVT: IdxVT); |
10238 | |
10239 | // 1 1 1 1 1 1 1 1 ... |
10240 | SDValue Ones = DAG.getSplatVector(VT: IdxVT, DL, Op: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
10241 | |
10242 | // 1 0 1 0 1 0 1 0 ... |
10243 | SDValue OddMask = DAG.getNode(Opcode: ISD::AND, DL, VT: IdxVT, N1: StepVec, N2: Ones); |
10244 | OddMask = DAG.getSetCC( |
10245 | DL, IdxVT.changeVectorElementType(MVT::i1), OddMask, |
10246 | DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)), |
10247 | ISD::CondCode::SETNE); |
10248 | |
10249 | SDValue VLMax = DAG.getSplatVector(VT: IdxVT, DL, Op: computeVLMax(VecVT, DL, DAG)); |
10250 | |
10251 | // Build up the index vector for interleaving the concatenated vector |
10252 | // 0 0 1 1 2 2 3 3 ... |
10253 | SDValue Idx = DAG.getNode(Opcode: ISD::SRL, DL, VT: IdxVT, N1: StepVec, N2: Ones); |
10254 | // 0 n 1 n+1 2 n+2 3 n+3 ... |
10255 | Idx = |
10256 | DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: IdxVT, N1: Idx, N2: VLMax, N3: Idx, N4: OddMask, N5: VL); |
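// e.g. with VLMAX=4: 0 4 1 5 2 6 3 7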
10257 | |
10258 | // Then perform the interleave |
10259 | // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ... |
10260 | SDValue TrueMask = getAllOnesMask(VecVT: IdxVT, VL, DL, DAG); |
10261 | Interleaved = DAG.getNode(Opcode: RISCVISD::VRGATHEREI16_VV_VL, DL, VT: ConcatVT, |
10262 | N1: Concat, N2: Idx, N3: DAG.getUNDEF(VT: ConcatVT), N4: TrueMask, N5: VL); |
10263 | } |
10264 | |
10265 | // Extract the two halves from the interleaved result |
10266 | SDValue Lo = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved, |
10267 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10268 | SDValue Hi = DAG.getNode( |
10269 | Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved, |
10270 | N2: DAG.getVectorIdxConstant(Val: VecVT.getVectorMinNumElements(), DL)); |
10271 | |
10272 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL); |
10273 | } |
10274 | |
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
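// For example (a sketch): a step of 4 lowers to vid.v followed by a shift
// left by 2, while a step of 3 lowers to vid.v followed by a multiply by a
// splat of 3.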
10277 | SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op, |
10278 | SelectionDAG &DAG) const { |
10279 | SDLoc DL(Op); |
10280 | MVT VT = Op.getSimpleValueType(); |
10281 | assert(VT.isScalableVector() && "Expected scalable vector" ); |
10282 | MVT XLenVT = Subtarget.getXLenVT(); |
10283 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget); |
10284 | SDValue StepVec = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL); |
10285 | uint64_t StepValImm = Op.getConstantOperandVal(i: 0); |
10286 | if (StepValImm != 1) { |
10287 | if (isPowerOf2_64(Value: StepValImm)) { |
10288 | SDValue StepVal = |
10289 | DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT), |
10290 | N2: DAG.getConstant(Val: Log2_64(Value: StepValImm), DL, VT: XLenVT), N3: VL); |
10291 | StepVec = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: StepVec, N2: StepVal); |
10292 | } else { |
10293 | SDValue StepVal = lowerScalarSplat( |
10294 | Passthru: SDValue(), Scalar: DAG.getConstant(Val: StepValImm, DL, VT: VT.getVectorElementType()), |
10295 | VL, VT, DL, DAG, Subtarget); |
10296 | StepVec = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: StepVec, N2: StepVal); |
10297 | } |
10298 | } |
10299 | return StepVec; |
10300 | } |
10301 | |
10302 | // Implement vector_reverse using vrgather.vv with indices determined by |
10303 | // subtracting the id of each element from (VLMAX-1). This will convert |
10304 | // the indices like so: |
10305 | // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). |
10306 | // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. |
10307 | SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, |
10308 | SelectionDAG &DAG) const { |
10309 | SDLoc DL(Op); |
10310 | MVT VecVT = Op.getSimpleValueType(); |
10311 | if (VecVT.getVectorElementType() == MVT::i1) { |
10312 | MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); |
10313 | SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: Op.getOperand(i: 0)); |
10314 | SDValue Op2 = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: WidenVT, Operand: Op1); |
10315 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Op2); |
10316 | } |
10317 | unsigned EltSize = VecVT.getScalarSizeInBits(); |
10318 | unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); |
10319 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
10320 | unsigned MaxVLMAX = |
10321 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize); |
10322 | |
10323 | unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; |
10324 | MVT IntVT = VecVT.changeVectorElementTypeToInteger(); |
10325 | |
10326 | // If this is SEW=8 and VLMAX is potentially more than 256, we need |
10327 | // to use vrgatherei16.vv. |
10328 | // TODO: It's also possible to use vrgatherei16.vv for other types to |
10329 | // decrease register width for the index calculation. |
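// (E.g. at SEW=8 and LMUL=1, a maximum VLEN of 4096 gives VLMAX=512, so an
// i8 index could not address every element.)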
10330 | if (MaxVLMAX > 256 && EltSize == 8) { |
// If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10332 | // Reverse each half, then reassemble them in reverse order. |
// NOTE: It's also possible that after splitting, VLMAX no longer requires
// vrgatherei16.vv.
10335 | if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { |
10336 | auto [Lo, Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0); |
10337 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: VecVT); |
10338 | Lo = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo); |
10339 | Hi = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi); |
10340 | // Reassemble the low and high pieces reversed. |
10341 | // FIXME: This is a CONCAT_VECTORS. |
10342 | SDValue Res = |
10343 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT), N2: Hi, |
10344 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
10345 | return DAG.getNode( |
10346 | Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: Res, N2: Lo, |
10347 | N3: DAG.getVectorIdxConstant(Val: LoVT.getVectorMinNumElements(), DL)); |
10348 | } |
10349 | |
10350 | // Just promote the int type to i16 which will double the LMUL. |
10351 | IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); |
10352 | GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; |
10353 | } |
10354 | |
10355 | MVT XLenVT = Subtarget.getXLenVT(); |
10356 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); |
10357 | |
10358 | // Calculate VLMAX-1 for the desired SEW. |
10359 | SDValue VLMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, |
10360 | N1: computeVLMax(VecVT, DL, DAG), |
10361 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
10362 | |
10363 | // Splat VLMAX-1 taking care to handle SEW==64 on RV32. |
10364 | bool IsRV32E64 = |
10365 | !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; |
10366 | SDValue SplatVL; |
10367 | if (!IsRV32E64) |
10368 | SplatVL = DAG.getSplatVector(VT: IntVT, DL, Op: VLMinus1); |
10369 | else |
10370 | SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), |
10371 | VLMinus1, DAG.getRegister(RISCV::X0, XLenVT)); |
10372 | |
10373 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IntVT, N1: Mask, N2: VL); |
10374 | SDValue Indices = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IntVT, N1: SplatVL, N2: VID, |
10375 | N3: DAG.getUNDEF(VT: IntVT), N4: Mask, N5: VL); |
10376 | |
10377 | return DAG.getNode(Opcode: GatherOpc, DL, VT: VecVT, N1: Op.getOperand(i: 0), N2: Indices, |
10378 | N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL); |
10379 | } |
10380 | |
10381 | SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, |
10382 | SelectionDAG &DAG) const { |
10383 | SDLoc DL(Op); |
10384 | SDValue V1 = Op.getOperand(i: 0); |
10385 | SDValue V2 = Op.getOperand(i: 1); |
10386 | MVT XLenVT = Subtarget.getXLenVT(); |
10387 | MVT VecVT = Op.getSimpleValueType(); |
10388 | |
10389 | SDValue VLMax = computeVLMax(VecVT, DL, DAG); |
10390 | |
10391 | int64_t ImmValue = cast<ConstantSDNode>(Val: Op.getOperand(i: 2))->getSExtValue(); |
10392 | SDValue DownOffset, UpOffset; |
10393 | if (ImmValue >= 0) { |
10394 | // The operand is a TargetConstant, we need to rebuild it as a regular |
10395 | // constant. |
10396 | DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT); |
10397 | UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: DownOffset); |
10398 | } else { |
10399 | // The operand is a TargetConstant, we need to rebuild it as a regular |
10400 | // constant rather than negating the original operand. |
10401 | UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT); |
10402 | DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: UpOffset); |
10403 | } |
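// E.g. (a sketch) for vector_splice(V1, V2, 2) with VLMAX=8: DownOffset=2
// and UpOffset=6, so V1 is slid down by 2 with 6 live elements, and V2 is
// then slid up to offset 6 to fill the tail.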
10404 | |
10405 | SDValue TrueMask = getAllOnesMask(VecVT, VL: VLMax, DL, DAG); |
10406 | |
10407 | SDValue SlideDown = |
10408 | getVSlidedown(DAG, Subtarget, DL, VT: VecVT, Merge: DAG.getUNDEF(VT: VecVT), Op: V1, |
10409 | Offset: DownOffset, Mask: TrueMask, VL: UpOffset); |
10410 | return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset, |
10411 | TrueMask, DAG.getRegister(RISCV::X0, XLenVT), |
10412 | RISCVII::TAIL_AGNOSTIC); |
10413 | } |
10414 | |
10415 | SDValue |
10416 | RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, |
10417 | SelectionDAG &DAG) const { |
10418 | SDLoc DL(Op); |
10419 | auto *Load = cast<LoadSDNode>(Val&: Op); |
10420 | |
10421 | assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
10422 | Load->getMemoryVT(), |
10423 | *Load->getMemOperand()) && |
10424 | "Expecting a correctly-aligned load" ); |
10425 | |
10426 | MVT VT = Op.getSimpleValueType(); |
10427 | MVT XLenVT = Subtarget.getXLenVT(); |
10428 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10429 | |
10430 | // If we know the exact VLEN and our fixed length vector completely fills |
10431 | // the container, use a whole register load instead. |
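// E.g. (a sketch) with VLEN known to be exactly 128, a v4i32 load fills its
// nxv2i32 container precisely (MinVLMAX == MaxVLMAX == 4), so a plain load
// of the container type suffices.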
10432 | const auto [MinVLMAX, MaxVLMAX] = |
10433 | RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget); |
10434 | if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && |
10435 | getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) { |
10436 | MachineMemOperand *MMO = Load->getMemOperand(); |
10437 | SDValue NewLoad = |
10438 | DAG.getLoad(VT: ContainerVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(), |
10439 | PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(), MMOFlags: MMO->getFlags(), |
10440 | AAInfo: MMO->getAAInfo(), Ranges: MMO->getRanges()); |
10441 | SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget); |
10442 | return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL); |
10443 | } |
10444 | |
SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG,
Subtarget);
10446 | |
10447 | bool IsMaskOp = VT.getVectorElementType() == MVT::i1; |
10448 | SDValue IntID = DAG.getTargetConstant( |
10449 | IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT); |
10450 | SmallVector<SDValue, 4> Ops{Load->getChain(), IntID}; |
10451 | if (!IsMaskOp) |
10452 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
10453 | Ops.push_back(Elt: Load->getBasePtr()); |
10454 | Ops.push_back(Elt: VL); |
10455 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
10456 | SDValue NewLoad = |
10457 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
10458 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand()); |
10459 | |
10460 | SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget); |
10461 | return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL); |
10462 | } |
10463 | |
10464 | SDValue |
10465 | RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, |
10466 | SelectionDAG &DAG) const { |
10467 | SDLoc DL(Op); |
10468 | auto *Store = cast<StoreSDNode>(Val&: Op); |
10469 | |
10470 | assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
10471 | Store->getMemoryVT(), |
10472 | *Store->getMemOperand()) && |
10473 | "Expecting a correctly-aligned store" ); |
10474 | |
10475 | SDValue StoreVal = Store->getValue(); |
10476 | MVT VT = StoreVal.getSimpleValueType(); |
10477 | MVT XLenVT = Subtarget.getXLenVT(); |
10478 | |
// If the size is less than a byte, we need to pad with zeros to make a byte.
10480 | if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { |
10481 | VT = MVT::v8i1; |
10482 | StoreVal = |
10483 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), |
10484 | N2: StoreVal, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
10485 | } |
10486 | |
10487 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10488 | |
10489 | SDValue NewValue = |
10490 | convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget); |
10491 | |
10493 | // If we know the exact VLEN and our fixed length vector completely fills |
10494 | // the container, use a whole register store instead. |
10495 | const auto [MinVLMAX, MaxVLMAX] = |
10496 | RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget); |
10497 | if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && |
10498 | getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) { |
10499 | MachineMemOperand *MMO = Store->getMemOperand(); |
10500 | return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: NewValue, Ptr: Store->getBasePtr(), |
10501 | PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(), |
10502 | MMOFlags: MMO->getFlags(), AAInfo: MMO->getAAInfo()); |
10503 | } |
10504 | |
10505 | SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, |
10506 | Subtarget); |
10507 | |
10508 | bool IsMaskOp = VT.getVectorElementType() == MVT::i1; |
10509 | SDValue IntID = DAG.getTargetConstant( |
10510 | IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); |
10511 | return DAG.getMemIntrinsicNode( |
10512 | ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), |
10513 | {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, |
10514 | Store->getMemoryVT(), Store->getMemOperand()); |
10515 | } |
10516 | |
10517 | SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, |
10518 | SelectionDAG &DAG) const { |
10519 | SDLoc DL(Op); |
10520 | MVT VT = Op.getSimpleValueType(); |
10521 | |
10522 | const auto *MemSD = cast<MemSDNode>(Val&: Op); |
10523 | EVT MemVT = MemSD->getMemoryVT(); |
10524 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
10525 | SDValue Chain = MemSD->getChain(); |
10526 | SDValue BasePtr = MemSD->getBasePtr(); |
10527 | |
10528 | SDValue Mask, PassThru, VL; |
10529 | if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Val&: Op)) { |
10530 | Mask = VPLoad->getMask(); |
10531 | PassThru = DAG.getUNDEF(VT); |
10532 | VL = VPLoad->getVectorLength(); |
10533 | } else { |
10534 | const auto *MLoad = cast<MaskedLoadSDNode>(Val&: Op); |
10535 | Mask = MLoad->getMask(); |
10536 | PassThru = MLoad->getPassThru(); |
10537 | } |
10538 | |
10539 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
10540 | |
10541 | MVT XLenVT = Subtarget.getXLenVT(); |
10542 | |
10543 | MVT ContainerVT = VT; |
10544 | if (VT.isFixedLengthVector()) { |
10545 | ContainerVT = getContainerForFixedLengthVector(VT); |
10546 | PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget); |
10547 | if (!IsUnmasked) { |
10548 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
10549 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
10550 | } |
10551 | } |
10552 | |
10553 | if (!VL) |
10554 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
10555 | |
10556 | unsigned IntID = |
10557 | IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; |
10558 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
10559 | if (IsUnmasked) |
10560 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
10561 | else |
10562 | Ops.push_back(Elt: PassThru); |
10563 | Ops.push_back(Elt: BasePtr); |
10564 | if (!IsUnmasked) |
10565 | Ops.push_back(Elt: Mask); |
10566 | Ops.push_back(Elt: VL); |
10567 | if (!IsUnmasked) |
10568 | Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT)); |
10569 | |
10570 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
10571 | |
10572 | SDValue Result = |
10573 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO); |
10574 | Chain = Result.getValue(R: 1); |
10575 | |
10576 | if (VT.isFixedLengthVector()) |
10577 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
10578 | |
10579 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
10580 | } |
10581 | |
10582 | SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, |
10583 | SelectionDAG &DAG) const { |
10584 | SDLoc DL(Op); |
10585 | |
10586 | const auto *MemSD = cast<MemSDNode>(Val&: Op); |
10587 | EVT MemVT = MemSD->getMemoryVT(); |
10588 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
10589 | SDValue Chain = MemSD->getChain(); |
10590 | SDValue BasePtr = MemSD->getBasePtr(); |
10591 | SDValue Val, Mask, VL; |
10592 | |
10593 | bool IsCompressingStore = false; |
10594 | if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Val&: Op)) { |
10595 | Val = VPStore->getValue(); |
10596 | Mask = VPStore->getMask(); |
10597 | VL = VPStore->getVectorLength(); |
10598 | } else { |
10599 | const auto *MStore = cast<MaskedStoreSDNode>(Val&: Op); |
10600 | Val = MStore->getValue(); |
10601 | Mask = MStore->getMask(); |
10602 | IsCompressingStore = MStore->isCompressingStore(); |
10603 | } |
10604 | |
10605 | bool IsUnmasked = |
10606 | ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()) || IsCompressingStore; |
10607 | |
10608 | MVT VT = Val.getSimpleValueType(); |
10609 | MVT XLenVT = Subtarget.getXLenVT(); |
10610 | |
10611 | MVT ContainerVT = VT; |
10612 | if (VT.isFixedLengthVector()) { |
10613 | ContainerVT = getContainerForFixedLengthVector(VT); |
10614 | |
10615 | Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget); |
10616 | if (!IsUnmasked || IsCompressingStore) { |
10617 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
10618 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
10619 | } |
10620 | } |
10621 | |
10622 | if (!VL) |
10623 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
10624 | |
10625 | if (IsCompressingStore) { |
10626 | Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT, |
10627 | DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT), |
10628 | DAG.getUNDEF(ContainerVT), Val, Mask, VL); |
10629 | VL = |
10630 | DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Mask, |
10631 | N2: getAllOnesMask(VecVT: Mask.getSimpleValueType(), VL, DL, DAG), N3: VL); |
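// E.g. a 4-element compressing store with mask {1,0,1,1} packs the three
// selected elements to the front and VCPOP_VL yields VL=3, so exactly those
// elements are stored contiguously.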
10632 | } |
10633 | |
10634 | unsigned IntID = |
10635 | IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; |
10636 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
10637 | Ops.push_back(Elt: Val); |
10638 | Ops.push_back(Elt: BasePtr); |
10639 | if (!IsUnmasked) |
10640 | Ops.push_back(Elt: Mask); |
10641 | Ops.push_back(Elt: VL); |
10642 | |
10643 | return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, |
10644 | DAG.getVTList(MVT::Other), Ops, MemVT, MMO); |
10645 | } |
10646 | |
10647 | SDValue |
10648 | RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, |
10649 | SelectionDAG &DAG) const { |
10650 | MVT InVT = Op.getOperand(i: 0).getSimpleValueType(); |
10651 | MVT ContainerVT = getContainerForFixedLengthVector(VT: InVT); |
10652 | |
10653 | MVT VT = Op.getSimpleValueType(); |
10654 | |
10655 | SDValue Op1 = |
10656 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget); |
10657 | SDValue Op2 = |
10658 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget); |
10659 | |
10660 | SDLoc DL(Op); |
10661 | auto [Mask, VL] = getDefaultVLOps(NumElts: VT.getVectorNumElements(), ContainerVT, DL, |
10662 | DAG, Subtarget); |
10663 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
10664 | |
10665 | SDValue Cmp = |
10666 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT, |
10667 | Ops: {Op1, Op2, Op.getOperand(i: 2), DAG.getUNDEF(VT: MaskVT), Mask, VL}); |
10668 | |
10669 | return convertFromScalableVector(VT, V: Cmp, DAG, Subtarget); |
10670 | } |
10671 | |
10672 | SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op, |
10673 | SelectionDAG &DAG) const { |
10674 | unsigned Opc = Op.getOpcode(); |
10675 | SDLoc DL(Op); |
10676 | SDValue Chain = Op.getOperand(i: 0); |
10677 | SDValue Op1 = Op.getOperand(i: 1); |
10678 | SDValue Op2 = Op.getOperand(i: 2); |
10679 | SDValue CC = Op.getOperand(i: 3); |
10680 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get(); |
10681 | MVT VT = Op.getSimpleValueType(); |
10682 | MVT InVT = Op1.getSimpleValueType(); |
10683 | |
// RVV VMFEQ/VMFNE do not raise an exception for qNaN, so we expand
// strict_fsetccs with the OEQ/UNE condition codes.
10686 | if (Opc == ISD::STRICT_FSETCCS) { |
// Expand strict_fsetccs(x, y, oeq) to
// (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
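// Both OLE compares signal on any NaN input, and their AND is true exactly
// when x <= y and y <= x hold, i.e. when x and y are ordered and equal.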
10689 | SDVTList VTList = Op->getVTList(); |
10690 | if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) { |
10691 | SDValue OLECCVal = DAG.getCondCode(Cond: ISD::SETOLE); |
10692 | SDValue Tmp1 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1, |
10693 | N3: Op2, N4: OLECCVal); |
10694 | SDValue Tmp2 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op2, |
10695 | N3: Op1, N4: OLECCVal); |
10696 | SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, |
10697 | Tmp1.getValue(1), Tmp2.getValue(1)); |
10698 | // Tmp1 and Tmp2 might be the same node. |
10699 | if (Tmp1 != Tmp2) |
10700 | Tmp1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp1, N2: Tmp2); |
10701 | return DAG.getMergeValues(Ops: {Tmp1, OutChain}, dl: DL); |
10702 | } |
10703 | |
10704 | // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq)) |
10705 | if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) { |
10706 | SDValue OEQCCVal = DAG.getCondCode(Cond: ISD::SETOEQ); |
10707 | SDValue OEQ = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1, |
10708 | N3: Op2, N4: OEQCCVal); |
10709 | SDValue Res = DAG.getNOT(DL, Val: OEQ, VT); |
10710 | return DAG.getMergeValues(Ops: {Res, OEQ.getValue(R: 1)}, dl: DL); |
10711 | } |
10712 | } |
10713 | |
10714 | MVT ContainerInVT = InVT; |
10715 | if (InVT.isFixedLengthVector()) { |
10716 | ContainerInVT = getContainerForFixedLengthVector(VT: InVT); |
10717 | Op1 = convertToScalableVector(VT: ContainerInVT, V: Op1, DAG, Subtarget); |
10718 | Op2 = convertToScalableVector(VT: ContainerInVT, V: Op2, DAG, Subtarget); |
10719 | } |
10720 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerInVT); |
10721 | |
10722 | auto [Mask, VL] = getDefaultVLOps(VecVT: InVT, ContainerVT: ContainerInVT, DL, DAG, Subtarget); |
10723 | |
10724 | SDValue Res; |
10725 | if (Opc == ISD::STRICT_FSETCC && |
10726 | (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE || |
10727 | CCVal == ISD::SETOLE)) { |
// VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
// is only active when both input elements are ordered.
10730 | SDValue True = getAllOnesMask(VecVT: ContainerInVT, VL, DL, DAG); |
10731 | SDValue OrderMask1 = DAG.getNode( |
10732 | RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), |
10733 | {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), |
10734 | True, VL}); |
10735 | SDValue OrderMask2 = DAG.getNode( |
10736 | RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), |
10737 | {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), |
10738 | True, VL}); |
10739 | Mask = |
10740 | DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: MaskVT, N1: OrderMask1, N2: OrderMask2, N3: VL); |
10741 | // Use Mask as the merge operand to let the result be 0 if either of the |
10742 | // inputs is unordered. |
10743 | Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL, |
10744 | DAG.getVTList(MaskVT, MVT::Other), |
10745 | {Chain, Op1, Op2, CC, Mask, Mask, VL}); |
10746 | } else { |
10747 | unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL |
10748 | : RISCVISD::STRICT_FSETCCS_VL; |
10749 | Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other), |
10750 | {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL}); |
10751 | } |
10752 | |
10753 | if (VT.isFixedLengthVector()) { |
10754 | SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
10755 | return DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL); |
10756 | } |
10757 | return Res; |
10758 | } |
10759 | |
10760 | // Lower vector ABS to smax(X, sub(0, X)). |
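// E.g. abs(-3) = smax(-3, 0 - (-3)) = 3; note that INT_MIN maps to itself,
// matching the wrapping behaviour of ISD::ABS.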
10761 | SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { |
10762 | SDLoc DL(Op); |
10763 | MVT VT = Op.getSimpleValueType(); |
10764 | SDValue X = Op.getOperand(i: 0); |
10765 | |
10766 | assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && |
10767 | "Unexpected type for ISD::ABS" ); |
10768 | |
10769 | MVT ContainerVT = VT; |
10770 | if (VT.isFixedLengthVector()) { |
10771 | ContainerVT = getContainerForFixedLengthVector(VT); |
10772 | X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget); |
10773 | } |
10774 | |
10775 | SDValue Mask, VL; |
10776 | if (Op->getOpcode() == ISD::VP_ABS) { |
10777 | Mask = Op->getOperand(Num: 1); |
10778 | if (VT.isFixedLengthVector()) |
10779 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
10780 | Subtarget); |
10781 | VL = Op->getOperand(Num: 2); |
10782 | } else |
10783 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
10784 | |
10785 | SDValue SplatZero = DAG.getNode( |
10786 | Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), |
10787 | N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL); |
10788 | SDValue NegX = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: ContainerVT, N1: SplatZero, N2: X, |
10789 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
10790 | SDValue Max = DAG.getNode(Opcode: RISCVISD::SMAX_VL, DL, VT: ContainerVT, N1: X, N2: NegX, |
10791 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
10792 | |
10793 | if (VT.isFixedLengthVector()) |
10794 | Max = convertFromScalableVector(VT, V: Max, DAG, Subtarget); |
10795 | return Max; |
10796 | } |
10797 | |
10798 | SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( |
10799 | SDValue Op, SelectionDAG &DAG) const { |
10800 | SDLoc DL(Op); |
10801 | MVT VT = Op.getSimpleValueType(); |
10802 | SDValue Mag = Op.getOperand(i: 0); |
10803 | SDValue Sign = Op.getOperand(i: 1); |
10804 | assert(Mag.getValueType() == Sign.getValueType() && |
10805 | "Can only handle COPYSIGN with matching types." ); |
10806 | |
10807 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10808 | Mag = convertToScalableVector(VT: ContainerVT, V: Mag, DAG, Subtarget); |
10809 | Sign = convertToScalableVector(VT: ContainerVT, V: Sign, DAG, Subtarget); |
10810 | |
10811 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
10812 | |
10813 | SDValue CopySign = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Mag, |
10814 | N2: Sign, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
10815 | |
10816 | return convertFromScalableVector(VT, V: CopySign, DAG, Subtarget); |
10817 | } |
10818 | |
10819 | SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( |
10820 | SDValue Op, SelectionDAG &DAG) const { |
10821 | MVT VT = Op.getSimpleValueType(); |
10822 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10823 | |
10824 | MVT I1ContainerVT = |
10825 | MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
10826 | |
10827 | SDValue CC = |
10828 | convertToScalableVector(VT: I1ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget); |
10829 | SDValue Op1 = |
10830 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget); |
10831 | SDValue Op2 = |
10832 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 2), DAG, Subtarget); |
10833 | |
10834 | SDLoc DL(Op); |
10835 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
10836 | |
10837 | SDValue Select = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: Op1, |
10838 | N3: Op2, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
10839 | |
10840 | return convertFromScalableVector(VT, V: Select, DAG, Subtarget); |
10841 | } |
10842 | |
10843 | SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, |
10844 | SelectionDAG &DAG) const { |
10845 | unsigned NewOpc = getRISCVVLOp(Op); |
10846 | bool HasMergeOp = hasMergeOp(Opcode: NewOpc); |
10847 | bool HasMask = hasMaskOp(Opcode: NewOpc); |
10848 | |
10849 | MVT VT = Op.getSimpleValueType(); |
10850 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10851 | |
10852 | // Create list of operands by converting existing ones to scalable types. |
10853 | SmallVector<SDValue, 6> Ops; |
10854 | for (const SDValue &V : Op->op_values()) { |
10855 | assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!" ); |
10856 | |
10857 | // Pass through non-vector operands. |
10858 | if (!V.getValueType().isVector()) { |
10859 | Ops.push_back(Elt: V); |
10860 | continue; |
10861 | } |
10862 | |
10863 | // "cast" fixed length vector to a scalable vector. |
10864 | assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && |
10865 | "Only fixed length vectors are supported!" ); |
10866 | Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget)); |
10867 | } |
10868 | |
10869 | SDLoc DL(Op); |
10870 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
10871 | if (HasMergeOp) |
10872 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
10873 | if (HasMask) |
10874 | Ops.push_back(Elt: Mask); |
10875 | Ops.push_back(Elt: VL); |
10876 | |
// StrictFP operations have two result values. Their lowered result should
// have the same result count.
10879 | if (Op->isStrictFPOpcode()) { |
10880 | SDValue ScalableRes = |
10881 | DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops, |
10882 | Op->getFlags()); |
10883 | SDValue SubVec = convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget); |
10884 | return DAG.getMergeValues(Ops: {SubVec, ScalableRes.getValue(R: 1)}, dl: DL); |
10885 | } |
10886 | |
10887 | SDValue ScalableRes = |
10888 | DAG.getNode(Opcode: NewOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags()); |
10889 | return convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget); |
10890 | } |
10891 | |
10892 | // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: |
10893 | // * Operands of each node are assumed to be in the same order. |
10894 | // * The EVL operand is promoted from i32 to i64 on RV64. |
10895 | // * Fixed-length vectors are converted to their scalable-vector container |
10896 | // types. |
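// For example (a sketch): vp.add(x, y, mask, evl) becomes
// RISCVISD::ADD_VL(x, y, undef merge, mask, evl), with fixed-length x and y
// first "cast" to their scalable container type.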
10897 | SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { |
10898 | unsigned RISCVISDOpc = getRISCVVLOp(Op); |
10899 | bool HasMergeOp = hasMergeOp(Opcode: RISCVISDOpc); |
10900 | |
10901 | SDLoc DL(Op); |
10902 | MVT VT = Op.getSimpleValueType(); |
10903 | SmallVector<SDValue, 4> Ops; |
10904 | |
10905 | MVT ContainerVT = VT; |
10906 | if (VT.isFixedLengthVector()) |
10907 | ContainerVT = getContainerForFixedLengthVector(VT); |
10908 | |
10909 | for (const auto &OpIdx : enumerate(First: Op->ops())) { |
10910 | SDValue V = OpIdx.value(); |
10911 | assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!" ); |
// Add a dummy merge value before the mask; or, if there isn't a mask, before
// the EVL.
10914 | if (HasMergeOp) { |
10915 | auto MaskIdx = ISD::getVPMaskIdx(Opcode: Op.getOpcode()); |
10916 | if (MaskIdx) { |
10917 | if (*MaskIdx == OpIdx.index()) |
10918 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
10919 | } else if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == |
10920 | OpIdx.index()) { |
10921 | if (Op.getOpcode() == ISD::VP_MERGE) { |
10922 | // For VP_MERGE, copy the false operand instead of an undef value. |
10923 | Ops.push_back(Elt: Ops.back()); |
10924 | } else { |
10925 | assert(Op.getOpcode() == ISD::VP_SELECT); |
10926 | // For VP_SELECT, add an undef value. |
10927 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
10928 | } |
10929 | } |
10930 | } |
10931 | // Pass through operands which aren't fixed-length vectors. |
10932 | if (!V.getValueType().isFixedLengthVector()) { |
10933 | Ops.push_back(Elt: V); |
10934 | continue; |
10935 | } |
10936 | // "cast" fixed length vector to a scalable vector. |
10937 | MVT OpVT = V.getSimpleValueType(); |
10938 | MVT ContainerVT = getContainerForFixedLengthVector(VT: OpVT); |
10939 | assert(useRVVForFixedLengthVectorVT(OpVT) && |
10940 | "Only fixed length vectors are supported!" ); |
10941 | Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget)); |
10942 | } |
10943 | |
10944 | if (!VT.isFixedLengthVector()) |
10945 | return DAG.getNode(Opcode: RISCVISDOpc, DL, VT, Ops, Flags: Op->getFlags()); |
10946 | |
10947 | SDValue VPOp = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags()); |
10948 | |
10949 | return convertFromScalableVector(VT, V: VPOp, DAG, Subtarget); |
10950 | } |
10951 | |
10952 | SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, |
10953 | SelectionDAG &DAG) const { |
10954 | SDLoc DL(Op); |
10955 | MVT VT = Op.getSimpleValueType(); |
10956 | |
10957 | SDValue Src = Op.getOperand(i: 0); |
10958 | // NOTE: Mask is dropped. |
10959 | SDValue VL = Op.getOperand(i: 2); |
10960 | |
10961 | MVT ContainerVT = VT; |
10962 | if (VT.isFixedLengthVector()) { |
10963 | ContainerVT = getContainerForFixedLengthVector(VT); |
10964 | MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
10965 | Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget); |
10966 | } |
10967 | |
10968 | MVT XLenVT = Subtarget.getXLenVT(); |
10969 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
10970 | SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
10971 | N1: DAG.getUNDEF(VT: ContainerVT), N2: Zero, N3: VL); |
10972 | |
10973 | SDValue SplatValue = DAG.getConstant( |
10974 | Val: Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, VT: XLenVT); |
10975 | SDValue Splat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
10976 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatValue, N3: VL); |
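// The extension is thus a per-lane select: Result[i] = Src[i] ? ExtVal : 0,
// where ExtVal is 1 for VP_ZERO_EXTEND and all-ones (-1) for VP_SIGN_EXTEND.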
10977 | |
10978 | SDValue Result = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Src, N2: Splat, |
10979 | N3: ZeroSplat, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
10980 | if (!VT.isFixedLengthVector()) |
10981 | return Result; |
10982 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
10983 | } |
10984 | |
10985 | SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, |
10986 | SelectionDAG &DAG) const { |
10987 | SDLoc DL(Op); |
10988 | MVT VT = Op.getSimpleValueType(); |
10989 | |
10990 | SDValue Op1 = Op.getOperand(i: 0); |
10991 | SDValue Op2 = Op.getOperand(i: 1); |
10992 | ISD::CondCode Condition = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get(); |
10993 | // NOTE: Mask is dropped. |
10994 | SDValue VL = Op.getOperand(i: 4); |
10995 | |
10996 | MVT ContainerVT = VT; |
10997 | if (VT.isFixedLengthVector()) { |
10998 | ContainerVT = getContainerForFixedLengthVector(VT); |
10999 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11000 | Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget); |
11001 | } |
11002 | |
11003 | SDValue Result; |
11004 | SDValue AllOneMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL); |
11005 | |
11006 | switch (Condition) { |
11007 | default: |
11008 | break; |
11009 | // X != Y --> (X^Y) |
11010 | case ISD::SETNE: |
11011 | Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL); |
11012 | break; |
11013 | // X == Y --> ~(X^Y) |
11014 | case ISD::SETEQ: { |
11015 | SDValue Temp = |
11016 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL); |
11017 | Result = |
11018 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: AllOneMask, N3: VL); |
11019 | break; |
11020 | } |
11021 | // X >s Y --> X == 0 & Y == 1 --> ~X & Y |
11022 | // X <u Y --> X == 0 & Y == 1 --> ~X & Y |
11023 | case ISD::SETGT: |
11024 | case ISD::SETULT: { |
11025 | SDValue Temp = |
11026 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL); |
11027 | Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL); |
11028 | break; |
11029 | } |
11030 | // X <s Y --> X == 1 & Y == 0 --> ~Y & X |
11031 | // X >u Y --> X == 1 & Y == 0 --> ~Y & X |
11032 | case ISD::SETLT: |
11033 | case ISD::SETUGT: { |
11034 | SDValue Temp = |
11035 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL); |
11036 | Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Op1, N2: Temp, N3: VL); |
11037 | break; |
11038 | } |
11039 | // X >=s Y --> X == 0 | Y == 1 --> ~X | Y |
11040 | // X <=u Y --> X == 0 | Y == 1 --> ~X | Y |
11041 | case ISD::SETGE: |
11042 | case ISD::SETULE: { |
11043 | SDValue Temp = |
11044 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL); |
    Result = DAG.getNode(Opcode: RISCVISD::VMOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL);
11046 | break; |
11047 | } |
11048 | // X <=s Y --> X == 1 | Y == 0 --> ~Y | X |
11049 | // X >=u Y --> X == 1 | Y == 0 --> ~Y | X |
11050 | case ISD::SETLE: |
11051 | case ISD::SETUGE: { |
11052 | SDValue Temp = |
11053 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL); |
    Result = DAG.getNode(Opcode: RISCVISD::VMOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op1, N3: VL);
11055 | break; |
11056 | } |
11057 | } |
11058 | |
11059 | if (!VT.isFixedLengthVector()) |
11060 | return Result; |
11061 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11062 | } |
11063 | |
11064 | // Lower Floating-Point/Integer Type-Convert VP SDNodes |
11065 | SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, |
11066 | SelectionDAG &DAG) const { |
11067 | SDLoc DL(Op); |
11068 | |
11069 | SDValue Src = Op.getOperand(i: 0); |
11070 | SDValue Mask = Op.getOperand(i: 1); |
11071 | SDValue VL = Op.getOperand(i: 2); |
11072 | unsigned RISCVISDOpc = getRISCVVLOp(Op); |
11073 | |
11074 | MVT DstVT = Op.getSimpleValueType(); |
11075 | MVT SrcVT = Src.getSimpleValueType(); |
11076 | if (DstVT.isFixedLengthVector()) { |
11077 | DstVT = getContainerForFixedLengthVector(VT: DstVT); |
11078 | SrcVT = getContainerForFixedLengthVector(VT: SrcVT); |
11079 | Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget); |
11080 | MVT MaskVT = getMaskTypeFor(VecVT: DstVT); |
11081 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11082 | } |
11083 | |
11084 | unsigned DstEltSize = DstVT.getScalarSizeInBits(); |
11085 | unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); |
11086 | |
11087 | SDValue Result; |
11088 | if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. |
11089 | if (SrcVT.isInteger()) { |
11090 | assert(DstVT.isFloatingPoint() && "Wrong input/output vector types" ); |
11091 | |
11092 | unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL |
11093 | ? RISCVISD::VSEXT_VL |
11094 | : RISCVISD::VZEXT_VL; |
11095 | |
11096 | // Do we need to do any pre-widening before converting? |
11097 | if (SrcEltSize == 1) { |
11098 | MVT IntVT = DstVT.changeVectorElementTypeToInteger(); |
11099 | MVT XLenVT = Subtarget.getXLenVT(); |
11100 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
11101 | SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT, |
11102 | N1: DAG.getUNDEF(VT: IntVT), N2: Zero, N3: VL); |
11103 | SDValue One = DAG.getConstant( |
11104 | Val: RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, VT: XLenVT); |
11105 | SDValue OneSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT, |
11106 | N1: DAG.getUNDEF(VT: IntVT), N2: One, N3: VL); |
11107 | Src = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IntVT, N1: Src, N2: OneSplat, |
11108 | N3: ZeroSplat, N4: DAG.getUNDEF(VT: IntVT), N5: VL); |
11109 | } else if (DstEltSize > (2 * SrcEltSize)) { |
11110 | // Widen before converting. |
11111 | MVT IntVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: DstEltSize / 2), |
11112 | EC: DstVT.getVectorElementCount()); |
11113 | Src = DAG.getNode(Opcode: RISCVISDExtOpc, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL); |
11114 | } |
11115 | |
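      // For example, a vp.sitofp from i8 elements to f64 sign-extends the
      // source to i32 above (half the destination width), so the single
      // widening convert below produces f64 directly.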
11116 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL); |
11117 | } else { |
11118 | assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && |
11119 | "Wrong input/output vector types" ); |
11120 | |
11121 | // Convert f16 to f32 then convert f32 to i64. |
11122 | if (DstEltSize > (2 * SrcEltSize)) { |
11123 | assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!" ); |
11124 | MVT InterimFVT = |
11125 | MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); |
11126 | Src = |
11127 | DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL); |
11128 | } |
11129 | |
11130 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL); |
11131 | } |
11132 | } else { // Narrowing + Conversion |
11133 | if (SrcVT.isInteger()) { |
11134 | assert(DstVT.isFloatingPoint() && "Wrong input/output vector types" ); |
      // First do a narrowing conversion to an FP type half the source size,
      // then round to a smaller FP type if needed.
11137 | |
11138 | MVT InterimFVT = DstVT; |
11139 | if (SrcEltSize > (2 * DstEltSize)) { |
11140 | assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!" ); |
11141 | assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!" ); |
11142 | InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); |
11143 | } |
11144 | |
11145 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL); |
11146 | |
11147 | if (InterimFVT != DstVT) { |
11148 | Src = Result; |
11149 | Result = DAG.getNode(Opcode: RISCVISD::FP_ROUND_VL, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL); |
11150 | } |
11151 | } else { |
11152 | assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && |
11153 | "Wrong input/output vector types" ); |
11154 | // First do a narrowing conversion to an integer half the size, then |
11155 | // truncate if needed. |
11156 | |
11157 | if (DstEltSize == 1) { |
11158 | // First convert to the same size integer, then convert to mask using |
11159 | // setcc. |
11160 | assert(SrcEltSize >= 16 && "Unexpected FP type!" ); |
11161 | MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize), |
11162 | EC: DstVT.getVectorElementCount()); |
11163 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL); |
11164 | |
        // Compare the integer result to 0. The integer should be 0 or 1/-1;
        // otherwise the conversion was undefined.
11167 | MVT XLenVT = Subtarget.getXLenVT(); |
11168 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
11169 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterimIVT, |
11170 | N1: DAG.getUNDEF(VT: InterimIVT), N2: SplatZero, N3: VL); |
11171 | Result = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: DstVT, |
11172 | Ops: {Result, SplatZero, DAG.getCondCode(Cond: ISD::SETNE), |
11173 | DAG.getUNDEF(VT: DstVT), Mask, VL}); |
11174 | } else { |
11175 | MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2), |
11176 | EC: DstVT.getVectorElementCount()); |
11177 | |
11178 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL); |
11179 | |
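        // For example, an f64 -> i8 conversion: the convert above produces
        // i32 elements, and the loop below truncates i32 -> i16 -> i8.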
11180 | while (InterimIVT != DstVT) { |
11181 | SrcEltSize /= 2; |
11182 | Src = Result; |
11183 | InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2), |
11184 | EC: DstVT.getVectorElementCount()); |
11185 | Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: InterimIVT, |
11186 | N1: Src, N2: Mask, N3: VL); |
11187 | } |
11188 | } |
11189 | } |
11190 | } |
11191 | |
11192 | MVT VT = Op.getSimpleValueType(); |
11193 | if (!VT.isFixedLengthVector()) |
11194 | return Result; |
11195 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11196 | } |
11197 | |
11198 | SDValue |
11199 | RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, |
11200 | SelectionDAG &DAG) const { |
11201 | SDLoc DL(Op); |
11202 | |
11203 | SDValue Op1 = Op.getOperand(i: 0); |
11204 | SDValue Op2 = Op.getOperand(i: 1); |
11205 | SDValue Offset = Op.getOperand(i: 2); |
11206 | SDValue Mask = Op.getOperand(i: 3); |
11207 | SDValue EVL1 = Op.getOperand(i: 4); |
11208 | SDValue EVL2 = Op.getOperand(i: 5); |
11209 | |
11210 | const MVT XLenVT = Subtarget.getXLenVT(); |
11211 | MVT VT = Op.getSimpleValueType(); |
11212 | MVT ContainerVT = VT; |
11213 | if (VT.isFixedLengthVector()) { |
11214 | ContainerVT = getContainerForFixedLengthVector(VT); |
11215 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11216 | Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget); |
11217 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11218 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11219 | } |
11220 | |
11221 | // EVL1 may need to be extended to XLenVT with RV64LegalI32. |
11222 | EVL1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: EVL1); |
11223 | |
11224 | bool IsMaskVector = VT.getVectorElementType() == MVT::i1; |
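  // Mask vectors cannot be slid directly; promote i1 to i8 (0/1 per lane),
  // splice as i8, then compare the spliced result against zero to recover
  // the mask.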
11225 | if (IsMaskVector) { |
11226 | ContainerVT = ContainerVT.changeVectorElementType(MVT::i8); |
11227 | |
11228 | // Expand input operands |
11229 | SDValue SplatOneOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11230 | N1: DAG.getUNDEF(VT: ContainerVT), |
11231 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL1); |
11232 | SDValue SplatZeroOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11233 | N1: DAG.getUNDEF(VT: ContainerVT), |
11234 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL1); |
11235 | Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op1, N2: SplatOneOp1, |
11236 | N3: SplatZeroOp1, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL1); |
11237 | |
11238 | SDValue SplatOneOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11239 | N1: DAG.getUNDEF(VT: ContainerVT), |
11240 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL2); |
11241 | SDValue SplatZeroOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11242 | N1: DAG.getUNDEF(VT: ContainerVT), |
11243 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL2); |
11244 | Op2 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op2, N2: SplatOneOp2, |
11245 | N3: SplatZeroOp2, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL2); |
11246 | } |
11247 | |
11248 | int64_t ImmValue = cast<ConstantSDNode>(Val&: Offset)->getSExtValue(); |
11249 | SDValue DownOffset, UpOffset; |
11250 | if (ImmValue >= 0) { |
    // The operand is a TargetConstant; we need to rebuild it as a regular
    // constant.
11253 | DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT); |
11254 | UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: DownOffset); |
11255 | } else { |
    // The operand is a TargetConstant; we need to rebuild it as a regular
    // constant rather than negating the original operand.
11258 | UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT); |
11259 | DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: UpOffset); |
11260 | } |
11261 | |
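  // For example, with Offset = 2 and EVL1 = 8: DownOffset = 2 and
  // UpOffset = 6, so lanes 0..5 of the result come from Op1[2..7] and Op2 is
  // slid up starting at lane 6.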
11262 | SDValue SlideDown = |
11263 | getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
11264 | Op: Op1, Offset: DownOffset, Mask, VL: UpOffset); |
11265 | SDValue Result = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: SlideDown, Op: Op2, |
11266 | Offset: UpOffset, Mask, VL: EVL2, Policy: RISCVII::TAIL_AGNOSTIC); |
11267 | |
11268 | if (IsMaskVector) { |
11269 | // Truncate Result back to a mask vector (Result has same EVL as Op2) |
11270 | Result = DAG.getNode( |
11271 | RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1), |
11272 | {Result, DAG.getConstant(0, DL, ContainerVT), |
11273 | DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)), |
11274 | Mask, EVL2}); |
11275 | } |
11276 | |
11277 | if (!VT.isFixedLengthVector()) |
11278 | return Result; |
11279 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11280 | } |
11281 | |
11282 | SDValue |
11283 | RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, |
11284 | SelectionDAG &DAG) const { |
11285 | SDLoc DL(Op); |
11286 | MVT VT = Op.getSimpleValueType(); |
11287 | MVT XLenVT = Subtarget.getXLenVT(); |
11288 | |
11289 | SDValue Op1 = Op.getOperand(i: 0); |
11290 | SDValue Mask = Op.getOperand(i: 1); |
11291 | SDValue EVL = Op.getOperand(i: 2); |
11292 | |
11293 | MVT ContainerVT = VT; |
11294 | if (VT.isFixedLengthVector()) { |
11295 | ContainerVT = getContainerForFixedLengthVector(VT); |
11296 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11297 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11298 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11299 | } |
11300 | |
11301 | MVT GatherVT = ContainerVT; |
11302 | MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); |
11303 | // Check if we are working with mask vectors |
11304 | bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; |
11305 | if (IsMaskVector) { |
11306 | GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8); |
11307 | |
11308 | // Expand input operand |
11309 | SDValue SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT, |
11310 | N1: DAG.getUNDEF(VT: IndicesVT), |
11311 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL); |
11312 | SDValue SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT, |
11313 | N1: DAG.getUNDEF(VT: IndicesVT), |
11314 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL); |
11315 | Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IndicesVT, N1: Op1, N2: SplatOne, |
11316 | N3: SplatZero, N4: DAG.getUNDEF(VT: IndicesVT), N5: EVL); |
11317 | } |
11318 | |
11319 | unsigned EltSize = GatherVT.getScalarSizeInBits(); |
11320 | unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue(); |
11321 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
11322 | unsigned MaxVLMAX = |
11323 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize); |
11324 | |
11325 | unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; |
11326 | // If this is SEW=8 and VLMAX is unknown or more than 256, we need |
11327 | // to use vrgatherei16.vv. |
11328 | // TODO: It's also possible to use vrgatherei16.vv for other types to |
11329 | // decrease register width for the index calculation. |
11330 | // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. |
11331 | if (MaxVLMAX > 256 && EltSize == 8) { |
11332 | // If this is LMUL=8, we have to split before using vrgatherei16.vv. |
11333 | // Split the vector in half and reverse each half using a full register |
11334 | // reverse. |
11335 | // Swap the halves and concatenate them. |
11336 | // Slide the concatenated result by (VLMax - VL). |
11337 | if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { |
11338 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: GatherVT); |
11339 | auto [Lo, Hi] = DAG.SplitVector(N: Op1, DL); |
11340 | |
11341 | SDValue LoRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo); |
11342 | SDValue HiRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi); |
11343 | |
11344 | // Reassemble the low and high pieces reversed. |
      // NOTE: This Result is unmasked (because we do not need masks for
      // shuffles). If in the future this has to change, we can use a
      // SELECT_VL between Result and UNDEF using the mask originally passed
      // to VP_REVERSE.
11348 | SDValue Result = |
11349 | DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: GatherVT, N1: HiRev, N2: LoRev); |
11350 | |
11351 | // Slide off any elements from past EVL that were reversed into the low |
11352 | // elements. |
11353 | unsigned MinElts = GatherVT.getVectorMinNumElements(); |
11354 | SDValue VLMax = |
11355 | DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), MinElts)); |
11356 | SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: EVL); |
11357 | |
11358 | Result = getVSlidedown(DAG, Subtarget, DL, VT: GatherVT, |
11359 | Merge: DAG.getUNDEF(VT: GatherVT), Op: Result, Offset: Diff, Mask, VL: EVL); |
11360 | |
11361 | if (IsMaskVector) { |
11362 | // Truncate Result back to a mask vector |
11363 | Result = |
11364 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
11365 | Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT), |
11366 | DAG.getCondCode(Cond: ISD::SETNE), |
11367 | DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL}); |
11368 | } |
11369 | |
11370 | if (!VT.isFixedLengthVector()) |
11371 | return Result; |
11372 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11373 | } |
11374 | |
11375 | // Just promote the int type to i16 which will double the LMUL. |
11376 | IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount()); |
11377 | GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; |
11378 | } |
11379 | |
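  // Reverse within EVL using a gather with indices (EVL-1) - vid; e.g.
  // EVL = 4 gives vid = [0,1,2,3] and gather indices [3,2,1,0].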
11380 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IndicesVT, N1: Mask, N2: EVL); |
11381 | SDValue VecLen = |
11382 | DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
11383 | SDValue VecLenSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT, |
11384 | N1: DAG.getUNDEF(VT: IndicesVT), N2: VecLen, N3: EVL); |
11385 | SDValue VRSUB = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IndicesVT, N1: VecLenSplat, N2: VID, |
11386 | N3: DAG.getUNDEF(VT: IndicesVT), N4: Mask, N5: EVL); |
11387 | SDValue Result = DAG.getNode(Opcode: GatherOpc, DL, VT: GatherVT, N1: Op1, N2: VRSUB, |
11388 | N3: DAG.getUNDEF(VT: GatherVT), N4: Mask, N5: EVL); |
11389 | |
11390 | if (IsMaskVector) { |
11391 | // Truncate Result back to a mask vector |
11392 | Result = DAG.getNode( |
11393 | Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
11394 | Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT), DAG.getCondCode(Cond: ISD::SETNE), |
11395 | DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL}); |
11396 | } |
11397 | |
11398 | if (!VT.isFixedLengthVector()) |
11399 | return Result; |
11400 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11401 | } |
11402 | |
11403 | SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, |
11404 | SelectionDAG &DAG) const { |
11405 | MVT VT = Op.getSimpleValueType(); |
11406 | if (VT.getVectorElementType() != MVT::i1) |
11407 | return lowerVPOp(Op, DAG); |
11408 | |
  // It is safe to drop the mask parameter as masked-off elements are undef.
11410 | SDValue Op1 = Op->getOperand(Num: 0); |
11411 | SDValue Op2 = Op->getOperand(Num: 1); |
11412 | SDValue VL = Op->getOperand(Num: 3); |
11413 | |
11414 | MVT ContainerVT = VT; |
11415 | const bool IsFixed = VT.isFixedLengthVector(); |
11416 | if (IsFixed) { |
11417 | ContainerVT = getContainerForFixedLengthVector(VT); |
11418 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11419 | Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget); |
11420 | } |
11421 | |
11422 | SDLoc DL(Op); |
11423 | SDValue Val = DAG.getNode(Opcode: getRISCVVLOp(Op), DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL); |
11424 | if (!IsFixed) |
11425 | return Val; |
11426 | return convertFromScalableVector(VT, V: Val, DAG, Subtarget); |
11427 | } |
11428 | |
11429 | SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, |
11430 | SelectionDAG &DAG) const { |
11431 | SDLoc DL(Op); |
11432 | MVT XLenVT = Subtarget.getXLenVT(); |
11433 | MVT VT = Op.getSimpleValueType(); |
11434 | MVT ContainerVT = VT; |
11435 | if (VT.isFixedLengthVector()) |
11436 | ContainerVT = getContainerForFixedLengthVector(VT); |
11437 | |
11438 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
11439 | |
11440 | auto *VPNode = cast<VPStridedLoadSDNode>(Val&: Op); |
11441 | // Check if the mask is known to be all ones |
11442 | SDValue Mask = VPNode->getMask(); |
11443 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
11444 | |
11445 | SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse |
11446 | : Intrinsic::riscv_vlse_mask, |
11447 | DL, XLenVT); |
11448 | SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, |
11449 | DAG.getUNDEF(VT: ContainerVT), VPNode->getBasePtr(), |
11450 | VPNode->getStride()}; |
11451 | if (!IsUnmasked) { |
11452 | if (VT.isFixedLengthVector()) { |
11453 | MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); |
11454 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11455 | } |
11456 | Ops.push_back(Elt: Mask); |
11457 | } |
11458 | Ops.push_back(Elt: VPNode->getVectorLength()); |
11459 | if (!IsUnmasked) { |
11460 | SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT); |
11461 | Ops.push_back(Elt: Policy); |
11462 | } |
11463 | |
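  // The final operand layout for the masked form is {chain, intrinsic-id,
  // passthru, base-ptr, stride, mask, VL, policy}; the unmasked form omits
  // the mask and policy operands.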
11464 | SDValue Result = |
11465 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
11466 | MemVT: VPNode->getMemoryVT(), MMO: VPNode->getMemOperand()); |
11467 | SDValue Chain = Result.getValue(R: 1); |
11468 | |
11469 | if (VT.isFixedLengthVector()) |
11470 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11471 | |
11472 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
11473 | } |
11474 | |
11475 | SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, |
11476 | SelectionDAG &DAG) const { |
11477 | SDLoc DL(Op); |
11478 | MVT XLenVT = Subtarget.getXLenVT(); |
11479 | |
11480 | auto *VPNode = cast<VPStridedStoreSDNode>(Val&: Op); |
11481 | SDValue StoreVal = VPNode->getValue(); |
11482 | MVT VT = StoreVal.getSimpleValueType(); |
11483 | MVT ContainerVT = VT; |
11484 | if (VT.isFixedLengthVector()) { |
11485 | ContainerVT = getContainerForFixedLengthVector(VT); |
11486 | StoreVal = convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget); |
11487 | } |
11488 | |
11489 | // Check if the mask is known to be all ones |
11490 | SDValue Mask = VPNode->getMask(); |
11491 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
11492 | |
11493 | SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse |
11494 | : Intrinsic::riscv_vsse_mask, |
11495 | DL, XLenVT); |
11496 | SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, |
11497 | VPNode->getBasePtr(), VPNode->getStride()}; |
11498 | if (!IsUnmasked) { |
11499 | if (VT.isFixedLengthVector()) { |
11500 | MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); |
11501 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11502 | } |
11503 | Ops.push_back(Elt: Mask); |
11504 | } |
11505 | Ops.push_back(Elt: VPNode->getVectorLength()); |
11506 | |
11507 | return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: VPNode->getVTList(), |
11508 | Ops, MemVT: VPNode->getMemoryVT(), |
11509 | MMO: VPNode->getMemOperand()); |
11510 | } |
11511 | |
11512 | // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be |
// matched to an RVV indexed load. The RVV indexed load instructions only
11514 | // support the "unsigned unscaled" addressing mode; indices are implicitly |
11515 | // zero-extended or truncated to XLEN and are treated as byte offsets. Any |
11516 | // signed or scaled indexing is extended to the XLEN value type and scaled |
11517 | // accordingly. |
11518 | SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, |
11519 | SelectionDAG &DAG) const { |
11520 | SDLoc DL(Op); |
11521 | MVT VT = Op.getSimpleValueType(); |
11522 | |
11523 | const auto *MemSD = cast<MemSDNode>(Val: Op.getNode()); |
11524 | EVT MemVT = MemSD->getMemoryVT(); |
11525 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
11526 | SDValue Chain = MemSD->getChain(); |
11527 | SDValue BasePtr = MemSD->getBasePtr(); |
11528 | |
11529 | [[maybe_unused]] ISD::LoadExtType LoadExtType; |
11530 | SDValue Index, Mask, PassThru, VL; |
11531 | |
11532 | if (auto *VPGN = dyn_cast<VPGatherSDNode>(Val: Op.getNode())) { |
11533 | Index = VPGN->getIndex(); |
11534 | Mask = VPGN->getMask(); |
11535 | PassThru = DAG.getUNDEF(VT); |
11536 | VL = VPGN->getVectorLength(); |
11537 | // VP doesn't support extending loads. |
11538 | LoadExtType = ISD::NON_EXTLOAD; |
11539 | } else { |
    // Else it must be an MGATHER.
11541 | auto *MGN = cast<MaskedGatherSDNode>(Val: Op.getNode()); |
11542 | Index = MGN->getIndex(); |
11543 | Mask = MGN->getMask(); |
11544 | PassThru = MGN->getPassThru(); |
11545 | LoadExtType = MGN->getExtensionType(); |
11546 | } |
11547 | |
11548 | MVT IndexVT = Index.getSimpleValueType(); |
11549 | MVT XLenVT = Subtarget.getXLenVT(); |
11550 | |
11551 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
11552 | "Unexpected VTs!" ); |
11553 | assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type" ); |
11554 | // Targets have to explicitly opt-in for extending vector loads. |
11555 | assert(LoadExtType == ISD::NON_EXTLOAD && |
11556 | "Unexpected extending MGATHER/VP_GATHER" ); |
11557 | |
11558 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
11559 | // the selection of the masked intrinsics doesn't do this for us. |
11560 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
11561 | |
11562 | MVT ContainerVT = VT; |
11563 | if (VT.isFixedLengthVector()) { |
11564 | ContainerVT = getContainerForFixedLengthVector(VT); |
11565 | IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(), |
11566 | EC: ContainerVT.getVectorElementCount()); |
11567 | |
11568 | Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget); |
11569 | |
11570 | if (!IsUnmasked) { |
11571 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11572 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11573 | PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget); |
11574 | } |
11575 | } |
11576 | |
11577 | if (!VL) |
11578 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
11579 | |
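  // RVV indexed loads use only the low XLEN bits of each index (as unsigned
  // byte offsets), so indices wider than XLEN can simply be truncated here.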
11580 | if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { |
11581 | IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT); |
11582 | Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index); |
11583 | } |
11584 | |
11585 | unsigned IntID = |
11586 | IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; |
11587 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
11588 | if (IsUnmasked) |
11589 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
11590 | else |
11591 | Ops.push_back(Elt: PassThru); |
11592 | Ops.push_back(Elt: BasePtr); |
11593 | Ops.push_back(Elt: Index); |
11594 | if (!IsUnmasked) |
11595 | Ops.push_back(Elt: Mask); |
11596 | Ops.push_back(Elt: VL); |
11597 | if (!IsUnmasked) |
11598 | Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT)); |
11599 | |
11600 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
11601 | SDValue Result = |
11602 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO); |
11603 | Chain = Result.getValue(R: 1); |
11604 | |
11605 | if (VT.isFixedLengthVector()) |
11606 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11607 | |
11608 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
11609 | } |
11610 | |
11611 | // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be |
// matched to an RVV indexed store. The RVV indexed store instructions only
11613 | // support the "unsigned unscaled" addressing mode; indices are implicitly |
11614 | // zero-extended or truncated to XLEN and are treated as byte offsets. Any |
11615 | // signed or scaled indexing is extended to the XLEN value type and scaled |
11616 | // accordingly. |
11617 | SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, |
11618 | SelectionDAG &DAG) const { |
11619 | SDLoc DL(Op); |
11620 | const auto *MemSD = cast<MemSDNode>(Val: Op.getNode()); |
11621 | EVT MemVT = MemSD->getMemoryVT(); |
11622 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
11623 | SDValue Chain = MemSD->getChain(); |
11624 | SDValue BasePtr = MemSD->getBasePtr(); |
11625 | |
11626 | [[maybe_unused]] bool IsTruncatingStore = false; |
11627 | SDValue Index, Mask, Val, VL; |
11628 | |
11629 | if (auto *VPSN = dyn_cast<VPScatterSDNode>(Val: Op.getNode())) { |
11630 | Index = VPSN->getIndex(); |
11631 | Mask = VPSN->getMask(); |
11632 | Val = VPSN->getValue(); |
11633 | VL = VPSN->getVectorLength(); |
11634 | // VP doesn't support truncating stores. |
11635 | IsTruncatingStore = false; |
11636 | } else { |
    // Else it must be an MSCATTER.
11638 | auto *MSN = cast<MaskedScatterSDNode>(Val: Op.getNode()); |
11639 | Index = MSN->getIndex(); |
11640 | Mask = MSN->getMask(); |
11641 | Val = MSN->getValue(); |
11642 | IsTruncatingStore = MSN->isTruncatingStore(); |
11643 | } |
11644 | |
11645 | MVT VT = Val.getSimpleValueType(); |
11646 | MVT IndexVT = Index.getSimpleValueType(); |
11647 | MVT XLenVT = Subtarget.getXLenVT(); |
11648 | |
11649 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
11650 | "Unexpected VTs!" ); |
11651 | assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type" ); |
11652 | // Targets have to explicitly opt-in for extending vector loads and |
11653 | // truncating vector stores. |
11654 | assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER" ); |
11655 | |
11656 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
11657 | // the selection of the masked intrinsics doesn't do this for us. |
11658 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
11659 | |
11660 | MVT ContainerVT = VT; |
11661 | if (VT.isFixedLengthVector()) { |
11662 | ContainerVT = getContainerForFixedLengthVector(VT); |
11663 | IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(), |
11664 | EC: ContainerVT.getVectorElementCount()); |
11665 | |
11666 | Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget); |
11667 | Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget); |
11668 | |
11669 | if (!IsUnmasked) { |
11670 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11671 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11672 | } |
11673 | } |
11674 | |
11675 | if (!VL) |
11676 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
11677 | |
11678 | if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { |
11679 | IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT); |
11680 | Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index); |
11681 | } |
11682 | |
11683 | unsigned IntID = |
11684 | IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; |
11685 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
11686 | Ops.push_back(Elt: Val); |
11687 | Ops.push_back(Elt: BasePtr); |
11688 | Ops.push_back(Elt: Index); |
11689 | if (!IsUnmasked) |
11690 | Ops.push_back(Elt: Mask); |
11691 | Ops.push_back(Elt: VL); |
11692 | |
11693 | return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, |
11694 | DAG.getVTList(MVT::Other), Ops, MemVT, MMO); |
11695 | } |
11696 | |
11697 | SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, |
11698 | SelectionDAG &DAG) const { |
11699 | const MVT XLenVT = Subtarget.getXLenVT(); |
11700 | SDLoc DL(Op); |
11701 | SDValue Chain = Op->getOperand(Num: 0); |
11702 | SDValue SysRegNo = DAG.getTargetConstant( |
11703 | RISCVSysReg::lookupSysRegByName("FRM" )->Encoding, DL, XLenVT); |
11704 | SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); |
11705 | SDValue RM = DAG.getNode(Opcode: RISCVISD::READ_CSR, DL, VTList: VTs, N1: Chain, N2: SysRegNo); |
11706 | |
  // The encoding used for the rounding mode in RISC-V differs from the one
  // used by FLT_ROUNDS. To convert between them, the RISC-V rounding mode is
  // used as an index into a table consisting of a sequence of 4-bit fields,
  // each representing the corresponding FLT_ROUNDS mode.
11711 | static const int Table = |
11712 | (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | |
11713 | (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | |
11714 | (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | |
11715 | (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | |
11716 | (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); |
11717 | |
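  // For example, reading FRM = RDN (2) computes
  // (Table >> (2 * 4)) & 7 == int(RoundingMode::TowardNegative) == 3, the
  // FLT_ROUNDS value for round-toward-negative.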
11718 | SDValue Shift = |
11719 | DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RM, N2: DAG.getConstant(Val: 2, DL, VT: XLenVT)); |
11720 | SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, |
11721 | N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift); |
11722 | SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted, |
11723 | N2: DAG.getConstant(Val: 7, DL, VT: XLenVT)); |
11724 | |
11725 | return DAG.getMergeValues(Ops: {Masked, Chain}, dl: DL); |
11726 | } |
11727 | |
11728 | SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, |
11729 | SelectionDAG &DAG) const { |
11730 | const MVT XLenVT = Subtarget.getXLenVT(); |
11731 | SDLoc DL(Op); |
11732 | SDValue Chain = Op->getOperand(Num: 0); |
11733 | SDValue RMValue = Op->getOperand(Num: 1); |
11734 | SDValue SysRegNo = DAG.getTargetConstant( |
11735 | RISCVSysReg::lookupSysRegByName("FRM" )->Encoding, DL, XLenVT); |
11736 | |
  // The encoding used for the rounding mode in RISC-V differs from the one
  // used by FLT_ROUNDS. To convert between them, the C rounding mode is used
  // as an index into a table consisting of a sequence of 4-bit fields, each
  // representing the corresponding RISC-V mode.
11741 | static const unsigned Table = |
11742 | (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) | |
11743 | (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) | |
11744 | (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) | |
11745 | (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | |
11746 | (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); |
11747 | |
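  // For example, setting RoundingMode::TowardPositive (2) computes
  // (Table >> (2 * 4)) & 7 == RISCVFPRndMode::RUP == 3, which is then
  // written to the FRM CSR.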
11748 | RMValue = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: RMValue); |
11749 | |
11750 | SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RMValue, |
11751 | N2: DAG.getConstant(Val: 2, DL, VT: XLenVT)); |
11752 | SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, |
11753 | N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift); |
11754 | RMValue = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted, |
11755 | N2: DAG.getConstant(Val: 0x7, DL, VT: XLenVT)); |
11756 | return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo, |
11757 | RMValue); |
11758 | } |
11759 | |
11760 | SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
11761 | SelectionDAG &DAG) const { |
11762 | MachineFunction &MF = DAG.getMachineFunction(); |
11763 | |
11764 | bool isRISCV64 = Subtarget.is64Bit(); |
11765 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
11766 | |
11767 | int FI = MF.getFrameInfo().CreateFixedObject(Size: isRISCV64 ? 8 : 4, SPOffset: 0, IsImmutable: false); |
11768 | return DAG.getFrameIndex(FI, VT: PtrVT); |
11769 | } |
11770 | |
11771 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
11772 | // form of the given Opcode. |
11773 | static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { |
11774 | switch (Opcode) { |
11775 | default: |
11776 | llvm_unreachable("Unexpected opcode" ); |
11777 | case ISD::SHL: |
11778 | return RISCVISD::SLLW; |
11779 | case ISD::SRA: |
11780 | return RISCVISD::SRAW; |
11781 | case ISD::SRL: |
11782 | return RISCVISD::SRLW; |
11783 | case ISD::SDIV: |
11784 | return RISCVISD::DIVW; |
11785 | case ISD::UDIV: |
11786 | return RISCVISD::DIVUW; |
11787 | case ISD::UREM: |
11788 | return RISCVISD::REMUW; |
11789 | case ISD::ROTL: |
11790 | return RISCVISD::ROLW; |
11791 | case ISD::ROTR: |
11792 | return RISCVISD::RORW; |
11793 | } |
11794 | } |
11795 | |
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLLW/DIVUW/.../*W later on because the fact that the operation was
// originally of type i8/i16/i32 is lost.
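// For example, (srl i32 X, Y) becomes (trunc (SRLW (anyext X), (anyext Y))).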
11801 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, |
11802 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
11803 | SDLoc DL(N); |
11804 | RISCVISD::NodeType WOpcode = getRISCVWOpcode(Opcode: N->getOpcode()); |
11805 | SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); |
11806 | SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); |
11807 | SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); |
11808 | // ReplaceNodeResults requires we maintain the same type for the return value. |
11809 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
11810 | } |
11811 | |
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics to reduce the number of sign-extension instructions.
11814 | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
11815 | SDLoc DL(N); |
11816 | SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11817 | SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11818 | SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); |
11819 | SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, |
11820 | DAG.getValueType(MVT::i32)); |
11821 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); |
11822 | } |
11823 | |
11824 | void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, |
11825 | SmallVectorImpl<SDValue> &Results, |
11826 | SelectionDAG &DAG) const { |
11827 | SDLoc DL(N); |
11828 | switch (N->getOpcode()) { |
11829 | default: |
11830 | llvm_unreachable("Don't know how to custom type legalize this operation!" ); |
11831 | case ISD::STRICT_FP_TO_SINT: |
11832 | case ISD::STRICT_FP_TO_UINT: |
11833 | case ISD::FP_TO_SINT: |
11834 | case ISD::FP_TO_UINT: { |
11835 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11836 | "Unexpected custom legalisation" ); |
11837 | bool IsStrict = N->isStrictFPOpcode(); |
11838 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || |
11839 | N->getOpcode() == ISD::STRICT_FP_TO_SINT; |
11840 | SDValue Op0 = IsStrict ? N->getOperand(Num: 1) : N->getOperand(Num: 0); |
11841 | if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) != |
11842 | TargetLowering::TypeSoftenFloat) { |
11843 | if (!isTypeLegal(VT: Op0.getValueType())) |
11844 | return; |
11845 | if (IsStrict) { |
11846 | SDValue Chain = N->getOperand(Num: 0); |
        // In the absence of Zfh, promote f16 to f32, then convert.
11848 | if (Op0.getValueType() == MVT::f16 && |
11849 | !Subtarget.hasStdExtZfhOrZhinx()) { |
11850 | Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other}, |
11851 | {Chain, Op0}); |
11852 | Chain = Op0.getValue(R: 1); |
11853 | } |
11854 | unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64 |
11855 | : RISCVISD::STRICT_FCVT_WU_RV64; |
11856 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); |
11857 | SDValue Res = DAG.getNode( |
11858 | Opc, DL, VTs, Chain, Op0, |
11859 | DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); |
11860 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11861 | Results.push_back(Elt: Res.getValue(R: 1)); |
11862 | return; |
11863 | } |
      // For bf16, or for f16 in the absence of Zfh, promote [b]f16 to f32 and
      // then convert.
11866 | if ((Op0.getValueType() == MVT::f16 && |
11867 | !Subtarget.hasStdExtZfhOrZhinx()) || |
11868 | Op0.getValueType() == MVT::bf16) |
11869 | Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); |
11870 | |
11871 | unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
11872 | SDValue Res = |
11873 | DAG.getNode(Opc, DL, MVT::i64, Op0, |
11874 | DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); |
11875 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11876 | return; |
11877 | } |
11878 | // If the FP type needs to be softened, emit a library call using the 'si' |
11879 | // version. If we left it to default legalization we'd end up with 'di'. If |
11880 | // the FP type doesn't need to be softened just let generic type |
11881 | // legalization promote the result type. |
11882 | RTLIB::Libcall LC; |
11883 | if (IsSigned) |
11884 | LC = RTLIB::getFPTOSINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0)); |
11885 | else |
11886 | LC = RTLIB::getFPTOUINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0)); |
11887 | MakeLibCallOptions CallOptions; |
11888 | EVT OpVT = Op0.getValueType(); |
11889 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: N->getValueType(ResNo: 0), Value: true); |
11890 | SDValue Chain = IsStrict ? N->getOperand(Num: 0) : SDValue(); |
11891 | SDValue Result; |
11892 | std::tie(args&: Result, args&: Chain) = |
11893 | makeLibCall(DAG, LC, RetVT: N->getValueType(ResNo: 0), Ops: Op0, CallOptions, dl: DL, Chain); |
11894 | Results.push_back(Elt: Result); |
11895 | if (IsStrict) |
11896 | Results.push_back(Elt: Chain); |
11897 | break; |
11898 | } |
11899 | case ISD::LROUND: { |
11900 | SDValue Op0 = N->getOperand(Num: 0); |
11901 | EVT Op0VT = Op0.getValueType(); |
11902 | if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) != |
11903 | TargetLowering::TypeSoftenFloat) { |
11904 | if (!isTypeLegal(VT: Op0VT)) |
11905 | return; |
11906 | |
      // In the absence of Zfh, promote f16 to f32, then convert.
11908 | if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) |
11909 | Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); |
11910 | |
11911 | SDValue Res = |
11912 | DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0, |
11913 | DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64)); |
11914 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11915 | return; |
11916 | } |
    // If the FP type needs to be softened, emit a library call to lround. We
    // will need to truncate the result. We assume that for any input whose
    // result does not fit in i32, returning an unspecified value is
    // acceptable.
11920 | RTLIB::Libcall LC = |
11921 | Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; |
11922 | MakeLibCallOptions CallOptions; |
11923 | EVT OpVT = Op0.getValueType(); |
11924 | CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true); |
11925 | SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first; |
11926 | Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); |
11927 | Results.push_back(Elt: Result); |
11928 | break; |
11929 | } |
11930 | case ISD::READCYCLECOUNTER: |
11931 | case ISD::READSTEADYCOUNTER: { |
11932 | assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only " |
11933 | "has custom type legalization on riscv32" ); |
11934 | |
11935 | SDValue LoCounter, HiCounter; |
11936 | MVT XLenVT = Subtarget.getXLenVT(); |
11937 | if (N->getOpcode() == ISD::READCYCLECOUNTER) { |
11938 | LoCounter = DAG.getTargetConstant( |
11939 | RISCVSysReg::lookupSysRegByName("CYCLE" )->Encoding, DL, XLenVT); |
11940 | HiCounter = DAG.getTargetConstant( |
11941 | RISCVSysReg::lookupSysRegByName("CYCLEH" )->Encoding, DL, XLenVT); |
11942 | } else { |
11943 | LoCounter = DAG.getTargetConstant( |
11944 | RISCVSysReg::lookupSysRegByName("TIME" )->Encoding, DL, XLenVT); |
11945 | HiCounter = DAG.getTargetConstant( |
11946 | RISCVSysReg::lookupSysRegByName("TIMEH" )->Encoding, DL, XLenVT); |
11947 | } |
11948 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); |
11949 | SDValue RCW = DAG.getNode(Opcode: RISCVISD::READ_COUNTER_WIDE, DL, VTList: VTs, |
11950 | N1: N->getOperand(Num: 0), N2: LoCounter, N3: HiCounter); |
11951 | |
11952 | Results.push_back( |
11953 | DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); |
11954 | Results.push_back(Elt: RCW.getValue(R: 2)); |
11955 | break; |
11956 | } |
11957 | case ISD::LOAD: { |
11958 | if (!ISD::isNON_EXTLoad(N)) |
11959 | return; |
11960 | |
11961 | // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the |
11962 | // sext_inreg we emit for ADD/SUB/MUL/SLLI. |
11963 | LoadSDNode *Ld = cast<LoadSDNode>(Val: N); |
11964 | |
11965 | SDLoc dl(N); |
11966 | SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), |
11967 | Ld->getBasePtr(), Ld->getMemoryVT(), |
11968 | Ld->getMemOperand()); |
11969 | Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res)); |
11970 | Results.push_back(Elt: Res.getValue(R: 1)); |
11971 | return; |
11972 | } |
11973 | case ISD::MUL: { |
11974 | unsigned Size = N->getSimpleValueType(ResNo: 0).getSizeInBits(); |
11975 | unsigned XLen = Subtarget.getXLen(); |
11976 | // This multiply needs to be expanded, try to use MULHSU+MUL if possible. |
11977 | if (Size > XLen) { |
11978 | assert(Size == (XLen * 2) && "Unexpected custom legalisation" ); |
11979 | SDValue LHS = N->getOperand(Num: 0); |
11980 | SDValue RHS = N->getOperand(Num: 1); |
11981 | APInt HighMask = APInt::getHighBitsSet(numBits: Size, hiBitsSet: XLen); |
11982 | |
11983 | bool LHSIsU = DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask); |
11984 | bool RHSIsU = DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask); |
11985 | // We need exactly one side to be unsigned. |
11986 | if (LHSIsU == RHSIsU) |
11987 | return; |
11988 | |
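      // MULHSU produces the high XLen bits of a signed*unsigned product, so
      // the full Size-bit result is just the pair {MUL (low half),
      // MULHSU (high half)} built below.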
11989 | auto MakeMULPair = [&](SDValue S, SDValue U) { |
11990 | MVT XLenVT = Subtarget.getXLenVT(); |
11991 | S = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: S); |
11992 | U = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: U); |
11993 | SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: S, N2: U); |
11994 | SDValue Hi = DAG.getNode(Opcode: RISCVISD::MULHSU, DL, VT: XLenVT, N1: S, N2: U); |
11995 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: N->getValueType(ResNo: 0), N1: Lo, N2: Hi); |
11996 | }; |
11997 | |
11998 | bool LHSIsS = DAG.ComputeNumSignBits(Op: LHS) > XLen; |
11999 | bool RHSIsS = DAG.ComputeNumSignBits(Op: RHS) > XLen; |
12000 | |
12001 | // The other operand should be signed, but still prefer MULH when |
12002 | // possible. |
12003 | if (RHSIsU && LHSIsS && !RHSIsS) |
12004 | Results.push_back(Elt: MakeMULPair(LHS, RHS)); |
12005 | else if (LHSIsU && RHSIsS && !LHSIsS) |
12006 | Results.push_back(Elt: MakeMULPair(RHS, LHS)); |
12007 | |
12008 | return; |
12009 | } |
12010 | [[fallthrough]]; |
12011 | } |
12012 | case ISD::ADD: |
12013 | case ISD::SUB: |
12014 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12015 | "Unexpected custom legalisation" ); |
12016 | Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG)); |
12017 | break; |
12018 | case ISD::SHL: |
12019 | case ISD::SRA: |
12020 | case ISD::SRL: |
12021 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12022 | "Unexpected custom legalisation" ); |
12023 | if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) { |
12024 | // If we can use a BSET instruction, allow default promotion to apply. |
12025 | if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && |
12026 | isOneConstant(V: N->getOperand(Num: 0))) |
12027 | break; |
12028 | Results.push_back(Elt: customLegalizeToWOp(N, DAG)); |
12029 | break; |
12030 | } |
12031 | |
12032 | // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is |
12033 | // similar to customLegalizeToWOpWithSExt, but we must zero_extend the |
12034 | // shift amount. |
12035 | if (N->getOpcode() == ISD::SHL) { |
12036 | SDLoc DL(N); |
12037 | SDValue NewOp0 = |
12038 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
12039 | SDValue NewOp1 = |
12040 | DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12041 | SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); |
12042 | SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, |
12043 | DAG.getValueType(MVT::i32)); |
12044 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); |
12045 | } |
12046 | |
12047 | break; |
12048 | case ISD::ROTL: |
12049 | case ISD::ROTR: |
12050 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12051 | "Unexpected custom legalisation" ); |
12052 | assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || |
12053 | Subtarget.hasVendorXTHeadBb()) && |
12054 | "Unexpected custom legalization" ); |
12055 | if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) && |
12056 | !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())) |
12057 | return; |
12058 | Results.push_back(Elt: customLegalizeToWOp(N, DAG)); |
12059 | break; |
12060 | case ISD::CTTZ: |
12061 | case ISD::CTTZ_ZERO_UNDEF: |
12062 | case ISD::CTLZ: |
12063 | case ISD::CTLZ_ZERO_UNDEF: { |
12064 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12065 | "Unexpected custom legalisation" ); |
12066 | |
12067 | SDValue NewOp0 = |
12068 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
12069 | bool IsCTZ = |
12070 | N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; |
12071 | unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; |
12072 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); |
12073 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12074 | return; |
12075 | } |
12076 | case ISD::SDIV: |
12077 | case ISD::UDIV: |
12078 | case ISD::UREM: { |
12079 | MVT VT = N->getSimpleValueType(ResNo: 0); |
12080 | assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && |
12081 | Subtarget.is64Bit() && Subtarget.hasStdExtM() && |
12082 | "Unexpected custom legalisation" ); |
    // Don't promote division/remainder by a constant, since we should expand
    // those to a multiply by a magic constant.
12085 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
12086 | if (N->getOperand(Num: 1).getOpcode() == ISD::Constant && |
12087 | !isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr)) |
12088 | return; |
12089 | |
12090 | // If the input is i32, use ANY_EXTEND since the W instructions don't read |
12091 | // the upper 32 bits. For other types we need to sign or zero extend |
12092 | // based on the opcode. |
12093 | unsigned ExtOpc = ISD::ANY_EXTEND; |
12094 | if (VT != MVT::i32) |
12095 | ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND |
12096 | : ISD::ZERO_EXTEND; |
12097 | |
12098 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, ExtOpc)); |
12099 | break; |
12100 | } |
12101 | case ISD::SADDO: { |
12102 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12103 | "Unexpected custom legalisation" ); |
12104 | |
12105 | // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise |
12106 | // use the default legalization. |
12107 | if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1))) |
12108 | return; |
12109 | |
12110 | SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); |
12111 | SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12112 | SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS); |
12113 | Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, |
12114 | DAG.getValueType(MVT::i32)); |
12115 | |
12116 | SDValue Zero = DAG.getConstant(0, DL, MVT::i64); |
12117 | |
12118 | // For an addition, the result should be less than one of the operands (LHS) |
12119 | // if and only if the other operand (RHS) is negative, otherwise there will |
12120 | // be overflow. |
12121 | // For a subtraction, the result should be less than one of the operands |
12122 | // (LHS) if and only if the other operand (RHS) is (non-zero) positive, |
12123 | // otherwise there will be overflow. |
12124 | EVT OType = N->getValueType(ResNo: 1); |
12125 | SDValue ResultLowerThanLHS = DAG.getSetCC(DL, VT: OType, LHS: Res, RHS: LHS, Cond: ISD::SETLT); |
12126 | SDValue ConditionRHS = DAG.getSetCC(DL, VT: OType, LHS: RHS, RHS: Zero, Cond: ISD::SETLT); |
12127 | |
12128 | SDValue Overflow = |
12129 | DAG.getNode(Opcode: ISD::XOR, DL, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS); |
12130 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12131 | Results.push_back(Elt: Overflow); |
12132 | return; |
12133 | } |
12134 | case ISD::UADDO: |
12135 | case ISD::USUBO: { |
12136 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12137 | "Unexpected custom legalisation" ); |
12138 | bool IsAdd = N->getOpcode() == ISD::UADDO; |
12139 | // Create an ADDW or SUBW. |
12140 | SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
12141 | SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12142 | SDValue Res = |
12143 | DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); |
12144 | Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, |
12145 | DAG.getValueType(MVT::i32)); |
12146 | |
12147 | SDValue Overflow; |
12148 | if (IsAdd && isOneConstant(V: RHS)) { |
12149 | // Special case uaddo X, 1 overflowed if the addition result is 0. |
      // The general case (X + C) < C is not necessarily beneficial. Although
      // we reduce the live range of X, we may introduce the materialization
      // of constant C, especially when the setcc result is used by a branch,
      // and RISC-V has no compare-with-constant branch instructions.
12154 | Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, |
12155 | DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); |
12156 | } else if (IsAdd && isAllOnesConstant(V: RHS)) { |
12157 | // Special case uaddo X, -1 overflowed if X != 0. |
12158 | Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0), |
12159 | DAG.getConstant(0, DL, MVT::i32), ISD::SETNE); |
12160 | } else { |
12161 | // Sign extend the LHS and perform an unsigned compare with the ADDW |
12162 | // result. Since the inputs are sign extended from i32, this is equivalent |
12163 | // to comparing the lower 32 bits. |
12164 | LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); |
12165 | Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: Res, RHS: LHS, |
12166 | Cond: IsAdd ? ISD::SETULT : ISD::SETUGT); |
12167 | } |
12168 | |
12169 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12170 | Results.push_back(Elt: Overflow); |
12171 | return; |
12172 | } |
12173 | case ISD::UADDSAT: |
12174 | case ISD::USUBSAT: { |
12175 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12176 | "Unexpected custom legalisation" ); |
12177 | if (Subtarget.hasStdExtZbb()) { |
12178 | // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using |
12179 | // sign extend allows overflow of the lower 32 bits to be detected on |
12180 | // the promoted size. |
12181 | SDValue LHS = |
12182 | DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); |
12183 | SDValue RHS = |
12184 | DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12185 | SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); |
12186 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12187 | return; |
12188 | } |
12189 | |
12190 | // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom |
12191 | // promotion for UADDO/USUBO. |
12192 | Results.push_back(Elt: expandAddSubSat(Node: N, DAG)); |
12193 | return; |
12194 | } |
12195 | case ISD::SADDSAT: |
12196 | case ISD::SSUBSAT: { |
12197 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12198 | "Unexpected custom legalisation" ); |
12199 | Results.push_back(Elt: expandAddSubSat(Node: N, DAG)); |
12200 | return; |
12201 | } |
12202 | case ISD::ABS: { |
12203 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12204 | "Unexpected custom legalisation" ); |
12205 | |
12206 | if (Subtarget.hasStdExtZbb()) { |
12207 | // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. |
12208 | // This allows us to remember that the result is sign extended. Expanding |
12209 | // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. |
12210 | SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, |
12211 | N->getOperand(0)); |
12212 | SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); |
12213 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); |
12214 | return; |
12215 | } |
12216 | |
12217 | // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) |
12218 | SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
12219 | |
// Freeze the source so we can increase its use count.
12221 | Src = DAG.getFreeze(V: Src); |
12222 | |
12223 | // Copy sign bit to all bits using the sraiw pattern. |
12224 | SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src, |
12225 | DAG.getValueType(MVT::i32)); |
12226 | SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill, |
12227 | DAG.getConstant(31, DL, MVT::i64)); |
12228 | |
12229 | SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill); |
12230 | NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill); |
12231 | |
12232 | // NOTE: The result is only required to be anyextended, but sext is |
12233 | // consistent with type legalization of sub. |
12234 | NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, |
12235 | DAG.getValueType(MVT::i32)); |
12236 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); |
12237 | return; |
12238 | } |
12239 | case ISD::BITCAST: { |
12240 | EVT VT = N->getValueType(ResNo: 0); |
12241 | assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!" ); |
12242 | SDValue Op0 = N->getOperand(Num: 0); |
12243 | EVT Op0VT = Op0.getValueType(); |
12244 | MVT XLenVT = Subtarget.getXLenVT(); |
12245 | if (VT == MVT::i16 && Op0VT == MVT::f16 && |
12246 | Subtarget.hasStdExtZfhminOrZhinxmin()) { |
12247 | SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0); |
12248 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); |
12249 | } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && |
12250 | Subtarget.hasStdExtZfbfmin()) { |
12251 | SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0); |
12252 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); |
12253 | } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && |
12254 | Subtarget.hasStdExtFOrZfinx()) { |
12255 | SDValue FPConv = |
12256 | DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); |
12257 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); |
12258 | } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) { |
12259 | SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, |
12260 | DAG.getVTList(MVT::i32, MVT::i32), Op0); |
12261 | SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, |
12262 | NewReg.getValue(0), NewReg.getValue(1)); |
12263 | Results.push_back(Elt: RetReg); |
12264 | } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && |
12265 | isTypeLegal(VT: Op0VT)) { |
12266 | // Custom-legalize bitcasts from fixed-length vector types to illegal |
12267 | // scalar types in order to improve codegen. Bitcast the vector to a |
12268 | // one-element vector type whose element type is the same as the result |
12269 | // type, and extract the first element. |
12270 | EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1); |
12271 | if (isTypeLegal(VT: BVT)) { |
12272 | SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0); |
12273 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec, |
12274 | N2: DAG.getVectorIdxConstant(Val: 0, DL))); |
12275 | } |
12276 | } |
12277 | break; |
12278 | } |
12279 | case RISCVISD::BREV8: { |
12280 | MVT VT = N->getSimpleValueType(ResNo: 0); |
12281 | MVT XLenVT = Subtarget.getXLenVT(); |
12282 | assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
12283 | "Unexpected custom legalisation" ); |
12284 | assert(Subtarget.hasStdExtZbkb() && "Unexpected extension" ); |
12285 | SDValue NewOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 0)); |
12286 | SDValue NewRes = DAG.getNode(Opcode: N->getOpcode(), DL, VT: XLenVT, Operand: NewOp); |
12287 | // ReplaceNodeResults requires we maintain the same type for the return |
12288 | // value. |
12289 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: NewRes)); |
12290 | break; |
12291 | } |
12292 | case ISD::EXTRACT_VECTOR_ELT: { |
12293 | // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element |
12294 | // type is illegal (currently only vXi64 RV32). |
12295 | // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are |
12296 | // transferred to the destination register. We issue two of these from the |
// upper and lower halves of the SEW-bit vector element, slid down to the
12298 | // first element. |
12299 | SDValue Vec = N->getOperand(Num: 0); |
12300 | SDValue Idx = N->getOperand(Num: 1); |
12301 | |
12302 | // The vector type hasn't been legalized yet so we can't issue target |
12303 | // specific nodes if it needs legalization. |
// FIXME: We could legalize this manually if it turns out to be important.
12305 | if (!isTypeLegal(VT: Vec.getValueType())) |
12306 | return; |
12307 | |
12308 | MVT VecVT = Vec.getSimpleValueType(); |
12309 | |
12310 | assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && |
12311 | VecVT.getVectorElementType() == MVT::i64 && |
12312 | "Unexpected EXTRACT_VECTOR_ELT legalization" ); |
12313 | |
12314 | // If this is a fixed vector, we need to convert it to a scalable vector. |
12315 | MVT ContainerVT = VecVT; |
12316 | if (VecVT.isFixedLengthVector()) { |
12317 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
12318 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
12319 | } |
12320 | |
12321 | MVT XLenVT = Subtarget.getXLenVT(); |
12322 | |
12323 | // Use a VL of 1 to avoid processing more elements than we need. |
12324 | auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget); |
12325 | |
12326 | // Unless the index is known to be 0, we must slide the vector down to get |
12327 | // the desired element into index 0. |
12328 | if (!isNullConstant(V: Idx)) { |
12329 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, |
12330 | Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL); |
12331 | } |
12332 | |
12333 | // Extract the lower XLEN bits of the correct vector element. |
12334 | SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec); |
12335 | |
12336 | // To extract the upper XLEN bits of the vector element, shift the first |
12337 | // element right by 32 bits and re-extract the lower XLEN bits. |
12338 | SDValue ThirtyTwoV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
12339 | N1: DAG.getUNDEF(VT: ContainerVT), |
12340 | N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL); |
12341 | SDValue LShr32 = |
12342 | DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: ContainerVT, N1: Vec, N2: ThirtyTwoV, |
12343 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
12344 | |
12345 | SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32); |
12346 | |
12347 | Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); |
12348 | break; |
12349 | } |
12350 | case ISD::INTRINSIC_WO_CHAIN: { |
12351 | unsigned IntNo = N->getConstantOperandVal(Num: 0); |
12352 | switch (IntNo) { |
12353 | default: |
12354 | llvm_unreachable( |
12355 | "Don't know how to custom type legalize this intrinsic!" ); |
12356 | case Intrinsic::experimental_get_vector_length: { |
12357 | SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); |
12358 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12359 | return; |
12360 | } |
12361 | case Intrinsic::experimental_cttz_elts: { |
12362 | SDValue Res = lowerCttzElts(N, DAG, Subtarget); |
12363 | Results.push_back( |
12364 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res)); |
12365 | return; |
12366 | } |
12367 | case Intrinsic::riscv_orc_b: |
12368 | case Intrinsic::riscv_brev8: |
12369 | case Intrinsic::riscv_sha256sig0: |
12370 | case Intrinsic::riscv_sha256sig1: |
12371 | case Intrinsic::riscv_sha256sum0: |
12372 | case Intrinsic::riscv_sha256sum1: |
12373 | case Intrinsic::riscv_sm3p0: |
12374 | case Intrinsic::riscv_sm3p1: { |
12375 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
12376 | return; |
12377 | unsigned Opc; |
12378 | switch (IntNo) { |
12379 | case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; |
12380 | case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; |
12381 | case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; |
12382 | case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; |
12383 | case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; |
12384 | case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; |
12385 | case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; |
12386 | case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; |
12387 | } |
12388 | |
12389 | SDValue NewOp = |
12390 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12391 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); |
12392 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12393 | return; |
12394 | } |
12395 | case Intrinsic::riscv_sm4ks: |
12396 | case Intrinsic::riscv_sm4ed: { |
12397 | unsigned Opc = |
12398 | IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; |
12399 | SDValue NewOp0 = |
12400 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12401 | SDValue NewOp1 = |
12402 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); |
12403 | SDValue Res = |
12404 | DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3)); |
12405 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12406 | return; |
12407 | } |
12408 | case Intrinsic::riscv_mopr: { |
12409 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
12410 | return; |
12411 | SDValue NewOp = |
12412 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12413 | SDValue Res = DAG.getNode( |
12414 | RISCVISD::MOPR, DL, MVT::i64, NewOp, |
12415 | DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64)); |
12416 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12417 | return; |
12418 | } |
12419 | case Intrinsic::riscv_moprr: { |
12420 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
12421 | return; |
12422 | SDValue NewOp0 = |
12423 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12424 | SDValue NewOp1 = |
12425 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); |
12426 | SDValue Res = DAG.getNode( |
12427 | RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1, |
12428 | DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64)); |
12429 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12430 | return; |
12431 | } |
12432 | case Intrinsic::riscv_clmul: { |
12433 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
12434 | return; |
12435 | |
12436 | SDValue NewOp0 = |
12437 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12438 | SDValue NewOp1 = |
12439 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); |
12440 | SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); |
12441 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12442 | return; |
12443 | } |
12444 | case Intrinsic::riscv_clmulh: |
12445 | case Intrinsic::riscv_clmulr: { |
12446 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
12447 | return; |
12448 | |
12449 | // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros |
// to the full 128-bit clmul result of multiplying two XLen values.
12451 | // Perform clmulr or clmulh on the shifted values. Finally, extract the |
12452 | // upper 32 bits. |
12453 | // |
12454 | // The alternative is to mask the inputs to 32 bits and use clmul, but |
12455 | // that requires two shifts to mask each input without zext.w. |
12456 | // FIXME: If the inputs are known zero extended or could be freely |
12457 | // zero extended, the mask form would be better. |
12458 | SDValue NewOp0 = |
12459 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
12460 | SDValue NewOp1 = |
12461 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); |
12462 | NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, |
12463 | DAG.getConstant(32, DL, MVT::i64)); |
12464 | NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, |
12465 | DAG.getConstant(32, DL, MVT::i64)); |
12466 | unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH |
12467 | : RISCVISD::CLMULR; |
12468 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); |
12469 | Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, |
12470 | DAG.getConstant(32, DL, MVT::i64)); |
12471 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
12472 | return; |
12473 | } |
12474 | case Intrinsic::riscv_vmv_x_s: { |
12475 | EVT VT = N->getValueType(ResNo: 0); |
12476 | MVT XLenVT = Subtarget.getXLenVT(); |
12477 | if (VT.bitsLT(VT: XLenVT)) { |
12478 | // Simple case just extract using vmv.x.s and truncate. |
SDValue Extract = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL,
12480 | VT: Subtarget.getXLenVT(), Operand: N->getOperand(Num: 1)); |
12481 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Extract)); |
12482 | return; |
12483 | } |
12484 | |
12485 | assert(VT == MVT::i64 && !Subtarget.is64Bit() && |
12486 | "Unexpected custom legalization" ); |
12487 | |
12488 | // We need to do the move in two steps. |
12489 | SDValue Vec = N->getOperand(Num: 1); |
12490 | MVT VecVT = Vec.getSimpleValueType(); |
12491 | |
12492 | // First extract the lower XLEN bits of the element. |
12493 | SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec); |
12494 | |
12495 | // To extract the upper XLEN bits of the vector element, shift the first |
12496 | // element right by 32 bits and re-extract the lower XLEN bits. |
12497 | auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT: VecVT, DL, DAG, Subtarget); |
12498 | |
12499 | SDValue ThirtyTwoV = |
12500 | DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT), |
12501 | N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL); |
12502 | SDValue LShr32 = DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: VecVT, N1: Vec, N2: ThirtyTwoV, |
12503 | N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL); |
12504 | SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32); |
12505 | |
12506 | Results.push_back( |
12507 | DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); |
12508 | break; |
12509 | } |
12510 | } |
12511 | break; |
12512 | } |
12513 | case ISD::VECREDUCE_ADD: |
12514 | case ISD::VECREDUCE_AND: |
12515 | case ISD::VECREDUCE_OR: |
12516 | case ISD::VECREDUCE_XOR: |
12517 | case ISD::VECREDUCE_SMAX: |
12518 | case ISD::VECREDUCE_UMAX: |
12519 | case ISD::VECREDUCE_SMIN: |
12520 | case ISD::VECREDUCE_UMIN: |
12521 | if (SDValue V = lowerVECREDUCE(Op: SDValue(N, 0), DAG)) |
12522 | Results.push_back(Elt: V); |
12523 | break; |
12524 | case ISD::VP_REDUCE_ADD: |
12525 | case ISD::VP_REDUCE_AND: |
12526 | case ISD::VP_REDUCE_OR: |
12527 | case ISD::VP_REDUCE_XOR: |
12528 | case ISD::VP_REDUCE_SMAX: |
12529 | case ISD::VP_REDUCE_UMAX: |
12530 | case ISD::VP_REDUCE_SMIN: |
12531 | case ISD::VP_REDUCE_UMIN: |
12532 | if (SDValue V = lowerVPREDUCE(Op: SDValue(N, 0), DAG)) |
12533 | Results.push_back(Elt: V); |
12534 | break; |
12535 | case ISD::GET_ROUNDING: { |
12536 | SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other); |
12537 | SDValue Res = DAG.getNode(Opcode: ISD::GET_ROUNDING, DL, VTList: VTs, N: N->getOperand(Num: 0)); |
12538 | Results.push_back(Elt: Res.getValue(R: 0)); |
12539 | Results.push_back(Elt: Res.getValue(R: 1)); |
12540 | break; |
12541 | } |
12542 | } |
12543 | } |
12544 | |
12545 | /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP |
12546 | /// which corresponds to it. |
12547 | static unsigned getVecReduceOpcode(unsigned Opc) { |
12548 | switch (Opc) { |
12549 | default: |
12550 | llvm_unreachable("Unhandled binary to transfrom reduction" ); |
12551 | case ISD::ADD: |
12552 | return ISD::VECREDUCE_ADD; |
12553 | case ISD::UMAX: |
12554 | return ISD::VECREDUCE_UMAX; |
12555 | case ISD::SMAX: |
12556 | return ISD::VECREDUCE_SMAX; |
12557 | case ISD::UMIN: |
12558 | return ISD::VECREDUCE_UMIN; |
12559 | case ISD::SMIN: |
12560 | return ISD::VECREDUCE_SMIN; |
12561 | case ISD::AND: |
12562 | return ISD::VECREDUCE_AND; |
12563 | case ISD::OR: |
12564 | return ISD::VECREDUCE_OR; |
12565 | case ISD::XOR: |
12566 | return ISD::VECREDUCE_XOR; |
12567 | case ISD::FADD: |
12568 | // Note: This is the associative form of the generic reduction opcode. |
12569 | return ISD::VECREDUCE_FADD; |
12570 | } |
12571 | } |
12572 | |
12573 | /// Perform two related transforms whose purpose is to incrementally recognize |
12574 | /// an explode_vector followed by scalar reduction as a vector reduction node. |
12575 | /// This exists to recover from a deficiency in SLP which can't handle |
12576 | /// forests with multiple roots sharing common nodes. In some cases, one |
12577 | /// of the trees will be vectorized, and the other will remain (unprofitably) |
12578 | /// scalarized. |
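///
/// As an illustrative example of the two steps: the base case matches
///   (add (extract_vector_elt V, 0), (extract_vector_elt V, 1))
/// and rewrites it to (vecreduce_add (extract_subvector [2 x VT] V, 0));
/// each later combine then folds one more extracted element in:
///   (add (vecreduce_add (extract_subvector [N x VT] V, 0)),
///        (extract_vector_elt V, N))
///   -> (vecreduce_add (extract_subvector [N+1 x VT] V, 0))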
static SDValue
combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12581 | const RISCVSubtarget &Subtarget) { |
12582 | |
// This transform needs to run before all integer types have been legalized
// to i64 (so that the vector element type matches the add type), and while
// it's still safe to introduce odd-sized vector types.
12586 | if (DAG.NewNodesMustHaveLegalTypes) |
12587 | return SDValue(); |
12588 | |
12589 | // Without V, this transform isn't useful. We could form the (illegal) |
12590 | // operations and let them be scalarized again, but there's really no point. |
12591 | if (!Subtarget.hasVInstructions()) |
12592 | return SDValue(); |
12593 | |
12594 | const SDLoc DL(N); |
12595 | const EVT VT = N->getValueType(ResNo: 0); |
12596 | const unsigned Opc = N->getOpcode(); |
12597 | |
12598 | // For FADD, we only handle the case with reassociation allowed. We |
12599 | // could handle strict reduction order, but at the moment, there's no |
12600 | // known reason to, and the complexity isn't worth it. |
12601 | // TODO: Handle fminnum and fmaxnum here |
12602 | if (!VT.isInteger() && |
12603 | (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation())) |
12604 | return SDValue(); |
12605 | |
12606 | const unsigned ReduceOpc = getVecReduceOpcode(Opc); |
12607 | assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) && |
12608 | "Inconsistent mappings" ); |
12609 | SDValue LHS = N->getOperand(Num: 0); |
12610 | SDValue RHS = N->getOperand(Num: 1); |
12611 | |
12612 | if (!LHS.hasOneUse() || !RHS.hasOneUse()) |
12613 | return SDValue(); |
12614 | |
12615 | if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
12616 | std::swap(a&: LHS, b&: RHS); |
12617 | |
12618 | if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
12619 | !isa<ConstantSDNode>(Val: RHS.getOperand(i: 1))) |
12620 | return SDValue(); |
12621 | |
12622 | uint64_t RHSIdx = cast<ConstantSDNode>(Val: RHS.getOperand(i: 1))->getLimitedValue(); |
12623 | SDValue SrcVec = RHS.getOperand(i: 0); |
12624 | EVT SrcVecVT = SrcVec.getValueType(); |
12625 | assert(SrcVecVT.getVectorElementType() == VT); |
12626 | if (SrcVecVT.isScalableVector()) |
12627 | return SDValue(); |
12628 | |
12629 | if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen()) |
12630 | return SDValue(); |
12631 | |
12632 | // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to |
12633 | // reduce_op (extract_subvector [2 x VT] from V). This will form the |
12634 | // root of our reduction tree. TODO: We could extend this to any two |
12635 | // adjacent aligned constant indices if desired. |
12636 | if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
12637 | LHS.getOperand(i: 0) == SrcVec && isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) { |
12638 | uint64_t LHSIdx = |
12639 | cast<ConstantSDNode>(Val: LHS.getOperand(i: 1))->getLimitedValue(); |
12640 | if (0 == std::min(a: LHSIdx, b: RHSIdx) && 1 == std::max(a: LHSIdx, b: RHSIdx)) { |
12641 | EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 2); |
12642 | SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec, |
12643 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
12644 | return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags: N->getFlags()); |
12645 | } |
12646 | } |
12647 | |
12648 | // Match (binop (reduce (extract_subvector V, 0), |
12649 | // (extract_vector_elt V, sizeof(SubVec)))) |
12650 | // into a reduction of one more element from the original vector V. |
12651 | if (LHS.getOpcode() != ReduceOpc) |
12652 | return SDValue(); |
12653 | |
12654 | SDValue ReduceVec = LHS.getOperand(i: 0); |
12655 | if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
12656 | ReduceVec.hasOneUse() && ReduceVec.getOperand(i: 0) == RHS.getOperand(i: 0) && |
12657 | isNullConstant(V: ReduceVec.getOperand(i: 1)) && |
12658 | ReduceVec.getValueType().getVectorNumElements() == RHSIdx) { |
12659 | // For illegal types (e.g. 3xi32), most will be combined again into a |
12660 | // wider (hopefully legal) type. If this is a terminal state, we are |
12661 | // relying on type legalization here to produce something reasonable |
12662 | // and this lowering quality could probably be improved. (TODO) |
12663 | EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: RHSIdx + 1); |
12664 | SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec, |
12665 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
12666 | auto Flags = ReduceVec->getFlags(); |
12667 | Flags.intersectWith(Flags: N->getFlags()); |
12668 | return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags); |
12669 | } |
12670 | |
12671 | return SDValue(); |
12672 | } |
12673 | |
12674 | |
12675 | // Try to fold (<bop> x, (reduction.<bop> vec, start)) |
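// into (reduction.<bop> vec, x), i.e. fold x into the start value. Among
// other checks below, this requires that the existing start value be the
// neutral element of <bop> (e.g. 0 for add/or/xor, -1 for and).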
12676 | static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, |
12677 | const RISCVSubtarget &Subtarget) { |
12678 | auto BinOpToRVVReduce = [](unsigned Opc) { |
12679 | switch (Opc) { |
12680 | default: |
12681 | llvm_unreachable("Unhandled binary to transfrom reduction" ); |
12682 | case ISD::ADD: |
12683 | return RISCVISD::VECREDUCE_ADD_VL; |
12684 | case ISD::UMAX: |
12685 | return RISCVISD::VECREDUCE_UMAX_VL; |
12686 | case ISD::SMAX: |
12687 | return RISCVISD::VECREDUCE_SMAX_VL; |
12688 | case ISD::UMIN: |
12689 | return RISCVISD::VECREDUCE_UMIN_VL; |
12690 | case ISD::SMIN: |
12691 | return RISCVISD::VECREDUCE_SMIN_VL; |
12692 | case ISD::AND: |
12693 | return RISCVISD::VECREDUCE_AND_VL; |
12694 | case ISD::OR: |
12695 | return RISCVISD::VECREDUCE_OR_VL; |
12696 | case ISD::XOR: |
12697 | return RISCVISD::VECREDUCE_XOR_VL; |
12698 | case ISD::FADD: |
12699 | return RISCVISD::VECREDUCE_FADD_VL; |
12700 | case ISD::FMAXNUM: |
12701 | return RISCVISD::VECREDUCE_FMAX_VL; |
12702 | case ISD::FMINNUM: |
12703 | return RISCVISD::VECREDUCE_FMIN_VL; |
12704 | } |
12705 | }; |
12706 | |
12707 | auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { |
12708 | return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
12709 | isNullConstant(V: V.getOperand(i: 1)) && |
12710 | V.getOperand(i: 0).getOpcode() == BinOpToRVVReduce(Opc); |
12711 | }; |
12712 | |
12713 | unsigned Opc = N->getOpcode(); |
12714 | unsigned ReduceIdx; |
12715 | if (IsReduction(N->getOperand(Num: 0), Opc)) |
12716 | ReduceIdx = 0; |
12717 | else if (IsReduction(N->getOperand(Num: 1), Opc)) |
12718 | ReduceIdx = 1; |
12719 | else |
12720 | return SDValue(); |
12721 | |
// Skip FADD if reassociation is disallowed, since this combine requires it.
12723 | if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) |
12724 | return SDValue(); |
12725 | |
SDValue Extract = N->getOperand(Num: ReduceIdx);
12727 | SDValue Reduce = Extract.getOperand(i: 0); |
12728 | if (!Extract.hasOneUse() || !Reduce.hasOneUse()) |
12729 | return SDValue(); |
12730 | |
12731 | SDValue ScalarV = Reduce.getOperand(i: 2); |
12732 | EVT ScalarVT = ScalarV.getValueType(); |
12733 | if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && |
12734 | ScalarV.getOperand(i: 0)->isUndef() && |
12735 | isNullConstant(V: ScalarV.getOperand(i: 2))) |
12736 | ScalarV = ScalarV.getOperand(i: 1); |
12737 | |
12738 | // Make sure that ScalarV is a splat with VL=1. |
12739 | if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && |
12740 | ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && |
12741 | ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) |
12742 | return SDValue(); |
12743 | |
12744 | if (!isNonZeroAVL(AVL: ScalarV.getOperand(i: 2))) |
12745 | return SDValue(); |
12746 | |
// Check that the scalar operand of ScalarV is the neutral element.
12748 | // TODO: Deal with value other than neutral element. |
12749 | if (!isNeutralConstant(Opc: N->getOpcode(), Flags: N->getFlags(), V: ScalarV.getOperand(i: 1), |
12750 | OperandNo: 0)) |
12751 | return SDValue(); |
12752 | |
12753 | // If the AVL is zero, operand 0 will be returned. So it's not safe to fold. |
12754 | // FIXME: We might be able to improve this if operand 0 is undef. |
12755 | if (!isNonZeroAVL(AVL: Reduce.getOperand(i: 5))) |
12756 | return SDValue(); |
12757 | |
12758 | SDValue NewStart = N->getOperand(Num: 1 - ReduceIdx); |
12759 | |
12760 | SDLoc DL(N); |
12761 | SDValue NewScalarV = |
12762 | lowerScalarInsert(Scalar: NewStart, VL: ScalarV.getOperand(i: 2), |
12763 | VT: ScalarV.getSimpleValueType(), DL, DAG, Subtarget); |
12764 | |
12765 | // If we looked through an INSERT_SUBVECTOR we need to restore it. |
12766 | if (ScalarVT != ScalarV.getValueType()) |
12767 | NewScalarV = |
12768 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ScalarVT, N1: DAG.getUNDEF(VT: ScalarVT), |
12769 | N2: NewScalarV, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
12770 | |
12771 | SDValue Ops[] = {Reduce.getOperand(i: 0), Reduce.getOperand(i: 1), |
12772 | NewScalarV, Reduce.getOperand(i: 3), |
12773 | Reduce.getOperand(i: 4), Reduce.getOperand(i: 5)}; |
12774 | SDValue NewReduce = |
12775 | DAG.getNode(Opcode: Reduce.getOpcode(), DL, VT: Reduce.getValueType(), Ops); |
12776 | return DAG.getNode(Opcode: Extract.getOpcode(), DL, VT: Extract.getValueType(), N1: NewReduce, |
12777 | N2: Extract.getOperand(i: 1)); |
12778 | } |
12779 | |
12780 | // Optimize (add (shl x, c0), (shl y, c1)) -> |
// (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
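// For example, with Zba:
//   (add (shl x, 5), (shl y, 6)) -> (slli (sh1add y, x), 5)
// since ((y << 1) + x) << 5 == (x << 5) + (y << 6).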
12782 | static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, |
12783 | const RISCVSubtarget &Subtarget) { |
12784 | // Perform this optimization only in the zba extension. |
12785 | if (!Subtarget.hasStdExtZba()) |
12786 | return SDValue(); |
12787 | |
12788 | // Skip for vector types and larger types. |
12789 | EVT VT = N->getValueType(ResNo: 0); |
12790 | if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) |
12791 | return SDValue(); |
12792 | |
12793 | // The two operand nodes must be SHL and have no other use. |
12794 | SDValue N0 = N->getOperand(Num: 0); |
12795 | SDValue N1 = N->getOperand(Num: 1); |
12796 | if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || |
12797 | !N0->hasOneUse() || !N1->hasOneUse()) |
12798 | return SDValue(); |
12799 | |
12800 | // Check c0 and c1. |
12801 | auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
12802 | auto *N1C = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1)); |
12803 | if (!N0C || !N1C) |
12804 | return SDValue(); |
12805 | int64_t C0 = N0C->getSExtValue(); |
12806 | int64_t C1 = N1C->getSExtValue(); |
12807 | if (C0 <= 0 || C1 <= 0) |
12808 | return SDValue(); |
12809 | |
12810 | // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. |
12811 | int64_t Bits = std::min(a: C0, b: C1); |
12812 | int64_t Diff = std::abs(i: C0 - C1); |
12813 | if (Diff != 1 && Diff != 2 && Diff != 3) |
12814 | return SDValue(); |
12815 | |
12816 | // Build nodes. |
12817 | SDLoc DL(N); |
12818 | SDValue NS = (C0 < C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0); |
12819 | SDValue NL = (C0 > C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0); |
12820 | SDValue NA0 = |
12821 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NL, N2: DAG.getConstant(Val: Diff, DL, VT)); |
12822 | SDValue NA1 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NA0, N2: NS); |
12823 | return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NA1, N2: DAG.getConstant(Val: Bits, DL, VT)); |
12824 | } |
12825 | |
12826 | // Combine a constant select operand into its use: |
12827 | // |
12828 | // (and (select cond, -1, c), x) |
12829 | // -> (select cond, x, (and x, c)) [AllOnes=1] |
12830 | // (or (select cond, 0, c), x) |
12831 | // -> (select cond, x, (or x, c)) [AllOnes=0] |
12832 | // (xor (select cond, 0, c), x) |
12833 | // -> (select cond, x, (xor x, c)) [AllOnes=0] |
12834 | // (add (select cond, 0, c), x) |
12835 | // -> (select cond, x, (add x, c)) [AllOnes=0] |
12836 | // (sub x, (select cond, 0, c)) |
12837 | // -> (select cond, x, (sub x, c)) [AllOnes=0] |
12838 | static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, |
12839 | SelectionDAG &DAG, bool AllOnes, |
12840 | const RISCVSubtarget &Subtarget) { |
12841 | EVT VT = N->getValueType(ResNo: 0); |
12842 | |
12843 | // Skip vectors. |
12844 | if (VT.isVector()) |
12845 | return SDValue(); |
12846 | |
12847 | if (!Subtarget.hasConditionalMoveFusion()) { |
12848 | // (select cond, x, (and x, c)) has custom lowering with Zicond. |
12849 | if ((!Subtarget.hasStdExtZicond() && |
12850 | !Subtarget.hasVendorXVentanaCondOps()) || |
12851 | N->getOpcode() != ISD::AND) |
12852 | return SDValue(); |
12853 | |
// Maybe harmful when the condition code has multiple uses.
12855 | if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(i: 0).hasOneUse()) |
12856 | return SDValue(); |
12857 | |
12858 | // Maybe harmful when VT is wider than XLen. |
12859 | if (VT.getSizeInBits() > Subtarget.getXLen()) |
12860 | return SDValue(); |
12861 | } |
12862 | |
12863 | if ((Slct.getOpcode() != ISD::SELECT && |
12864 | Slct.getOpcode() != RISCVISD::SELECT_CC) || |
12865 | !Slct.hasOneUse()) |
12866 | return SDValue(); |
12867 | |
12868 | auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { |
12869 | return AllOnes ? isAllOnesConstant(V: N) : isNullConstant(V: N); |
12870 | }; |
12871 | |
12872 | bool SwapSelectOps; |
12873 | unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; |
12874 | SDValue TrueVal = Slct.getOperand(i: 1 + OpOffset); |
12875 | SDValue FalseVal = Slct.getOperand(i: 2 + OpOffset); |
12876 | SDValue NonConstantVal; |
12877 | if (isZeroOrAllOnes(TrueVal, AllOnes)) { |
12878 | SwapSelectOps = false; |
12879 | NonConstantVal = FalseVal; |
12880 | } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { |
12881 | SwapSelectOps = true; |
12882 | NonConstantVal = TrueVal; |
12883 | } else |
12884 | return SDValue(); |
12885 | |
// Slct is now known to be the desired identity constant when CC is true.
12887 | TrueVal = OtherOp; |
12888 | FalseVal = DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT, N1: OtherOp, N2: NonConstantVal); |
12889 | // Unless SwapSelectOps says the condition should be false. |
12890 | if (SwapSelectOps) |
12891 | std::swap(a&: TrueVal, b&: FalseVal); |
12892 | |
12893 | if (Slct.getOpcode() == RISCVISD::SELECT_CC) |
12894 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL: SDLoc(N), VT, |
12895 | Ops: {Slct.getOperand(i: 0), Slct.getOperand(i: 1), |
12896 | Slct.getOperand(i: 2), TrueVal, FalseVal}); |
12897 | |
12898 | return DAG.getNode(Opcode: ISD::SELECT, DL: SDLoc(N), VT, |
12899 | Ops: {Slct.getOperand(i: 0), TrueVal, FalseVal}); |
12900 | } |
12901 | |
12902 | // Attempt combineSelectAndUse on each operand of a commutative operator N. |
12903 | static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, |
12904 | bool AllOnes, |
12905 | const RISCVSubtarget &Subtarget) { |
12906 | SDValue N0 = N->getOperand(Num: 0); |
12907 | SDValue N1 = N->getOperand(Num: 1); |
12908 | if (SDValue Result = combineSelectAndUse(N, Slct: N0, OtherOp: N1, DAG, AllOnes, Subtarget)) |
12909 | return Result; |
12910 | if (SDValue Result = combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, AllOnes, Subtarget)) |
12911 | return Result; |
12912 | return SDValue(); |
12913 | } |
12914 | |
12915 | // Transform (add (mul x, c0), c1) -> |
12916 | // (add (mul (add x, c1/c0), c0), c1%c0). |
12917 | // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case |
12918 | // that should be excluded is when c0*(c1/c0) is simm12, which will lead |
12919 | // to an infinite loop in DAGCombine if transformed. |
12920 | // Or transform (add (mul x, c0), c1) -> |
12921 | // (add (mul (add x, c1/c0+1), c0), c1%c0-c0), |
12922 | // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner |
12923 | // case that should be excluded is when c0*(c1/c0+1) is simm12, which will |
12924 | // lead to an infinite loop in DAGCombine if transformed. |
12925 | // Or transform (add (mul x, c0), c1) -> |
12926 | // (add (mul (add x, c1/c0-1), c0), c1%c0+c0), |
12927 | // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner |
12928 | // case that should be excluded is when c0*(c1/c0-1) is simm12, which will |
12929 | // lead to an infinite loop in DAGCombine if transformed. |
12930 | // Or transform (add (mul x, c0), c1) -> |
12931 | // (mul (add x, c1/c0), c0). |
12932 | // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. |
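// As a worked example of the first form, take c0 = 100 and c1 = 204705
// (not simm12): c1/c0 = 2047 and c1%c0 = 5 are both simm12, while
// c0*(c1/c0) = 204700 is not, so
//   (add (mul x, 100), 204705) -> (add (mul (add x, 2047), 100), 5).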
12933 | static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, |
12934 | const RISCVSubtarget &Subtarget) { |
12935 | // Skip for vector types and larger types. |
12936 | EVT VT = N->getValueType(ResNo: 0); |
12937 | if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) |
12938 | return SDValue(); |
12939 | // The first operand node must be a MUL and has no other use. |
12940 | SDValue N0 = N->getOperand(Num: 0); |
12941 | if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) |
12942 | return SDValue(); |
12943 | // Check if c0 and c1 match above conditions. |
12944 | auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
12945 | auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
12946 | if (!N0C || !N1C) |
12947 | return SDValue(); |
12948 | // If N0C has multiple uses it's possible one of the cases in |
12949 | // DAGCombiner::isMulAddWithConstProfitable will be true, which would result |
12950 | // in an infinite loop. |
12951 | if (!N0C->hasOneUse()) |
12952 | return SDValue(); |
12953 | int64_t C0 = N0C->getSExtValue(); |
12954 | int64_t C1 = N1C->getSExtValue(); |
12955 | int64_t CA, CB; |
12956 | if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(x: C1)) |
12957 | return SDValue(); |
12958 | // Search for proper CA (non-zero) and CB that both are simm12. |
12959 | if ((C1 / C0) != 0 && isInt<12>(x: C1 / C0) && isInt<12>(x: C1 % C0) && |
12960 | !isInt<12>(x: C0 * (C1 / C0))) { |
12961 | CA = C1 / C0; |
12962 | CB = C1 % C0; |
12963 | } else if ((C1 / C0 + 1) != 0 && isInt<12>(x: C1 / C0 + 1) && |
12964 | isInt<12>(x: C1 % C0 - C0) && !isInt<12>(x: C0 * (C1 / C0 + 1))) { |
12965 | CA = C1 / C0 + 1; |
12966 | CB = C1 % C0 - C0; |
12967 | } else if ((C1 / C0 - 1) != 0 && isInt<12>(x: C1 / C0 - 1) && |
12968 | isInt<12>(x: C1 % C0 + C0) && !isInt<12>(x: C0 * (C1 / C0 - 1))) { |
12969 | CA = C1 / C0 - 1; |
12970 | CB = C1 % C0 + C0; |
12971 | } else |
12972 | return SDValue(); |
12973 | // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). |
12974 | SDLoc DL(N); |
12975 | SDValue New0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0->getOperand(Num: 0), |
12976 | N2: DAG.getConstant(Val: CA, DL, VT)); |
12977 | SDValue New1 = |
12978 | DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: New0, N2: DAG.getConstant(Val: C0, DL, VT)); |
12979 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: New1, N2: DAG.getConstant(Val: CB, DL, VT)); |
12980 | } |
12981 | |
12982 | // add (zext, zext) -> zext (add (zext, zext)) |
12983 | // sub (zext, zext) -> sext (sub (zext, zext)) |
12984 | // mul (zext, zext) -> zext (mul (zext, zext)) |
12985 | // sdiv (zext, zext) -> zext (sdiv (zext, zext)) |
12986 | // udiv (zext, zext) -> zext (udiv (zext, zext)) |
12987 | // srem (zext, zext) -> zext (srem (zext, zext)) |
12988 | // urem (zext, zext) -> zext (urem (zext, zext)) |
12989 | // |
// where the sum of the extend widths match, and the range of the bin op
// fits inside the width of the narrower bin op. (For profitability on RVV, we
// use a power of two for both the inner and outer extend.)
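// For example:
//   (add (zext v8i8 A to v8i32), (zext v8i8 B to v8i32))
//   -> (zext (add (zext A to v8i16), (zext B to v8i16)) to v8i32)
// since an i8 + i8 sum always fits in 16 bits.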
12993 | static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) { |
12994 | |
12995 | EVT VT = N->getValueType(ResNo: 0); |
12996 | if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
12997 | return SDValue(); |
12998 | |
12999 | SDValue N0 = N->getOperand(Num: 0); |
13000 | SDValue N1 = N->getOperand(Num: 1); |
13001 | if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND) |
13002 | return SDValue(); |
13003 | if (!N0.hasOneUse() || !N1.hasOneUse()) |
13004 | return SDValue(); |
13005 | |
13006 | SDValue Src0 = N0.getOperand(i: 0); |
13007 | SDValue Src1 = N1.getOperand(i: 0); |
13008 | EVT SrcVT = Src0.getValueType(); |
13009 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT: SrcVT) || |
13010 | SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 || |
13011 | SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2) |
13012 | return SDValue(); |
13013 | |
13014 | LLVMContext &C = *DAG.getContext(); |
13015 | EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(Context&: C); |
13016 | EVT NarrowVT = EVT::getVectorVT(Context&: C, VT: ElemVT, EC: VT.getVectorElementCount()); |
13017 | |
13018 | Src0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src0), VT: NarrowVT, Operand: Src0); |
13019 | Src1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src1), VT: NarrowVT, Operand: Src1); |
13020 | |
13021 | // Src0 and Src1 are zero extended, so they're always positive if signed. |
13022 | // |
13023 | // sub can produce a negative from two positive operands, so it needs sign |
13024 | // extended. Other nodes produce a positive from two positive operands, so |
13025 | // zero extend instead. |
13026 | unsigned OuterExtend = |
13027 | N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
13028 | |
13029 | return DAG.getNode( |
13030 | Opcode: OuterExtend, DL: SDLoc(N), VT, |
13031 | Operand: DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT: NarrowVT, N1: Src0, N2: Src1)); |
13032 | } |
13033 | |
// Try to turn (add (xor bool, 1), -1) into (neg bool).
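// For a value B known to be 0 or 1, (xor B, 1) computes 1 - B, so adding -1
// yields -B, which we emit as (sub 0, B).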
13035 | static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { |
13036 | SDValue N0 = N->getOperand(Num: 0); |
13037 | SDValue N1 = N->getOperand(Num: 1); |
13038 | EVT VT = N->getValueType(ResNo: 0); |
13039 | SDLoc DL(N); |
13040 | |
13041 | // RHS should be -1. |
13042 | if (!isAllOnesConstant(V: N1)) |
13043 | return SDValue(); |
13044 | |
13045 | // Look for (xor X, 1). |
13046 | if (N0.getOpcode() != ISD::XOR || !isOneConstant(V: N0.getOperand(i: 1))) |
13047 | return SDValue(); |
13048 | |
13049 | // First xor input should be 0 or 1. |
13050 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1); |
13051 | if (!DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask)) |
13052 | return SDValue(); |
13053 | |
13054 | // Emit a negate of the setcc. |
13055 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), |
13056 | N2: N0.getOperand(i: 0)); |
13057 | } |
13058 | |
13059 | static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, |
13060 | const RISCVSubtarget &Subtarget) { |
13061 | if (SDValue V = combineAddOfBooleanXor(N, DAG)) |
13062 | return V; |
13063 | if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) |
13064 | return V; |
13065 | if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) |
13066 | return V; |
13067 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13068 | return V; |
13069 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13070 | return V; |
13071 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
13072 | return V; |
13073 | |
13074 | // fold (add (select lhs, rhs, cc, 0, y), x) -> |
13075 | // (select lhs, rhs, cc, x, (add x, y)) |
13076 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
13077 | } |
13078 | |
13079 | // Try to turn a sub boolean RHS and constant LHS into an addi. |
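// For example:
//   (sub 5, (setcc x, y, eq)) -> (add (setcc x, y, ne), 4)
// since both evaluate to 4 when x == y and to 5 otherwise.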
13080 | static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { |
13081 | SDValue N0 = N->getOperand(Num: 0); |
13082 | SDValue N1 = N->getOperand(Num: 1); |
13083 | EVT VT = N->getValueType(ResNo: 0); |
13084 | SDLoc DL(N); |
13085 | |
13086 | // Require a constant LHS. |
13087 | auto *N0C = dyn_cast<ConstantSDNode>(Val&: N0); |
13088 | if (!N0C) |
13089 | return SDValue(); |
13090 | |
13091 | // All our optimizations involve subtracting 1 from the immediate and forming |
13092 | // an ADDI. Make sure the new immediate is valid for an ADDI. |
13093 | APInt ImmValMinus1 = N0C->getAPIntValue() - 1; |
13094 | if (!ImmValMinus1.isSignedIntN(N: 12)) |
13095 | return SDValue(); |
13096 | |
13097 | SDValue NewLHS; |
13098 | if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { |
13099 | // (sub constant, (setcc x, y, eq/neq)) -> |
13100 | // (add (setcc x, y, neq/eq), constant - 1) |
13101 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get(); |
13102 | EVT SetCCOpVT = N1.getOperand(i: 0).getValueType(); |
13103 | if (!isIntEqualitySetCC(Code: CCVal) || !SetCCOpVT.isInteger()) |
13104 | return SDValue(); |
13105 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT); |
13106 | NewLHS = |
13107 | DAG.getSetCC(DL: SDLoc(N1), VT, LHS: N1.getOperand(i: 0), RHS: N1.getOperand(i: 1), Cond: CCVal); |
13108 | } else if (N1.getOpcode() == ISD::XOR && isOneConstant(V: N1.getOperand(i: 1)) && |
13109 | N1.getOperand(i: 0).getOpcode() == ISD::SETCC) { |
13110 | // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). |
13111 | // Since setcc returns a bool the xor is equivalent to 1-setcc. |
13112 | NewLHS = N1.getOperand(i: 0); |
13113 | } else |
13114 | return SDValue(); |
13115 | |
13116 | SDValue NewRHS = DAG.getConstant(Val: ImmValMinus1, DL, VT); |
13117 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NewLHS, N2: NewRHS); |
13118 | } |
13119 | |
13120 | static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, |
13121 | const RISCVSubtarget &Subtarget) { |
13122 | if (SDValue V = combineSubOfBoolean(N, DAG)) |
13123 | return V; |
13124 | |
13125 | EVT VT = N->getValueType(ResNo: 0); |
13126 | SDValue N0 = N->getOperand(Num: 0); |
13127 | SDValue N1 = N->getOperand(Num: 1); |
13128 | // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) |
13129 | if (isNullConstant(V: N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && |
13130 | isNullConstant(V: N1.getOperand(i: 1))) { |
13131 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get(); |
13132 | if (CCVal == ISD::SETLT) { |
13133 | SDLoc DL(N); |
13134 | unsigned ShAmt = N0.getValueSizeInBits() - 1; |
13135 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N1.getOperand(i: 0), |
13136 | N2: DAG.getConstant(Val: ShAmt, DL, VT)); |
13137 | } |
13138 | } |
13139 | |
13140 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
13141 | return V; |
13142 | |
13143 | // fold (sub x, (select lhs, rhs, cc, 0, y)) -> |
13144 | // (select lhs, rhs, cc, x, (sub x, y)) |
13145 | return combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, /*AllOnes*/ false, Subtarget); |
13146 | } |
13147 | |
13148 | // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. |
13149 | // Legalizing setcc can introduce xors like this. Doing this transform reduces |
13150 | // the number of xors and may allow the xor to fold into a branch condition. |
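// For example, with X and Y known to be 0/1:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)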
13151 | static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { |
13152 | SDValue N0 = N->getOperand(Num: 0); |
13153 | SDValue N1 = N->getOperand(Num: 1); |
13154 | bool IsAnd = N->getOpcode() == ISD::AND; |
13155 | |
13156 | if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) |
13157 | return SDValue(); |
13158 | |
13159 | if (!N0.hasOneUse() || !N1.hasOneUse()) |
13160 | return SDValue(); |
13161 | |
13162 | SDValue N01 = N0.getOperand(i: 1); |
13163 | SDValue N11 = N1.getOperand(i: 1); |
13164 | |
13165 | // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into |
13166 | // (xor X, -1) based on the upper bits of the other operand being 0. If the |
13167 | // operation is And, allow one of the Xors to use -1. |
13168 | if (isOneConstant(V: N01)) { |
13169 | if (!isOneConstant(V: N11) && !(IsAnd && isAllOnesConstant(V: N11))) |
13170 | return SDValue(); |
13171 | } else if (isOneConstant(V: N11)) { |
13172 | // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. |
13173 | if (!(IsAnd && isAllOnesConstant(V: N01))) |
13174 | return SDValue(); |
13175 | } else |
13176 | return SDValue(); |
13177 | |
13178 | EVT VT = N->getValueType(ResNo: 0); |
13179 | |
13180 | SDValue N00 = N0.getOperand(i: 0); |
13181 | SDValue N10 = N1.getOperand(i: 0); |
13182 | |
13183 | // The LHS of the xors needs to be 0/1. |
13184 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1); |
13185 | if (!DAG.MaskedValueIsZero(Op: N00, Mask) || !DAG.MaskedValueIsZero(Op: N10, Mask)) |
13186 | return SDValue(); |
13187 | |
13188 | // Invert the opcode and insert a new xor. |
13189 | SDLoc DL(N); |
13190 | unsigned Opc = IsAnd ? ISD::OR : ISD::AND; |
13191 | SDValue Logic = DAG.getNode(Opcode: Opc, DL, VT, N1: N00, N2: N10); |
13192 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Logic, N2: DAG.getConstant(Val: 1, DL, VT)); |
13193 | } |
13194 | |
13195 | static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, |
13196 | const RISCVSubtarget &Subtarget) { |
13197 | SDValue N0 = N->getOperand(Num: 0); |
13198 | EVT VT = N->getValueType(ResNo: 0); |
13199 | |
13200 | // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero |
13201 | // extending X. This is safe since we only need the LSB after the shift and |
13202 | // shift amounts larger than 31 would produce poison. If we wait until |
13203 | // type legalization, we'll create RISCVISD::SRLW and we can't recover it |
13204 | // to use a BEXT instruction. |
13205 | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && |
13206 | N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && |
13207 | !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { |
13208 | SDLoc DL(N0); |
13209 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); |
13210 | SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); |
13211 | SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); |
13212 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT, Operand: Srl); |
13213 | } |
13214 | |
13215 | return SDValue(); |
13216 | } |
13217 | |
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and a logic operation. Returns the newly constructed
// node if the conditions for the optimization are satisfied.
13221 | static SDValue performANDCombine(SDNode *N, |
13222 | TargetLowering::DAGCombinerInfo &DCI, |
13223 | const RISCVSubtarget &Subtarget) { |
13224 | SelectionDAG &DAG = DCI.DAG; |
13225 | |
13226 | SDValue N0 = N->getOperand(Num: 0); |
13227 | // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero |
13228 | // extending X. This is safe since we only need the LSB after the shift and |
13229 | // shift amounts larger than 31 would produce poison. If we wait until |
13230 | // type legalization, we'll create RISCVISD::SRLW and we can't recover it |
13231 | // to use a BEXT instruction. |
13232 | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && |
13233 | N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && |
13234 | N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && |
13235 | N0.hasOneUse()) { |
13236 | SDLoc DL(N); |
13237 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); |
13238 | SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); |
13239 | SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); |
13240 | SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl, |
13241 | DAG.getConstant(1, DL, MVT::i64)); |
13242 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); |
13243 | } |
13244 | |
13245 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13246 | return V; |
13247 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13248 | return V; |
13249 | |
13250 | if (DCI.isAfterLegalizeDAG()) |
13251 | if (SDValue V = combineDeMorganOfBoolean(N, DAG)) |
13252 | return V; |
13253 | |
13254 | // fold (and (select lhs, rhs, cc, -1, y), x) -> |
13255 | // (select lhs, rhs, cc, x, (and x, y)) |
13256 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); |
13257 | } |
13258 | |
13259 | // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. |
13260 | // FIXME: Generalize to other binary operators with same operand. |
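// For example:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//   -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// since whichever arm the condition selects, the final xor still applies.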
13261 | static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, |
13262 | SelectionDAG &DAG) { |
13263 | assert(N->getOpcode() == ISD::OR && "Unexpected opcode" ); |
13264 | |
13265 | if (N0.getOpcode() != RISCVISD::CZERO_EQZ || |
13266 | N1.getOpcode() != RISCVISD::CZERO_NEZ || |
13267 | !N0.hasOneUse() || !N1.hasOneUse()) |
13268 | return SDValue(); |
13269 | |
13270 | // Should have the same condition. |
13271 | SDValue Cond = N0.getOperand(i: 1); |
13272 | if (Cond != N1.getOperand(i: 1)) |
13273 | return SDValue(); |
13274 | |
13275 | SDValue TrueV = N0.getOperand(i: 0); |
13276 | SDValue FalseV = N1.getOperand(i: 0); |
13277 | |
13278 | if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR || |
13279 | TrueV.getOperand(i: 1) != FalseV.getOperand(i: 1) || |
13280 | !isOneConstant(V: TrueV.getOperand(i: 1)) || |
13281 | !TrueV.hasOneUse() || !FalseV.hasOneUse()) |
13282 | return SDValue(); |
13283 | |
13284 | EVT VT = N->getValueType(ResNo: 0); |
13285 | SDLoc DL(N); |
13286 | |
13287 | SDValue NewN0 = DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV.getOperand(i: 0), |
13288 | N2: Cond); |
13289 | SDValue NewN1 = DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV.getOperand(i: 0), |
13290 | N2: Cond); |
13291 | SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: NewN0, N2: NewN1); |
13292 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewOr, N2: TrueV.getOperand(i: 1)); |
13293 | } |
13294 | |
13295 | static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, |
13296 | const RISCVSubtarget &Subtarget) { |
13297 | SelectionDAG &DAG = DCI.DAG; |
13298 | |
13299 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13300 | return V; |
13301 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13302 | return V; |
13303 | |
13304 | if (DCI.isAfterLegalizeDAG()) |
13305 | if (SDValue V = combineDeMorganOfBoolean(N, DAG)) |
13306 | return V; |
13307 | |
// Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the
// select idiom. We may be able to pull a common operation out of the true
// and false values.
13310 | SDValue N0 = N->getOperand(Num: 0); |
13311 | SDValue N1 = N->getOperand(Num: 1); |
13312 | if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG)) |
13313 | return V; |
13314 | if (SDValue V = combineOrOfCZERO(N, N0: N1, N1: N0, DAG)) |
13315 | return V; |
13316 | |
13317 | // fold (or (select cond, 0, y), x) -> |
13318 | // (select cond, x, (or x, y)) |
13319 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
13320 | } |
13321 | |
13322 | static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, |
13323 | const RISCVSubtarget &Subtarget) { |
13324 | SDValue N0 = N->getOperand(Num: 0); |
13325 | SDValue N1 = N->getOperand(Num: 1); |
13326 | |
13327 | // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use |
// (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
// RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13330 | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && |
13331 | N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) && |
13332 | N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) && |
13333 | !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { |
13334 | SDLoc DL(N); |
13335 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); |
13336 | SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); |
13337 | SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1); |
13338 | SDValue And = DAG.getNOT(DL, Shl, MVT::i64); |
13339 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); |
13340 | } |
13341 | |
13342 | // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) |
13343 | // NOTE: Assumes ROL being legal means ROLW is legal. |
13344 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
13345 | if (N0.getOpcode() == RISCVISD::SLLW && |
13346 | isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && |
13347 | TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { |
13348 | SDLoc DL(N); |
13349 | return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, |
13350 | DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); |
13351 | } |
13352 | |
13353 | // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) |
13354 | if (N0.getOpcode() == ISD::SETCC && isOneConstant(V: N1) && N0.hasOneUse()) { |
13355 | auto *ConstN00 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)); |
13356 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get(); |
13357 | if (ConstN00 && CC == ISD::SETLT) { |
13358 | EVT VT = N0.getValueType(); |
13359 | SDLoc DL(N0); |
13360 | const APInt &Imm = ConstN00->getAPIntValue(); |
13361 | if ((Imm + 1).isSignedIntN(N: 12)) |
13362 | return DAG.getSetCC(DL, VT, LHS: N0.getOperand(i: 1), |
13363 | RHS: DAG.getConstant(Val: Imm + 1, DL, VT), Cond: CC); |
13364 | } |
13365 | } |
13366 | |
13367 | // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with |
13368 | // RV64LegalI32 when the setcc is created after type legalization. An i1 xor |
// would have been promoted to i32, but the setcc would have an i64 result.
13370 | if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE && |
13371 | isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) { |
13372 | SDValue N00 = N0.getOperand(i: 0); |
13373 | SDLoc DL(N); |
13374 | SDValue LHS = N00.getOperand(i: 0); |
13375 | SDValue RHS = N00.getOperand(i: 1); |
13376 | SDValue CC = N00.getOperand(i: 2); |
13377 | ISD::CondCode NotCC = ISD::getSetCCInverse(Operation: cast<CondCodeSDNode>(Val&: CC)->get(), |
13378 | Type: LHS.getValueType()); |
13379 | SDValue Setcc = DAG.getSetCC(DL: SDLoc(N00), VT: N0.getOperand(i: 0).getValueType(), |
13380 | LHS, RHS, Cond: NotCC); |
13381 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N0), VT: N->getValueType(ResNo: 0), Operand: Setcc); |
13382 | } |
13383 | |
13384 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13385 | return V; |
13386 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13387 | return V; |
13388 | |
13389 | // fold (xor (select cond, 0, y), x) -> |
13390 | // (select cond, x, (xor x, y)) |
13391 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
13392 | } |
13393 | |
13394 | // Try to expand a scalar multiply to a faster sequence. |
13395 | static SDValue expandMul(SDNode *N, SelectionDAG &DAG, |
13396 | TargetLowering::DAGCombinerInfo &DCI, |
                         const RISCVSubtarget &Subtarget) {
13399 | EVT VT = N->getValueType(ResNo: 0); |
13400 | |
13401 | // LI + MUL is usually smaller than the alternative sequence. |
13402 | if (DAG.getMachineFunction().getFunction().hasMinSize()) |
13403 | return SDValue(); |
13404 | |
13405 | if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) |
13406 | return SDValue(); |
13407 | |
13408 | if (VT != Subtarget.getXLenVT()) |
13409 | return SDValue(); |
13410 | |
13411 | if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa()) |
13412 | return SDValue(); |
13413 | |
13414 | ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
13415 | if (!CNode) |
13416 | return SDValue(); |
13417 | uint64_t MulAmt = CNode->getZExtValue(); |
13418 | |
13419 | for (uint64_t Divisor : {3, 5, 9}) { |
13420 | if (MulAmt % Divisor != 0) |
13421 | continue; |
13422 | uint64_t MulAmt2 = MulAmt / Divisor; |
13423 | // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C) |
13424 | // Matched in tablegen, avoid perturbing patterns. |
13425 | if (isPowerOf2_64(Value: MulAmt2)) |
13426 | return SDValue(); |
13427 | |
13428 | // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) |
13429 | if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) { |
13430 | SDLoc DL(N); |
13431 | SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0)); |
13432 | SDValue Mul359 = |
13433 | DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13434 | N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X); |
13435 | return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359, |
13436 | N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2 - 1), DL, VT), |
13437 | N3: Mul359); |
13438 | } |
13439 | } |
13440 | |
  // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
  // shXadd. First check if this is a sum of two powers of 2 because that's
  // easy. Then count the trailing zeros to find the low set bit (the 2/4/8
  // term).
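  // Worked example (illustrative): MulAmt == 34 == 32 + 2 gives
  // ScaleShift == 1 and ShiftAmt == 5, i.e.
  //   slli   t, x, 5   ; t = x * 32
  //   sh1add r, x, t   ; r = 2 * x + t = x * 34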
13444 | if (isPowerOf2_64(Value: MulAmt & (MulAmt - 1))) { |
13445 | unsigned ScaleShift = llvm::countr_zero(Val: MulAmt); |
13446 | if (ScaleShift >= 1 && ScaleShift < 4) { |
13447 | unsigned ShiftAmt = Log2_64(Value: (MulAmt & (MulAmt - 1))); |
13448 | SDLoc DL(N); |
13449 | SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0)); |
13450 | SDValue Shift1 = |
13451 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT)); |
13452 | return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13453 | N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: Shift1); |
13454 | } |
13455 | } |
13456 | |
13457 | // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) |
13458 | // This is the two instruction form, there are also three instruction |
13459 | // variants we could implement. e.g. |
13460 | // (2^(1,2,3) * 3,5,9 + 1) << C2 |
13461 | // 2^(C1>3) * 3,5,9 +/- 1 |
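  // Worked example (illustrative): MulAmt == 19 == 2 * 9 + 1 is matched with
  // Divisor == 9 and TZ == 1, giving
  //   sh3add t, x, x   ; t = x * 9
  //   sh1add r, t, x   ; r = 2 * t + x = x * 19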
13462 | for (uint64_t Divisor : {3, 5, 9}) { |
13463 | uint64_t C = MulAmt - 1; |
13464 | if (C <= Divisor) |
13465 | continue; |
13466 | unsigned TZ = llvm::countr_zero(Val: C); |
13467 | if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) { |
13468 | SDLoc DL(N); |
13469 | SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0)); |
13470 | SDValue Mul359 = |
13471 | DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13472 | N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X); |
13473 | return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359, |
13474 | N2: DAG.getConstant(Val: TZ, DL, VT), N3: X); |
13475 | } |
13476 | } |
13477 | |
13478 | // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) |
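  // Worked example (illustrative): MulAmt == 35 == 32 + 2 + 1 gives
  // ScaleShift == 1 and ShiftAmt == 5, i.e.
  //   slli   t, x, 5   ; t = x * 32
  //   sh1add u, x, x   ; u = x * 3
  //   add    r, t, u   ; r = x * 35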
13479 | if (MulAmt > 2 && isPowerOf2_64(Value: (MulAmt - 1) & (MulAmt - 2))) { |
13480 | unsigned ScaleShift = llvm::countr_zero(Val: MulAmt - 1); |
13481 | if (ScaleShift >= 1 && ScaleShift < 4) { |
13482 | unsigned ShiftAmt = Log2_64(Value: ((MulAmt - 1) & (MulAmt - 2))); |
13483 | SDLoc DL(N); |
13484 | SDValue X = DAG.getFreeze(V: N->getOperand(Num: 0)); |
13485 | SDValue Shift1 = |
13486 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT)); |
13487 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shift1, |
13488 | N2: DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13489 | N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: X)); |
13490 | } |
13491 | } |
13492 | |
  // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
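  // Worked example (illustrative): MulAmt == 29 == 32 - 3, giving
  //   slli   t, x, 5   ; t = x * 32
  //   sh1add u, x, x   ; u = x * 3
  //   sub    r, t, u   ; r = x * 29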
13494 | for (uint64_t Offset : {3, 5, 9}) { |
13495 | if (isPowerOf2_64(Value: MulAmt + Offset)) { |
13496 | SDLoc DL(N); |
13497 | SDValue Shift1 = |
13498 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0), |
13499 | N2: DAG.getConstant(Val: Log2_64(Value: MulAmt + Offset), DL, VT)); |
13500 | SDValue Mul359 = DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: N->getOperand(Num: 0), |
13501 | N2: DAG.getConstant(Val: Log2_64(Value: Offset - 1), DL, VT), |
13502 | N3: N->getOperand(Num: 0)); |
13503 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Shift1, N2: Mul359); |
13504 | } |
13505 | } |
13506 | |
13507 | return SDValue(); |
13508 | } |
13509 | |
13510 | |
13511 | static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, |
13512 | TargetLowering::DAGCombinerInfo &DCI, |
13513 | const RISCVSubtarget &Subtarget) { |
13514 | EVT VT = N->getValueType(ResNo: 0); |
13515 | if (!VT.isVector()) |
13516 | return expandMul(N, DAG, DCI, Subtarget); |
13517 | |
13518 | SDLoc DL(N); |
13519 | SDValue N0 = N->getOperand(Num: 0); |
13520 | SDValue N1 = N->getOperand(Num: 1); |
13521 | SDValue MulOper; |
13522 | unsigned AddSubOpc; |
13523 | |
13524 | // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) |
  //        (mul x, (add y, 1)) -> (add x, (mul x, y))
13526 | // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) |
13527 | // (mul x, (sub 1, y)) -> (sub x, (mul x, y)) |
13528 | auto IsAddSubWith1 = [&](SDValue V) -> bool { |
13529 | AddSubOpc = V->getOpcode(); |
13530 | if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { |
13531 | SDValue Opnd = V->getOperand(Num: 1); |
13532 | MulOper = V->getOperand(Num: 0); |
13533 | if (AddSubOpc == ISD::SUB) |
13534 | std::swap(a&: Opnd, b&: MulOper); |
13535 | if (isOneOrOneSplat(V: Opnd)) |
13536 | return true; |
13537 | } |
13538 | return false; |
13539 | }; |
13540 | |
13541 | if (IsAddSubWith1(N0)) { |
13542 | SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1, N2: MulOper); |
13543 | return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1, N2: MulVal); |
13544 | } |
13545 | |
13546 | if (IsAddSubWith1(N1)) { |
13547 | SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0, N2: MulOper); |
13548 | return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1: N0, N2: MulVal); |
13549 | } |
13550 | |
13551 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
13552 | return V; |
13553 | |
13554 | return SDValue(); |
13555 | } |
13556 | |
/// Indexed load/store instructions zero-extend their index operand. Exploit
/// this property to try to narrow the type of the index operand.
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType,
                        SelectionDAG &DAG) {
13560 | if (isIndexTypeSigned(IndexType)) |
13561 | return false; |
13562 | |
13563 | if (!N->hasOneUse()) |
13564 | return false; |
13565 | |
13566 | EVT VT = N.getValueType(); |
13567 | SDLoc DL(N); |
13568 | |
13569 | // In general, what we're doing here is seeing if we can sink a truncate to |
13570 | // a smaller element type into the expression tree building our index. |
13571 | // TODO: We can generalize this and handle a bunch more cases if useful. |
13572 | |
13573 | // Narrow a buildvector to the narrowest element type. This requires less |
13574 | // work and less register pressure at high LMUL, and creates smaller constants |
13575 | // which may be cheaper to materialize. |
13576 | if (ISD::isBuildVectorOfConstantSDNodes(N: N.getNode())) { |
13577 | KnownBits Known = DAG.computeKnownBits(Op: N); |
13578 | unsigned ActiveBits = std::max(a: 8u, b: Known.countMaxActiveBits()); |
13579 | LLVMContext &C = *DAG.getContext(); |
13580 | EVT ResultVT = EVT::getIntegerVT(Context&: C, BitWidth: ActiveBits).getRoundIntegerType(Context&: C); |
13581 | if (ResultVT.bitsLT(VT: VT.getVectorElementType())) { |
13582 | N = DAG.getNode(Opcode: ISD::TRUNCATE, DL, |
13583 | VT: VT.changeVectorElementType(EltVT: ResultVT), Operand: N); |
13584 | return true; |
13585 | } |
13586 | } |
13587 | |
13588 | // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). |
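  // Worked example (illustrative): (shl (zext v to i64), splat 2) with 8-bit
  // elements in v needs only PowerOf2Ceil(8 + 2) == 16 bits, so the zext and
  // shl can be narrowed to an i16 element type.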
13589 | if (N.getOpcode() != ISD::SHL) |
13590 | return false; |
13591 | |
13592 | SDValue N0 = N.getOperand(i: 0); |
13593 | if (N0.getOpcode() != ISD::ZERO_EXTEND && |
13594 | N0.getOpcode() != RISCVISD::VZEXT_VL) |
13595 | return false; |
13596 | if (!N0->hasOneUse()) |
13597 | return false; |
13598 | |
13599 | APInt ShAmt; |
13600 | SDValue N1 = N.getOperand(i: 1); |
13601 | if (!ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: ShAmt)) |
13602 | return false; |
13603 | |
13604 | SDValue Src = N0.getOperand(i: 0); |
13605 | EVT SrcVT = Src.getValueType(); |
13606 | unsigned SrcElen = SrcVT.getScalarSizeInBits(); |
13607 | unsigned ShAmtV = ShAmt.getZExtValue(); |
13608 | unsigned NewElen = PowerOf2Ceil(A: SrcElen + ShAmtV); |
13609 | NewElen = std::max(a: NewElen, b: 8U); |
13610 | |
13611 | // Skip if NewElen is not narrower than the original extended type. |
13612 | if (NewElen >= N0.getValueType().getScalarSizeInBits()) |
13613 | return false; |
13614 | |
13615 | EVT NewEltVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewElen); |
13616 | EVT NewVT = SrcVT.changeVectorElementType(EltVT: NewEltVT); |
13617 | |
13618 | SDValue NewExt = DAG.getNode(Opcode: N0->getOpcode(), DL, VT: NewVT, Ops: N0->ops()); |
13619 | SDValue NewShAmtVec = DAG.getConstant(Val: ShAmtV, DL, VT: NewVT); |
13620 | N = DAG.getNode(Opcode: ISD::SHL, DL, VT: NewVT, N1: NewExt, N2: NewShAmtVec); |
13621 | return true; |
13622 | } |
13623 | |
13624 | // Replace (seteq (i64 (and X, 0xffffffff)), C1) with |
13625 | // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from |
13626 | // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg |
13627 | // can become a sext.w instead of a shift pair. |
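// Worked example (illustrative): (seteq (and X, 0xffffffff), 0xffffffff)
// becomes (seteq (sext_inreg X, i32), -1); -1 is materialized in a single
// instruction while 0xffffffff takes two on RV64.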
13628 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
13629 | const RISCVSubtarget &Subtarget) { |
13630 | SDValue N0 = N->getOperand(Num: 0); |
13631 | SDValue N1 = N->getOperand(Num: 1); |
13632 | EVT VT = N->getValueType(ResNo: 0); |
13633 | EVT OpVT = N0.getValueType(); |
13634 | |
13635 | if (OpVT != MVT::i64 || !Subtarget.is64Bit()) |
13636 | return SDValue(); |
13637 | |
13638 | // RHS needs to be a constant. |
13639 | auto *N1C = dyn_cast<ConstantSDNode>(Val&: N1); |
13640 | if (!N1C) |
13641 | return SDValue(); |
13642 | |
13643 | // LHS needs to be (and X, 0xffffffff). |
13644 | if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || |
13645 | !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) || |
13646 | N0.getConstantOperandVal(i: 1) != UINT64_C(0xffffffff)) |
13647 | return SDValue(); |
13648 | |
13649 | // Looking for an equality compare. |
13650 | ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
13651 | if (!isIntEqualitySetCC(Code: Cond)) |
13652 | return SDValue(); |
13653 | |
13654 | // Don't do this if the sign bit is provably zero, it will be turned back into |
13655 | // an AND. |
13656 | APInt SignMask = APInt::getOneBitSet(numBits: 64, BitNo: 31); |
13657 | if (DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask: SignMask)) |
13658 | return SDValue(); |
13659 | |
13660 | const APInt &C1 = N1C->getAPIntValue(); |
13661 | |
13662 | SDLoc dl(N); |
13663 | // If the constant is larger than 2^32 - 1 it is impossible for both sides |
13664 | // to be equal. |
13665 | if (C1.getActiveBits() > 32) |
13666 | return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT); |
13667 | |
  SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT,
                               N0.getOperand(0), DAG.getValueType(MVT::i32));
13670 | return DAG.getSetCC(DL: dl, VT, LHS: SExtOp, RHS: DAG.getConstant(Val: C1.trunc(width: 32).sext(width: 64), |
13671 | DL: dl, VT: OpVT), Cond); |
13672 | } |
13673 | |
13674 | static SDValue |
13675 | performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, |
13676 | const RISCVSubtarget &Subtarget) { |
13677 | SDValue Src = N->getOperand(Num: 0); |
13678 | EVT VT = N->getValueType(ResNo: 0); |
13679 | |
13680 | // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) |
13681 | if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && |
13682 | cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16)) |
13683 | return DAG.getNode(Opcode: RISCVISD::FMV_X_SIGNEXTH, DL: SDLoc(N), VT, |
13684 | Operand: Src.getOperand(i: 0)); |
13685 | |
13686 | return SDValue(); |
13687 | } |
13688 | |
13689 | namespace { |
13690 | // Forward declaration of the structure holding the necessary information to |
13691 | // apply a combine. |
13692 | struct CombineResult; |
13693 | |
13694 | enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 }; |
13695 | /// Helper class for folding sign/zero extensions. |
13696 | /// In particular, this class is used for the following combines: |
13697 | /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w |
13698 | /// sub | sub_vl -> vwsub(u) | vwsub(u)_w |
13699 | /// mul | mul_vl -> vwmul(u) | vwmul_su |
13700 | /// shl | shl_vl -> vwsll |
13701 | /// fadd -> vfwadd | vfwadd_w |
13702 | /// fsub -> vfwsub | vfwsub_w |
13703 | /// fmul -> vfwmul |
13704 | /// An object of this class represents an operand of the operation we want to |
13705 | /// combine. |
13706 | /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of |
13707 | /// NodeExtensionHelper for `a` and one for `b`. |
13708 | /// |
13709 | /// This class abstracts away how the extension is materialized and |
13710 | /// how its number of users affect the combines. |
13711 | /// |
13712 | /// In particular: |
13713 | /// - VWADD_W is conceptually == add(op0, sext(op1)) |
13714 | /// - VWADDU_W == add(op0, zext(op1)) |
13715 | /// - VWSUB_W == sub(op0, sext(op1)) |
13716 | /// - VWSUBU_W == sub(op0, zext(op1)) |
13717 | /// - VFWADD_W == fadd(op0, fpext(op1)) |
13718 | /// - VFWSUB_W == fsub(op0, fpext(op1)) |
13719 | /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to |
13720 | /// zext|sext(smaller_value). |
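///
/// E.g. (illustrative): for `mul_vl (vsext_vl a), (vzext_vl b)`, the helper
/// for the LHS reports SupportsSExt and the one for the RHS SupportsZExt,
/// allowing the combine to vwmulsu_vl a, b.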
13721 | struct NodeExtensionHelper { |
  /// Records if this operand behaves like a zero-extended value.
  bool SupportsZExt;
  /// Records if this operand behaves like a sign-extended value.
  /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
  /// instance, a splat constant (e.g., 3) would support being both sign and
  /// zero extended.
  bool SupportsSExt;
  /// Records if this operand behaves like a floating-point-extended value.
  bool SupportsFPExt;
13731 | /// This boolean captures whether we care if this operand would still be |
13732 | /// around after the folding happens. |
13733 | bool EnforceOneUse; |
13734 | /// Original value that this NodeExtensionHelper represents. |
13735 | SDValue OrigOperand; |
13736 | |
13737 | /// Get the value feeding the extension or the value itself. |
13738 | /// E.g., for zext(a), this would return a. |
13739 | SDValue getSource() const { |
13740 | switch (OrigOperand.getOpcode()) { |
13741 | case ISD::ZERO_EXTEND: |
13742 | case ISD::SIGN_EXTEND: |
13743 | case RISCVISD::VSEXT_VL: |
13744 | case RISCVISD::VZEXT_VL: |
13745 | case RISCVISD::FP_EXTEND_VL: |
13746 | return OrigOperand.getOperand(i: 0); |
13747 | default: |
13748 | return OrigOperand; |
13749 | } |
13750 | } |
13751 | |
13752 | /// Check if this instance represents a splat. |
13753 | bool isSplat() const { |
13754 | return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL || |
13755 | OrigOperand.getOpcode() == ISD::SPLAT_VECTOR; |
13756 | } |
13757 | |
13758 | /// Get the extended opcode. |
13759 | unsigned getExtOpc(ExtKind SupportsExt) const { |
13760 | switch (SupportsExt) { |
13761 | case ExtKind::SExt: |
13762 | return RISCVISD::VSEXT_VL; |
13763 | case ExtKind::ZExt: |
13764 | return RISCVISD::VZEXT_VL; |
13765 | case ExtKind::FPExt: |
13766 | return RISCVISD::FP_EXTEND_VL; |
13767 | } |
13768 | llvm_unreachable("Unknown ExtKind enum" ); |
13769 | } |
13770 | |
13771 | /// Get or create a value that can feed \p Root with the given extension \p |
  /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source
  /// of this operand. \see ::getSource().
13774 | SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, |
13775 | const RISCVSubtarget &Subtarget, |
13776 | std::optional<ExtKind> SupportsExt) const { |
13777 | if (!SupportsExt.has_value()) |
13778 | return OrigOperand; |
13779 | |
13780 | MVT NarrowVT = getNarrowType(Root, SupportsExt: *SupportsExt); |
13781 | |
13782 | SDValue Source = getSource(); |
13783 | assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType())); |
13784 | if (Source.getValueType() == NarrowVT) |
13785 | return Source; |
13786 | |
13787 | unsigned ExtOpc = getExtOpc(SupportsExt: *SupportsExt); |
13788 | |
13789 | // If we need an extension, we should be changing the type. |
13790 | SDLoc DL(OrigOperand); |
13791 | auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); |
13792 | switch (OrigOperand.getOpcode()) { |
13793 | case ISD::ZERO_EXTEND: |
13794 | case ISD::SIGN_EXTEND: |
13795 | case RISCVISD::VSEXT_VL: |
13796 | case RISCVISD::VZEXT_VL: |
13797 | case RISCVISD::FP_EXTEND_VL: |
13798 | return DAG.getNode(Opcode: ExtOpc, DL, VT: NarrowVT, N1: Source, N2: Mask, N3: VL); |
13799 | case ISD::SPLAT_VECTOR: |
13800 | return DAG.getSplat(VT: NarrowVT, DL, Op: Source.getOperand(i: 0)); |
13801 | case RISCVISD::VMV_V_X_VL: |
13802 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: NarrowVT, |
13803 | N1: DAG.getUNDEF(VT: NarrowVT), N2: Source.getOperand(i: 1), N3: VL); |
13804 | default: |
13805 | // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL |
13806 | // and that operand should already have the right NarrowVT so no |
13807 | // extension should be required at this point. |
13808 | llvm_unreachable("Unsupported opcode" ); |
13809 | } |
13810 | } |
13811 | |
13812 | /// Helper function to get the narrow type for \p Root. |
  /// The narrow type is the type of \p Root with the size of each element
  /// divided by 2. E.g., if Root's type is <2 x i16>, the narrow type is
  /// <2 x i8>.
13815 | /// \pre Both the narrow type and the original type should be legal. |
13816 | static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) { |
13817 | MVT VT = Root->getSimpleValueType(ResNo: 0); |
13818 | |
13819 | // Determine the narrow size. |
13820 | unsigned NarrowSize = VT.getScalarSizeInBits() / 2; |
13821 | |
13822 | MVT EltVT = SupportsExt == ExtKind::FPExt |
13823 | ? MVT::getFloatingPointVT(BitWidth: NarrowSize) |
13824 | : MVT::getIntegerVT(BitWidth: NarrowSize); |
13825 | |
13826 | assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) && |
13827 | "Trying to extend something we can't represent" ); |
13828 | MVT NarrowVT = MVT::getVectorVT(VT: EltVT, EC: VT.getVectorElementCount()); |
13829 | return NarrowVT; |
13830 | } |
13831 | |
13832 | /// Get the opcode to materialize: |
13833 | /// Opcode(sext(a), sext(b)) -> newOpcode(a, b) |
13834 | static unsigned getSExtOpcode(unsigned Opcode) { |
13835 | switch (Opcode) { |
13836 | case ISD::ADD: |
13837 | case RISCVISD::ADD_VL: |
13838 | case RISCVISD::VWADD_W_VL: |
13839 | case RISCVISD::VWADDU_W_VL: |
13840 | case ISD::OR: |
13841 | return RISCVISD::VWADD_VL; |
13842 | case ISD::SUB: |
13843 | case RISCVISD::SUB_VL: |
13844 | case RISCVISD::VWSUB_W_VL: |
13845 | case RISCVISD::VWSUBU_W_VL: |
13846 | return RISCVISD::VWSUB_VL; |
13847 | case ISD::MUL: |
13848 | case RISCVISD::MUL_VL: |
13849 | return RISCVISD::VWMUL_VL; |
13850 | default: |
13851 | llvm_unreachable("Unexpected opcode" ); |
13852 | } |
13853 | } |
13854 | |
13855 | /// Get the opcode to materialize: |
13856 | /// Opcode(zext(a), zext(b)) -> newOpcode(a, b) |
13857 | static unsigned getZExtOpcode(unsigned Opcode) { |
13858 | switch (Opcode) { |
13859 | case ISD::ADD: |
13860 | case RISCVISD::ADD_VL: |
13861 | case RISCVISD::VWADD_W_VL: |
13862 | case RISCVISD::VWADDU_W_VL: |
13863 | case ISD::OR: |
13864 | return RISCVISD::VWADDU_VL; |
13865 | case ISD::SUB: |
13866 | case RISCVISD::SUB_VL: |
13867 | case RISCVISD::VWSUB_W_VL: |
13868 | case RISCVISD::VWSUBU_W_VL: |
13869 | return RISCVISD::VWSUBU_VL; |
13870 | case ISD::MUL: |
13871 | case RISCVISD::MUL_VL: |
13872 | return RISCVISD::VWMULU_VL; |
13873 | case ISD::SHL: |
13874 | case RISCVISD::SHL_VL: |
13875 | return RISCVISD::VWSLL_VL; |
13876 | default: |
13877 | llvm_unreachable("Unexpected opcode" ); |
13878 | } |
13879 | } |
13880 | |
13881 | /// Get the opcode to materialize: |
13882 | /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b) |
13883 | static unsigned getFPExtOpcode(unsigned Opcode) { |
13884 | switch (Opcode) { |
13885 | case RISCVISD::FADD_VL: |
13886 | case RISCVISD::VFWADD_W_VL: |
13887 | return RISCVISD::VFWADD_VL; |
13888 | case RISCVISD::FSUB_VL: |
13889 | case RISCVISD::VFWSUB_W_VL: |
13890 | return RISCVISD::VFWSUB_VL; |
13891 | case RISCVISD::FMUL_VL: |
13892 | return RISCVISD::VFWMUL_VL; |
13893 | default: |
13894 | llvm_unreachable("Unexpected opcode" ); |
13895 | } |
13896 | } |
13897 | |
13898 | /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> |
13899 | /// newOpcode(a, b). |
13900 | static unsigned getSUOpcode(unsigned Opcode) { |
13901 | assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && |
13902 | "SU is only supported for MUL" ); |
13903 | return RISCVISD::VWMULSU_VL; |
13904 | } |
13905 | |
13906 | /// Get the opcode to materialize |
13907 | /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b). |
13908 | static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) { |
13909 | switch (Opcode) { |
13910 | case ISD::ADD: |
13911 | case RISCVISD::ADD_VL: |
13912 | case ISD::OR: |
13913 | return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL |
13914 | : RISCVISD::VWADDU_W_VL; |
13915 | case ISD::SUB: |
13916 | case RISCVISD::SUB_VL: |
13917 | return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL |
13918 | : RISCVISD::VWSUBU_W_VL; |
13919 | case RISCVISD::FADD_VL: |
13920 | return RISCVISD::VFWADD_W_VL; |
13921 | case RISCVISD::FSUB_VL: |
13922 | return RISCVISD::VFWSUB_W_VL; |
13923 | default: |
13924 | llvm_unreachable("Unexpected opcode" ); |
13925 | } |
13926 | } |
13927 | |
13928 | using CombineToTry = std::function<std::optional<CombineResult>( |
13929 | SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, |
13930 | const NodeExtensionHelper & /*RHS*/, SelectionDAG &, |
13931 | const RISCVSubtarget &)>; |
13932 | |
13933 | /// Check if this node needs to be fully folded or extended for all users. |
13934 | bool needToPromoteOtherUsers() const { return EnforceOneUse; } |
13935 | |
13936 | void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG, |
13937 | const RISCVSubtarget &Subtarget) { |
13938 | unsigned Opc = OrigOperand.getOpcode(); |
13939 | MVT VT = OrigOperand.getSimpleValueType(); |
13940 | |
13941 | assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) && |
13942 | "Unexpected Opcode" ); |
13943 | |
    // The passthru must be undef for the result to be tail agnostic.
13945 | if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(i: 0).isUndef()) |
13946 | return; |
13947 | |
13948 | // Get the scalar value. |
13949 | SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(i: 0) |
13950 | : OrigOperand.getOperand(i: 1); |
13951 | |
13952 | // See if we have enough sign bits or zero bits in the scalar to use a |
13953 | // widening opcode by splatting to smaller element size. |
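    // E.g. (illustrative): a splat of the constant 3 into a vector of i32
    // elements can equally be treated as a sign or zero extension of a splat
    // of 3 into a vector of i16 elements.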
13954 | unsigned EltBits = VT.getScalarSizeInBits(); |
13955 | unsigned ScalarBits = Op.getValueSizeInBits(); |
13956 | // Make sure we're getting all element bits from the scalar register. |
13957 | // FIXME: Support implicit sign extension of vmv.v.x? |
13958 | if (ScalarBits < EltBits) |
13959 | return; |
13960 | |
13961 | unsigned NarrowSize = VT.getScalarSizeInBits() / 2; |
13962 | // If the narrow type cannot be expressed with a legal VMV, |
13963 | // this is not a valid candidate. |
13964 | if (NarrowSize < 8) |
13965 | return; |
13966 | |
13967 | if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) |
13968 | SupportsSExt = true; |
13969 | |
13970 | if (DAG.MaskedValueIsZero(Op, |
13971 | Mask: APInt::getBitsSetFrom(numBits: ScalarBits, loBit: NarrowSize))) |
13972 | SupportsZExt = true; |
13973 | |
13974 | EnforceOneUse = false; |
13975 | } |
13976 | |
13977 | /// Helper method to set the various fields of this struct based on the |
13978 | /// type of \p Root. |
13979 | void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, |
13980 | const RISCVSubtarget &Subtarget) { |
13981 | SupportsZExt = false; |
13982 | SupportsSExt = false; |
13983 | SupportsFPExt = false; |
13984 | EnforceOneUse = true; |
13985 | unsigned Opc = OrigOperand.getOpcode(); |
13986 | // For the nodes we handle below, we end up using their inputs directly: see |
13987 | // getSource(). However since they either don't have a passthru or we check |
13988 | // that their passthru is undef, we can safely ignore their mask and VL. |
13989 | switch (Opc) { |
13990 | case ISD::ZERO_EXTEND: |
13991 | case ISD::SIGN_EXTEND: { |
13992 | MVT VT = OrigOperand.getSimpleValueType(); |
13993 | if (!VT.isVector()) |
13994 | break; |
13995 | |
13996 | SDValue NarrowElt = OrigOperand.getOperand(i: 0); |
13997 | MVT NarrowVT = NarrowElt.getSimpleValueType(); |
13998 | // i1 types are legal but we can't select V{S,Z}EXT_VLs with them. |
13999 | if (NarrowVT.getVectorElementType() == MVT::i1) |
14000 | break; |
14001 | |
14002 | SupportsZExt = Opc == ISD::ZERO_EXTEND; |
14003 | SupportsSExt = Opc == ISD::SIGN_EXTEND; |
14004 | break; |
14005 | } |
14006 | case RISCVISD::VZEXT_VL: |
14007 | SupportsZExt = true; |
14008 | break; |
14009 | case RISCVISD::VSEXT_VL: |
14010 | SupportsSExt = true; |
14011 | break; |
14012 | case RISCVISD::FP_EXTEND_VL: |
14013 | SupportsFPExt = true; |
14014 | break; |
14015 | case ISD::SPLAT_VECTOR: |
14016 | case RISCVISD::VMV_V_X_VL: |
14017 | fillUpExtensionSupportForSplat(Root, DAG, Subtarget); |
14018 | break; |
14019 | default: |
14020 | break; |
14021 | } |
14022 | } |
14023 | |
14024 | /// Check if \p Root supports any extension folding combines. |
14025 | static bool isSupportedRoot(const SDNode *Root, |
14026 | const RISCVSubtarget &Subtarget) { |
14027 | switch (Root->getOpcode()) { |
14028 | case ISD::ADD: |
14029 | case ISD::SUB: |
14030 | case ISD::MUL: { |
14031 | return Root->getValueType(ResNo: 0).isScalableVector(); |
14032 | } |
14033 | case ISD::OR: { |
14034 | return Root->getValueType(ResNo: 0).isScalableVector() && |
14035 | Root->getFlags().hasDisjoint(); |
14036 | } |
14037 | // Vector Widening Integer Add/Sub/Mul Instructions |
14038 | case RISCVISD::ADD_VL: |
14039 | case RISCVISD::MUL_VL: |
14040 | case RISCVISD::VWADD_W_VL: |
14041 | case RISCVISD::VWADDU_W_VL: |
14042 | case RISCVISD::SUB_VL: |
14043 | case RISCVISD::VWSUB_W_VL: |
14044 | case RISCVISD::VWSUBU_W_VL: |
14045 | // Vector Widening Floating-Point Add/Sub/Mul Instructions |
14046 | case RISCVISD::FADD_VL: |
14047 | case RISCVISD::FSUB_VL: |
14048 | case RISCVISD::FMUL_VL: |
14049 | case RISCVISD::VFWADD_W_VL: |
14050 | case RISCVISD::VFWSUB_W_VL: |
14051 | return true; |
14052 | case ISD::SHL: |
14053 | return Root->getValueType(ResNo: 0).isScalableVector() && |
14054 | Subtarget.hasStdExtZvbb(); |
14055 | case RISCVISD::SHL_VL: |
14056 | return Subtarget.hasStdExtZvbb(); |
14057 | default: |
14058 | return false; |
14059 | } |
14060 | } |
14061 | |
14062 | /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). |
14063 | NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, |
14064 | const RISCVSubtarget &Subtarget) { |
    assert(isSupportedRoot(Root, Subtarget) &&
           "Trying to build a helper with an "
           "unsupported root" );
14068 | assert(OperandIdx < 2 && "Requesting something else than LHS or RHS" ); |
14069 | assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0))); |
14070 | OrigOperand = Root->getOperand(Num: OperandIdx); |
14071 | |
14072 | unsigned Opc = Root->getOpcode(); |
14073 | switch (Opc) { |
14074 | // We consider |
14075 | // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS)) |
14076 | // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS)) |
14077 | // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS)) |
14078 | case RISCVISD::VWADD_W_VL: |
14079 | case RISCVISD::VWADDU_W_VL: |
14080 | case RISCVISD::VWSUB_W_VL: |
14081 | case RISCVISD::VWSUBU_W_VL: |
14082 | case RISCVISD::VFWADD_W_VL: |
14083 | case RISCVISD::VFWSUB_W_VL: |
14084 | if (OperandIdx == 1) { |
14085 | SupportsZExt = |
14086 | Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; |
14087 | SupportsSExt = |
14088 | Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL; |
14089 | SupportsFPExt = |
14090 | Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL; |
14091 | // There's no existing extension here, so we don't have to worry about |
14092 | // making sure it gets removed. |
14093 | EnforceOneUse = false; |
14094 | break; |
14095 | } |
14096 | [[fallthrough]]; |
14097 | default: |
14098 | fillUpExtensionSupport(Root, DAG, Subtarget); |
14099 | break; |
14100 | } |
14101 | } |
14102 | |
14103 | /// Helper function to get the Mask and VL from \p Root. |
14104 | static std::pair<SDValue, SDValue> |
14105 | getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, |
14106 | const RISCVSubtarget &Subtarget) { |
14107 | assert(isSupportedRoot(Root, Subtarget) && "Unexpected root" ); |
14108 | switch (Root->getOpcode()) { |
14109 | case ISD::ADD: |
14110 | case ISD::SUB: |
14111 | case ISD::MUL: |
14112 | case ISD::OR: |
14113 | case ISD::SHL: { |
14114 | SDLoc DL(Root); |
14115 | MVT VT = Root->getSimpleValueType(ResNo: 0); |
14116 | return getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget); |
14117 | } |
14118 | default: |
14119 | return std::make_pair(x: Root->getOperand(Num: 3), y: Root->getOperand(Num: 4)); |
14120 | } |
14121 | } |
14122 | |
14123 | /// Helper function to check if \p N is commutative with respect to the |
14124 | /// foldings that are supported by this class. |
14125 | static bool isCommutative(const SDNode *N) { |
14126 | switch (N->getOpcode()) { |
14127 | case ISD::ADD: |
14128 | case ISD::MUL: |
14129 | case ISD::OR: |
14130 | case RISCVISD::ADD_VL: |
14131 | case RISCVISD::MUL_VL: |
14132 | case RISCVISD::VWADD_W_VL: |
14133 | case RISCVISD::VWADDU_W_VL: |
14134 | case RISCVISD::FADD_VL: |
14135 | case RISCVISD::FMUL_VL: |
14136 | case RISCVISD::VFWADD_W_VL: |
14137 | return true; |
14138 | case ISD::SUB: |
14139 | case RISCVISD::SUB_VL: |
14140 | case RISCVISD::VWSUB_W_VL: |
14141 | case RISCVISD::VWSUBU_W_VL: |
14142 | case RISCVISD::FSUB_VL: |
14143 | case RISCVISD::VFWSUB_W_VL: |
14144 | case ISD::SHL: |
14145 | case RISCVISD::SHL_VL: |
14146 | return false; |
14147 | default: |
14148 | llvm_unreachable("Unexpected opcode" ); |
14149 | } |
14150 | } |
14151 | |
  /// Get a list of combines to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that, if not
  /// std::nullopt, needs to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.
14159 | static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root); |
14160 | }; |
14161 | |
14162 | /// Helper structure that holds all the necessary information to materialize a |
14163 | /// combine that does some extension folding. |
14164 | struct CombineResult { |
14165 | /// Opcode to be generated when materializing the combine. |
14166 | unsigned TargetOpcode; |
  /// No value means no extension is needed.
14168 | std::optional<ExtKind> LHSExt; |
14169 | std::optional<ExtKind> RHSExt; |
14170 | /// Root of the combine. |
14171 | SDNode *Root; |
14172 | /// LHS of the TargetOpcode. |
14173 | NodeExtensionHelper LHS; |
14174 | /// RHS of the TargetOpcode. |
14175 | NodeExtensionHelper RHS; |
14176 | |
14177 | CombineResult(unsigned TargetOpcode, SDNode *Root, |
14178 | const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt, |
14179 | const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt) |
14180 | : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root), |
14181 | LHS(LHS), RHS(RHS) {} |
14182 | |
14183 | /// Return a value that uses TargetOpcode and that can be used to replace |
14184 | /// Root. |
14185 | /// The actual replacement is *not* done in that method. |
14186 | SDValue materialize(SelectionDAG &DAG, |
14187 | const RISCVSubtarget &Subtarget) const { |
14188 | SDValue Mask, VL, Merge; |
14189 | std::tie(args&: Mask, args&: VL) = |
14190 | NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); |
14191 | switch (Root->getOpcode()) { |
14192 | default: |
14193 | Merge = Root->getOperand(Num: 2); |
14194 | break; |
14195 | case ISD::ADD: |
14196 | case ISD::SUB: |
14197 | case ISD::MUL: |
14198 | case ISD::OR: |
14199 | case ISD::SHL: |
14200 | Merge = DAG.getUNDEF(VT: Root->getValueType(ResNo: 0)); |
14201 | break; |
14202 | } |
14203 | return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(Root), VT: Root->getValueType(ResNo: 0), |
14204 | N1: LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: LHSExt), |
14205 | N2: RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: RHSExt), |
14206 | N3: Merge, N4: Mask, N5: VL); |
14207 | } |
14208 | }; |
14209 | |
14210 | /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) |
14211 | /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both |
14212 | /// are zext) and LHS and RHS can be folded into Root. |
/// \p AllowExtMask defines which forms `ext` can take in this pattern.
14214 | /// |
14215 | /// \note If the pattern can match with both zext and sext, the returned |
14216 | /// CombineResult will feature the zext result. |
14217 | /// |
14218 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14219 | /// can be used to apply the pattern. |
14220 | static std::optional<CombineResult> |
14221 | canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, |
14222 | const NodeExtensionHelper &RHS, |
14223 | uint8_t AllowExtMask, SelectionDAG &DAG, |
14224 | const RISCVSubtarget &Subtarget) { |
14225 | if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt) |
14226 | return CombineResult(NodeExtensionHelper::getZExtOpcode(Opcode: Root->getOpcode()), |
14227 | Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS, |
14228 | /*RHSExt=*/{ExtKind::ZExt}); |
14229 | if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt) |
14230 | return CombineResult(NodeExtensionHelper::getSExtOpcode(Opcode: Root->getOpcode()), |
14231 | Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS, |
14232 | /*RHSExt=*/{ExtKind::SExt}); |
14233 | if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt) |
14234 | return CombineResult(NodeExtensionHelper::getFPExtOpcode(Opcode: Root->getOpcode()), |
14235 | Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS, |
14236 | /*RHSExt=*/{ExtKind::FPExt}); |
14237 | return std::nullopt; |
14238 | } |
14239 | |
14240 | /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) |
14241 | /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both |
14242 | /// are zext) and LHS and RHS can be folded into Root. |
14243 | /// |
14244 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14245 | /// can be used to apply the pattern. |
14246 | static std::optional<CombineResult> |
14247 | canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, |
14248 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14249 | const RISCVSubtarget &Subtarget) { |
14250 | return canFoldToVWWithSameExtensionImpl( |
14251 | Root, LHS, RHS, AllowExtMask: ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG, |
14252 | Subtarget); |
14253 | } |
14254 | |
14255 | /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) |
14256 | /// |
14257 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14258 | /// can be used to apply the pattern. |
14259 | static std::optional<CombineResult> |
14260 | canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, |
14261 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14262 | const RISCVSubtarget &Subtarget) { |
14263 | if (RHS.SupportsFPExt) |
14264 | return CombineResult( |
14265 | NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::FPExt), |
14266 | Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt}); |
14267 | |
14268 | // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar |
14269 | // sext/zext? |
14270 | // Control this behavior behind an option (AllowSplatInVW_W) for testing |
14271 | // purposes. |
14272 | if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) |
14273 | return CombineResult( |
14274 | NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::ZExt), Root, |
14275 | LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt}); |
14276 | if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) |
14277 | return CombineResult( |
14278 | NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::SExt), Root, |
14279 | LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt}); |
14280 | return std::nullopt; |
14281 | } |
14282 | |
14283 | /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) |
14284 | /// |
14285 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14286 | /// can be used to apply the pattern. |
14287 | static std::optional<CombineResult> |
14288 | canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
14289 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14290 | const RISCVSubtarget &Subtarget) { |
14291 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::SExt, DAG, |
14292 | Subtarget); |
14293 | } |
14294 | |
14295 | /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) |
14296 | /// |
14297 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14298 | /// can be used to apply the pattern. |
14299 | static std::optional<CombineResult> |
14300 | canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
14301 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14302 | const RISCVSubtarget &Subtarget) { |
14303 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::ZExt, DAG, |
14304 | Subtarget); |
14305 | } |
14306 | |
14307 | /// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS)) |
14308 | /// |
14309 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14310 | /// can be used to apply the pattern. |
14311 | static std::optional<CombineResult> |
14312 | canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
14313 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14314 | const RISCVSubtarget &Subtarget) { |
14315 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::FPExt, DAG, |
14316 | Subtarget); |
14317 | } |
14318 | |
14319 | /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) |
14320 | /// |
14321 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14322 | /// can be used to apply the pattern. |
14323 | static std::optional<CombineResult> |
14324 | canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, |
14325 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14326 | const RISCVSubtarget &Subtarget) { |
14327 | |
14328 | if (!LHS.SupportsSExt || !RHS.SupportsZExt) |
14329 | return std::nullopt; |
14330 | return CombineResult(NodeExtensionHelper::getSUOpcode(Opcode: Root->getOpcode()), |
14331 | Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS, |
14332 | /*RHSExt=*/{ExtKind::ZExt}); |
14333 | } |
14334 | |
14335 | SmallVector<NodeExtensionHelper::CombineToTry> |
14336 | NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { |
14337 | SmallVector<CombineToTry> Strategies; |
14338 | switch (Root->getOpcode()) { |
14339 | case ISD::ADD: |
14340 | case ISD::SUB: |
14341 | case ISD::OR: |
14342 | case RISCVISD::ADD_VL: |
14343 | case RISCVISD::SUB_VL: |
14344 | case RISCVISD::FADD_VL: |
14345 | case RISCVISD::FSUB_VL: |
14346 | // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub |
14347 | Strategies.push_back(Elt: canFoldToVWWithSameExtension); |
14348 | // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w |
14349 | Strategies.push_back(Elt: canFoldToVW_W); |
14350 | break; |
14351 | case RISCVISD::FMUL_VL: |
14352 | Strategies.push_back(Elt: canFoldToVWWithSameExtension); |
14353 | break; |
14354 | case ISD::MUL: |
14355 | case RISCVISD::MUL_VL: |
14356 | // mul -> vwmul(u) |
14357 | Strategies.push_back(Elt: canFoldToVWWithSameExtension); |
14358 | // mul -> vwmulsu |
14359 | Strategies.push_back(Elt: canFoldToVW_SU); |
14360 | break; |
14361 | case ISD::SHL: |
14362 | case RISCVISD::SHL_VL: |
14363 | // shl -> vwsll |
14364 | Strategies.push_back(Elt: canFoldToVWWithZEXT); |
14365 | break; |
14366 | case RISCVISD::VWADD_W_VL: |
14367 | case RISCVISD::VWSUB_W_VL: |
14368 | // vwadd_w|vwsub_w -> vwadd|vwsub |
14369 | Strategies.push_back(Elt: canFoldToVWWithSEXT); |
14370 | break; |
14371 | case RISCVISD::VWADDU_W_VL: |
14372 | case RISCVISD::VWSUBU_W_VL: |
14373 | // vwaddu_w|vwsubu_w -> vwaddu|vwsubu |
14374 | Strategies.push_back(Elt: canFoldToVWWithZEXT); |
14375 | break; |
14376 | case RISCVISD::VFWADD_W_VL: |
14377 | case RISCVISD::VFWSUB_W_VL: |
14378 | // vfwadd_w|vfwsub_w -> vfwadd|vfwsub |
14379 | Strategies.push_back(Elt: canFoldToVWWithFPEXT); |
14380 | break; |
14381 | default: |
14382 | llvm_unreachable("Unexpected opcode" ); |
14383 | } |
14384 | return Strategies; |
14385 | } |
14386 | } // End anonymous namespace. |
14387 | |
14388 | /// Combine a binary operation to its equivalent VW or VW_W form. |
14389 | /// The supported combines are: |
14390 | /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w |
14391 | /// sub | sub_vl -> vwsub(u) | vwsub(u)_w |
14392 | /// mul | mul_vl -> vwmul(u) | vwmul_su |
14393 | /// shl | shl_vl -> vwsll |
14394 | /// fadd_vl -> vfwadd | vfwadd_w |
14395 | /// fsub_vl -> vfwsub | vfwsub_w |
14396 | /// fmul_vl -> vfwmul |
14397 | /// vwadd_w(u) -> vwadd(u) |
14398 | /// vwsub_w(u) -> vwsub(u) |
14399 | /// vfwadd_w -> vfwadd |
14400 | /// vfwsub_w -> vfwsub |
14401 | static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, |
14402 | TargetLowering::DAGCombinerInfo &DCI, |
14403 | const RISCVSubtarget &Subtarget) { |
14404 | SelectionDAG &DAG = DCI.DAG; |
14405 | if (DCI.isBeforeLegalize()) |
14406 | return SDValue(); |
14407 | |
14408 | if (!NodeExtensionHelper::isSupportedRoot(Root: N, Subtarget)) |
14409 | return SDValue(); |
14410 | |
14411 | SmallVector<SDNode *> Worklist; |
14412 | SmallSet<SDNode *, 8> Inserted; |
14413 | Worklist.push_back(Elt: N); |
14414 | Inserted.insert(Ptr: N); |
14415 | SmallVector<CombineResult> CombinesToApply; |
14416 | |
14417 | while (!Worklist.empty()) { |
14418 | SDNode *Root = Worklist.pop_back_val(); |
14419 | if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget)) |
14420 | return SDValue(); |
14421 | |
    NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
    NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
14424 | auto AppendUsersIfNeeded = [&Worklist, |
14425 | &Inserted](const NodeExtensionHelper &Op) { |
14426 | if (Op.needToPromoteOtherUsers()) { |
14427 | for (SDNode *TheUse : Op.OrigOperand->uses()) { |
14428 | if (Inserted.insert(Ptr: TheUse).second) |
14429 | Worklist.push_back(Elt: TheUse); |
14430 | } |
14431 | } |
14432 | }; |
14433 | |
    // Control the compile time by limiting the number of nodes we look at in
    // total.
14436 | if (Inserted.size() > ExtensionMaxWebSize) |
14437 | return SDValue(); |
14438 | |
14439 | SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies = |
        NodeExtensionHelper::getSupportedFoldings(Root);
14441 | |
14442 | assert(!FoldingStrategies.empty() && "Nothing to be folded" ); |
14443 | bool Matched = false; |
14444 | for (int Attempt = 0; |
         (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
14446 | ++Attempt) { |
14447 | |
14448 | for (NodeExtensionHelper::CombineToTry FoldingStrategy : |
14449 | FoldingStrategies) { |
14450 | std::optional<CombineResult> Res = |
            FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
14452 | if (Res) { |
14453 | Matched = true; |
14454 | CombinesToApply.push_back(Elt: *Res); |
          // All the inputs that are extended need to be folded, otherwise
          // we would be left with both the old input (since it may still be
          // used) and the new one.
14458 | if (Res->LHSExt.has_value()) |
14459 | AppendUsersIfNeeded(LHS); |
14460 | if (Res->RHSExt.has_value()) |
14461 | AppendUsersIfNeeded(RHS); |
14462 | break; |
14463 | } |
14464 | } |
14465 | std::swap(a&: LHS, b&: RHS); |
14466 | } |
    // Right now we do an all-or-nothing approach.
14468 | if (!Matched) |
14469 | return SDValue(); |
14470 | } |
14471 | // Store the value for the replacement of the input node separately. |
14472 | SDValue InputRootReplacement; |
14473 | // We do the RAUW after we materialize all the combines, because some replaced |
14474 | // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, |
14475 | // some of these nodes may appear in the NodeExtensionHelpers of some of the |
14476 | // yet-to-be-visited CombinesToApply roots. |
14477 | SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; |
14478 | ValuesToReplace.reserve(N: CombinesToApply.size()); |
14479 | for (CombineResult Res : CombinesToApply) { |
14480 | SDValue NewValue = Res.materialize(DAG, Subtarget); |
14481 | if (!InputRootReplacement) { |
14482 | assert(Res.Root == N && |
14483 | "First element is expected to be the current node" ); |
14484 | InputRootReplacement = NewValue; |
14485 | } else { |
14486 | ValuesToReplace.emplace_back(Args: SDValue(Res.Root, 0), Args&: NewValue); |
14487 | } |
14488 | } |
14489 | for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { |
14490 | DAG.ReplaceAllUsesOfValueWith(From: OldNewValues.first, To: OldNewValues.second); |
14491 | DCI.AddToWorklist(N: OldNewValues.second.getNode()); |
14492 | } |
14493 | return InputRootReplacement; |
14494 | } |
14495 | |
14496 | // Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond |
14497 | // (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond |
14498 | // y will be the Passthru and cond will be the Mask. |
14499 | static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) { |
14500 | unsigned Opc = N->getOpcode(); |
14501 | assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL || |
14502 | Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL); |
14503 | |
14504 | SDValue Y = N->getOperand(Num: 0); |
14505 | SDValue MergeOp = N->getOperand(Num: 1); |
14506 | unsigned MergeOpc = MergeOp.getOpcode(); |
14507 | |
14508 | if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT) |
14509 | return SDValue(); |
14510 | |
14511 | SDValue X = MergeOp->getOperand(Num: 1); |
14512 | |
14513 | if (!MergeOp.hasOneUse()) |
14514 | return SDValue(); |
14515 | |
14516 | // Passthru should be undef |
14517 | SDValue Passthru = N->getOperand(Num: 2); |
14518 | if (!Passthru.isUndef()) |
14519 | return SDValue(); |
14520 | |
14521 | // Mask should be all ones |
14522 | SDValue Mask = N->getOperand(Num: 3); |
14523 | if (Mask.getOpcode() != RISCVISD::VMSET_VL) |
14524 | return SDValue(); |
14525 | |
14526 | // False value of MergeOp should be all zeros |
14527 | SDValue Z = MergeOp->getOperand(Num: 2); |
14528 | |
14529 | if (Z.getOpcode() == ISD::INSERT_SUBVECTOR && |
14530 | (isNullOrNullSplat(V: Z.getOperand(i: 0)) || Z.getOperand(i: 0).isUndef())) |
14531 | Z = Z.getOperand(i: 1); |
14532 | |
14533 | if (!ISD::isConstantSplatVectorAllZeros(N: Z.getNode())) |
14534 | return SDValue(); |
14535 | |
14536 | return DAG.getNode(Opcode: Opc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
14537 | Ops: {Y, X, Y, MergeOp->getOperand(Num: 0), N->getOperand(Num: 4)}, |
14538 | Flags: N->getFlags()); |
14539 | } |
14540 | |
14541 | static SDValue performVWADDSUBW_VLCombine(SDNode *N, |
14542 | TargetLowering::DAGCombinerInfo &DCI, |
14543 | const RISCVSubtarget &Subtarget) { |
14544 | [[maybe_unused]] unsigned Opc = N->getOpcode(); |
14545 | assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL || |
14546 | Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL); |
14547 | |
14548 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
14549 | return V; |
14550 | |
14551 | return combineVWADDSUBWSelect(N, DAG&: DCI.DAG); |
14552 | } |
14553 | |
14554 | // Helper function for performMemPairCombine. |
14555 | // Try to combine the memory loads/stores LSNode1 and LSNode2 |
14556 | // into a single memory pair operation. |
14557 | static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, |
14558 | LSBaseSDNode *LSNode2, SDValue BasePtr, |
14559 | uint64_t Imm) { |
14560 | SmallPtrSet<const SDNode *, 32> Visited; |
14561 | SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2}; |
14562 | |
14563 | if (SDNode::hasPredecessorHelper(N: LSNode1, Visited, Worklist) || |
14564 | SDNode::hasPredecessorHelper(N: LSNode2, Visited, Worklist)) |
14565 | return SDValue(); |
14566 | |
14567 | MachineFunction &MF = DAG.getMachineFunction(); |
14568 | const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); |
14569 | |
14570 | // The new operation has twice the width. |
14571 | MVT XLenVT = Subtarget.getXLenVT(); |
14572 | EVT MemVT = LSNode1->getMemoryVT(); |
14573 | EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128; |
14574 | MachineMemOperand *MMO = LSNode1->getMemOperand(); |
14575 | MachineMemOperand *NewMMO = MF.getMachineMemOperand( |
14576 | MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16); |
14577 | |
14578 | if (LSNode1->getOpcode() == ISD::LOAD) { |
14579 | auto Ext = cast<LoadSDNode>(Val: LSNode1)->getExtensionType(); |
14580 | unsigned Opcode; |
14581 | if (MemVT == MVT::i32) |
14582 | Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD; |
14583 | else |
14584 | Opcode = RISCVISD::TH_LDD; |
14585 | |
14586 | SDValue Res = DAG.getMemIntrinsicNode( |
14587 | Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}), |
14588 | {LSNode1->getChain(), BasePtr, |
14589 | DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, |
14590 | NewMemVT, NewMMO); |
14591 | |
14592 | SDValue Node1 = |
14593 | DAG.getMergeValues(Ops: {Res.getValue(R: 0), Res.getValue(R: 2)}, dl: SDLoc(LSNode1)); |
14594 | SDValue Node2 = |
14595 | DAG.getMergeValues(Ops: {Res.getValue(R: 1), Res.getValue(R: 2)}, dl: SDLoc(LSNode2)); |
14596 | |
14597 | DAG.ReplaceAllUsesWith(From: LSNode2, To: Node2.getNode()); |
14598 | return Node1; |
14599 | } else { |
14600 | unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD; |
14601 | |
14602 | SDValue Res = DAG.getMemIntrinsicNode( |
14603 | Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other), |
14604 | {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1), |
14605 | BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, |
14606 | NewMemVT, NewMMO); |
14607 | |
14608 | DAG.ReplaceAllUsesWith(From: LSNode2, To: Res.getNode()); |
14609 | return Res; |
14610 | } |
14611 | } |
14612 | |
14613 | // Try to combine two adjacent loads/stores to a single pair instruction from |
14614 | // the XTHeadMemPair vendor extension. |
14615 | static SDValue performMemPairCombine(SDNode *N, |
14616 | TargetLowering::DAGCombinerInfo &DCI) { |
14617 | SelectionDAG &DAG = DCI.DAG; |
14618 | MachineFunction &MF = DAG.getMachineFunction(); |
14619 | const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); |
14620 | |
14621 | // Target does not support load/store pair. |
14622 | if (!Subtarget.hasVendorXTHeadMemPair()) |
14623 | return SDValue(); |
14624 | |
14625 | LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(Val: N); |
14626 | EVT MemVT = LSNode1->getMemoryVT(); |
14627 | unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2; |
14628 | |
14629 | // No volatile, indexed or atomic loads/stores. |
14630 | if (!LSNode1->isSimple() || LSNode1->isIndexed()) |
14631 | return SDValue(); |
14632 | |
14633 | // Function to get a base + constant representation from a memory value. |
14634 | auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> { |
14635 | if (Ptr->getOpcode() == ISD::ADD) |
14636 | if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Ptr->getOperand(Num: 1))) |
14637 | return {Ptr->getOperand(Num: 0), C1->getZExtValue()}; |
14638 | return {Ptr, 0}; |
14639 | }; |
14640 | |
14641 | auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(Num: OpNum)); |
14642 | |
14643 | SDValue Chain = N->getOperand(Num: 0); |
14644 | for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); |
14645 | UI != UE; ++UI) { |
14646 | SDUse &Use = UI.getUse(); |
14647 | if (Use.getUser() != N && Use.getResNo() == 0 && |
14648 | Use.getUser()->getOpcode() == N->getOpcode()) { |
14649 | LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Val: Use.getUser()); |
14650 | |
14651 | // No volatile, indexed or atomic loads/stores. |
14652 | if (!LSNode2->isSimple() || LSNode2->isIndexed()) |
14653 | continue; |
14654 | |
14655 | // Check if LSNode1 and LSNode2 have the same type and extension. |
14656 | if (LSNode1->getOpcode() == ISD::LOAD) |
14657 | if (cast<LoadSDNode>(Val: LSNode2)->getExtensionType() != |
14658 | cast<LoadSDNode>(Val: LSNode1)->getExtensionType()) |
14659 | continue; |
14660 | |
14661 | if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT()) |
14662 | continue; |
14663 | |
14664 | auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(Num: OpNum)); |
14665 | |
      // Check if the base pointer is the same for both instructions.
14667 | if (Base1 != Base2) |
14668 | continue; |
14669 | |
      // Check if the offsets match the XTHeadMemPair encoding constraints.
14671 | bool Valid = false; |
14672 | if (MemVT == MVT::i32) { |
14673 | // Check for adjacent i32 values and a 2-bit index. |
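        // isShiftedUInt<2, 3> restricts Offset1 to {0, 8, 16, 24}, the four
        // slots encodable in the instruction's 2-bit index field.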
14674 | if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(x: Offset1)) |
14675 | Valid = true; |
14676 | } else if (MemVT == MVT::i64) { |
14677 | // Check for adjacent i64 values and a 2-bit index. |
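        // isShiftedUInt<2, 4> restricts Offset1 to {0, 16, 32, 48}.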
14678 | if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(x: Offset1)) |
14679 | Valid = true; |
14680 | } |
14681 | |
14682 | if (!Valid) |
14683 | continue; |
14684 | |
14685 | // Try to combine. |
14686 | if (SDValue Res = |
14687 | tryMemPairCombine(DAG, LSNode1, LSNode2, BasePtr: Base1, Imm: Offset1)) |
14688 | return Res; |
14689 | } |
14690 | } |
14691 | |
14692 | return SDValue(); |
14693 | } |
14694 | |
14695 | // Fold |
14696 | // (fp_to_int (froundeven X)) -> fcvt X, rne |
14697 | // (fp_to_int (ftrunc X)) -> fcvt X, rtz |
14698 | // (fp_to_int (ffloor X)) -> fcvt X, rdn |
14699 | // (fp_to_int (fceil X)) -> fcvt X, rup |
14700 | // (fp_to_int (fround X)) -> fcvt X, rmm |
14701 | // (fp_to_int (frint X)) -> fcvt X |
14702 | static SDValue performFP_TO_INTCombine(SDNode *N, |
14703 | TargetLowering::DAGCombinerInfo &DCI, |
14704 | const RISCVSubtarget &Subtarget) { |
14705 | SelectionDAG &DAG = DCI.DAG; |
14706 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
14707 | MVT XLenVT = Subtarget.getXLenVT(); |
14708 | |
14709 | SDValue Src = N->getOperand(Num: 0); |
14710 | |
14711 | // Don't do this for strict-fp Src. |
14712 | if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) |
14713 | return SDValue(); |
14714 | |
14715 | // Ensure the FP type is legal. |
14716 | if (!TLI.isTypeLegal(VT: Src.getValueType())) |
14717 | return SDValue(); |
14718 | |
14719 | // Don't do this for f16 with Zfhmin and not Zfh. |
14720 | if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) |
14721 | return SDValue(); |
14722 | |
14723 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode()); |
14724 | // If the result is invalid, we didn't find a foldable instruction. |
14725 | if (FRM == RISCVFPRndMode::Invalid) |
14726 | return SDValue(); |
14727 | |
14728 | SDLoc DL(N); |
14729 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; |
14730 | EVT VT = N->getValueType(ResNo: 0); |
14731 | |
14732 | if (VT.isVector() && TLI.isTypeLegal(VT)) { |
14733 | MVT SrcVT = Src.getSimpleValueType(); |
14734 | MVT SrcContainerVT = SrcVT; |
14735 | MVT ContainerVT = VT.getSimpleVT(); |
14736 | SDValue XVal = Src.getOperand(i: 0); |
14737 | |
// For widening and narrowing conversions we just combine it into a
// VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
// end up getting lowered to their appropriate pseudo instructions based on
// their operand types.
14742 | if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || |
14743 | VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) |
14744 | return SDValue(); |
14745 | |
14746 | // Make fixed-length vectors scalable first |
14747 | if (SrcVT.isFixedLengthVector()) { |
14748 | SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget); |
14749 | XVal = convertToScalableVector(VT: SrcContainerVT, V: XVal, DAG, Subtarget); |
14750 | ContainerVT = |
14751 | getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget); |
14752 | } |
14753 | |
14754 | auto [Mask, VL] = |
14755 | getDefaultVLOps(VecVT: SrcVT, ContainerVT: SrcContainerVT, DL, DAG, Subtarget); |
14756 | |
14757 | SDValue FpToInt; |
14758 | if (FRM == RISCVFPRndMode::RTZ) { |
// Use the dedicated trunc static rounding mode if we're truncating so we
// don't need to generate calls to fsrmi/fsrm.
14761 | unsigned Opc = |
14762 | IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; |
14763 | FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL); |
14764 | } else if (FRM == RISCVFPRndMode::DYN) { |
14765 | unsigned Opc = |
14766 | IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL; |
14767 | FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL); |
14768 | } else { |
14769 | unsigned Opc = |
14770 | IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; |
14771 | FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, |
14772 | N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL); |
14773 | } |
14774 | |
14775 | // If converted from fixed-length to scalable, convert back |
14776 | if (VT.isFixedLengthVector()) |
14777 | FpToInt = convertFromScalableVector(VT, V: FpToInt, DAG, Subtarget); |
14778 | |
14779 | return FpToInt; |
14780 | } |
14781 | |
14782 | // Only handle XLen or i32 types. Other types narrower than XLen will |
14783 | // eventually be legalized to XLenVT. |
14784 | if (VT != MVT::i32 && VT != XLenVT) |
14785 | return SDValue(); |
14786 | |
14787 | unsigned Opc; |
14788 | if (VT == XLenVT) |
14789 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
14790 | else |
14791 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
14792 | |
14793 | SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src.getOperand(i: 0), |
14794 | N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT)); |
14795 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FpToInt); |
14796 | } |
14797 | |
14798 | // Fold |
14799 | // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) |
14800 | // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) |
14801 | // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) |
14802 | // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) |
14803 | // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) |
14804 | // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) |
14805 | static SDValue performFP_TO_INT_SATCombine(SDNode *N, |
14806 | TargetLowering::DAGCombinerInfo &DCI, |
14807 | const RISCVSubtarget &Subtarget) { |
14808 | SelectionDAG &DAG = DCI.DAG; |
14809 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
14810 | MVT XLenVT = Subtarget.getXLenVT(); |
14811 | |
14812 | // Only handle XLen types. Other types narrower than XLen will eventually be |
14813 | // legalized to XLenVT. |
14814 | EVT DstVT = N->getValueType(ResNo: 0); |
14815 | if (DstVT != XLenVT) |
14816 | return SDValue(); |
14817 | |
14818 | SDValue Src = N->getOperand(Num: 0); |
14819 | |
14820 | // Don't do this for strict-fp Src. |
14821 | if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) |
14822 | return SDValue(); |
14823 | |
14824 | // Ensure the FP type is also legal. |
14825 | if (!TLI.isTypeLegal(VT: Src.getValueType())) |
14826 | return SDValue(); |
14827 | |
14828 | // Don't do this for f16 with Zfhmin and not Zfh. |
14829 | if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) |
14830 | return SDValue(); |
14831 | |
14832 | EVT SatVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT(); |
14833 | |
14834 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode()); |
14835 | if (FRM == RISCVFPRndMode::Invalid) |
14836 | return SDValue(); |
14837 | |
14838 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; |
14839 | |
14840 | unsigned Opc; |
14841 | if (SatVT == DstVT) |
14842 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
14843 | else if (DstVT == MVT::i64 && SatVT == MVT::i32) |
14844 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
14845 | else |
14846 | return SDValue(); |
14847 | // FIXME: Support other SatVTs by clamping before or after the conversion. |
14848 | |
14849 | Src = Src.getOperand(i: 0); |
14850 | |
14851 | SDLoc DL(N); |
14852 | SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src, |
14853 | N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT)); |
14854 | |
14855 | // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero |
14856 | // extend. |
14857 | if (Opc == RISCVISD::FCVT_WU_RV64) |
14858 | FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); |
14859 | |
// RISC-V FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for NaN.
14862 | SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT); |
14863 | return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt, Cond: ISD::CondCode::SETUO); |
14864 | } |
14865 | |
14866 | // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is |
14867 | // smaller than XLenVT. |
14868 | static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, |
14869 | const RISCVSubtarget &Subtarget) { |
14870 | assert(Subtarget.hasStdExtZbkb() && "Unexpected extension" ); |
14871 | |
14872 | SDValue Src = N->getOperand(Num: 0); |
14873 | if (Src.getOpcode() != ISD::BSWAP) |
14874 | return SDValue(); |
14875 | |
14876 | EVT VT = N->getValueType(ResNo: 0); |
14877 | if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || |
14878 | !llvm::has_single_bit<uint32_t>(Value: VT.getSizeInBits())) |
14879 | return SDValue(); |
14880 | |
14881 | SDLoc DL(N); |
14882 | return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: Src.getOperand(i: 0)); |
14883 | } |
14884 | |
14885 | // Convert from one FMA opcode to another based on whether we are negating the |
14886 | // multiply result and/or the accumulator. |
14887 | // NOTE: Only supports RVV operations with VL. |
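// For example (derived from the tables below):
// negateFMAOpcode(VFMADD_VL, /*NegMul=*/true, /*NegAcc=*/false) == VFNMSUB_VL
// negateFMAOpcode(VFMADD_VL, /*NegMul=*/true, /*NegAcc=*/true) == VFNMADD_VL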
14888 | static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { |
14889 | // Negating the multiply result changes ADD<->SUB and toggles 'N'. |
14890 | if (NegMul) { |
14891 | // clang-format off |
14892 | switch (Opcode) { |
14893 | default: llvm_unreachable("Unexpected opcode" ); |
14894 | case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; |
14895 | case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; |
14896 | case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; |
14897 | case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; |
14898 | case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; |
14899 | case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; |
14900 | case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; |
14901 | case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; |
14902 | } |
14903 | // clang-format on |
14904 | } |
14905 | |
14906 | // Negating the accumulator changes ADD<->SUB. |
14907 | if (NegAcc) { |
14908 | // clang-format off |
14909 | switch (Opcode) { |
14910 | default: llvm_unreachable("Unexpected opcode" ); |
14911 | case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; |
14912 | case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; |
14913 | case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; |
14914 | case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; |
14915 | case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; |
14916 | case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; |
14917 | case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; |
14918 | case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; |
14919 | } |
14920 | // clang-format on |
14921 | } |
14922 | |
14923 | return Opcode; |
14924 | } |
14925 | |
14926 | static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { |
14927 | // Fold FNEG_VL into FMA opcodes. |
14928 | // The first operand of strict-fp is chain. |
14929 | unsigned Offset = N->isTargetStrictFPOpcode(); |
14930 | SDValue A = N->getOperand(Num: 0 + Offset); |
14931 | SDValue B = N->getOperand(Num: 1 + Offset); |
14932 | SDValue C = N->getOperand(Num: 2 + Offset); |
14933 | SDValue Mask = N->getOperand(Num: 3 + Offset); |
14934 | SDValue VL = N->getOperand(Num: 4 + Offset); |
14935 | |
14936 | auto invertIfNegative = [&Mask, &VL](SDValue &V) { |
14937 | if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(i: 1) == Mask && |
14938 | V.getOperand(i: 2) == VL) { |
14939 | // Return the negated input. |
14940 | V = V.getOperand(i: 0); |
14941 | return true; |
14942 | } |
14943 | |
14944 | return false; |
14945 | }; |
14946 | |
14947 | bool NegA = invertIfNegative(A); |
14948 | bool NegB = invertIfNegative(B); |
14949 | bool NegC = invertIfNegative(C); |
14950 | |
14951 | // If no operands are negated, we're done. |
14952 | if (!NegA && !NegB && !NegC) |
14953 | return SDValue(); |
14954 | |
14955 | unsigned NewOpcode = negateFMAOpcode(Opcode: N->getOpcode(), NegMul: NegA != NegB, NegAcc: NegC); |
14956 | if (N->isTargetStrictFPOpcode()) |
14957 | return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VTList: N->getVTList(), |
14958 | Ops: {N->getOperand(Num: 0), A, B, C, Mask, VL}); |
14959 | return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: A, N2: B, N3: C, N4: Mask, |
14960 | N5: VL); |
14961 | } |
14962 | |
14963 | static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, |
14964 | const RISCVSubtarget &Subtarget) { |
14965 | if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) |
14966 | return V; |
14967 | |
14968 | if (N->getValueType(0).isScalableVector() && |
14969 | N->getValueType(0).getVectorElementType() == MVT::f32 && |
14970 | (Subtarget.hasVInstructionsF16Minimal() && |
14971 | !Subtarget.hasVInstructionsF16())) { |
14972 | return SDValue(); |
14973 | } |
14974 | |
14975 | // FIXME: Ignore strict opcodes for now. |
14976 | if (N->isTargetStrictFPOpcode()) |
14977 | return SDValue(); |
14978 | |
14979 | // Try to form widening FMA. |
14980 | SDValue Op0 = N->getOperand(Num: 0); |
14981 | SDValue Op1 = N->getOperand(Num: 1); |
14982 | SDValue Mask = N->getOperand(Num: 3); |
14983 | SDValue VL = N->getOperand(Num: 4); |
14984 | |
14985 | if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || |
14986 | Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) |
14987 | return SDValue(); |
14988 | |
14989 | // TODO: Refactor to handle more complex cases similar to |
14990 | // combineBinOp_VLToVWBinOp_VL. |
14991 | if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && |
14992 | (Op0 != Op1 || !Op0->hasNUsesOfValue(NUses: 2, Value: 0))) |
14993 | return SDValue(); |
14994 | |
14995 | // Check the mask and VL are the same. |
14996 | if (Op0.getOperand(i: 1) != Mask || Op0.getOperand(i: 2) != VL || |
14997 | Op1.getOperand(i: 1) != Mask || Op1.getOperand(i: 2) != VL) |
14998 | return SDValue(); |
14999 | |
15000 | unsigned NewOpc; |
15001 | switch (N->getOpcode()) { |
15002 | default: |
15003 | llvm_unreachable("Unexpected opcode" ); |
15004 | case RISCVISD::VFMADD_VL: |
15005 | NewOpc = RISCVISD::VFWMADD_VL; |
15006 | break; |
15007 | case RISCVISD::VFNMSUB_VL: |
15008 | NewOpc = RISCVISD::VFWNMSUB_VL; |
15009 | break; |
15010 | case RISCVISD::VFNMADD_VL: |
15011 | NewOpc = RISCVISD::VFWNMADD_VL; |
15012 | break; |
15013 | case RISCVISD::VFMSUB_VL: |
15014 | NewOpc = RISCVISD::VFWMSUB_VL; |
15015 | break; |
15016 | } |
15017 | |
15018 | Op0 = Op0.getOperand(i: 0); |
15019 | Op1 = Op1.getOperand(i: 0); |
15020 | |
15021 | return DAG.getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Op0, N2: Op1, |
15022 | N3: N->getOperand(Num: 2), N4: Mask, N5: VL); |
15023 | } |
15024 | |
15025 | static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, |
15026 | const RISCVSubtarget &Subtarget) { |
15027 | assert(N->getOpcode() == ISD::SRA && "Unexpected opcode" ); |
15028 | |
15029 | if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) |
15030 | return SDValue(); |
15031 | |
15032 | if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1))) |
15033 | return SDValue(); |
15034 | uint64_t ShAmt = N->getConstantOperandVal(Num: 1); |
15035 | if (ShAmt > 32) |
15036 | return SDValue(); |
15037 | |
15038 | SDValue N0 = N->getOperand(Num: 0); |
15039 | |
15040 | // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> |
15041 | // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of |
15042 | // SLLIW+SRAIW. SLLI+SRAI have compressed forms. |
15043 | if (ShAmt < 32 && |
15044 | N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && |
15045 | cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 && |
15046 | N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() && |
15047 | isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { |
15048 | uint64_t LShAmt = N0.getOperand(i: 0).getConstantOperandVal(i: 1); |
15049 | if (LShAmt < 32) { |
15050 | SDLoc ShlDL(N0.getOperand(i: 0)); |
15051 | SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64, |
15052 | N0.getOperand(0).getOperand(0), |
15053 | DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64)); |
15054 | SDLoc DL(N); |
15055 | return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl, |
15056 | DAG.getConstant(ShAmt + 32, DL, MVT::i64)); |
15057 | } |
15058 | } |
15059 | |
15060 | // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) |
15061 | // FIXME: Should this be a generic combine? There's a similar combine on X86. |
15062 | // |
// Also try these folds where an add or sub is in the middle.
// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
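// For example, with C == 5:
// (sra (shl X, 32), 27) -> (shl (sext_inreg X, i32), 5)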
15066 | SDValue Shl; |
15067 | ConstantSDNode *AddC = nullptr; |
15068 | |
15069 | // We might have an ADD or SUB between the SRA and SHL. |
15070 | bool IsAdd = N0.getOpcode() == ISD::ADD; |
15071 | if ((IsAdd || N0.getOpcode() == ISD::SUB)) { |
15072 | // Other operand needs to be a constant we can modify. |
15073 | AddC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: IsAdd ? 1 : 0)); |
15074 | if (!AddC) |
15075 | return SDValue(); |
15076 | |
15077 | // AddC needs to have at least 32 trailing zeros. |
15078 | if (AddC->getAPIntValue().countr_zero() < 32) |
15079 | return SDValue(); |
15080 | |
15081 | // All users should be a shift by constant less than or equal to 32. This |
15082 | // ensures we'll do this optimization for each of them to produce an |
15083 | // add/sub+sext_inreg they can all share. |
15084 | for (SDNode *U : N0->uses()) { |
15085 | if (U->getOpcode() != ISD::SRA || |
15086 | !isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) || |
15087 | U->getConstantOperandVal(Num: 1) > 32) |
15088 | return SDValue(); |
15089 | } |
15090 | |
15091 | Shl = N0.getOperand(i: IsAdd ? 0 : 1); |
15092 | } else { |
15093 | // Not an ADD or SUB. |
15094 | Shl = N0; |
15095 | } |
15096 | |
15097 | // Look for a shift left by 32. |
15098 | if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: Shl.getOperand(i: 1)) || |
15099 | Shl.getConstantOperandVal(i: 1) != 32) |
15100 | return SDValue(); |
15101 | |
// If we didn't look through an add/sub, then the shl should have one use.
15103 | // If we did look through an add/sub, the sext_inreg we create is free so |
15104 | // we're only creating 2 new instructions. It's enough to only remove the |
15105 | // original sra+add/sub. |
15106 | if (!AddC && !Shl.hasOneUse()) |
15107 | return SDValue(); |
15108 | |
15109 | SDLoc DL(N); |
15110 | SDValue In = Shl.getOperand(i: 0); |
15111 | |
15112 | // If we looked through an ADD or SUB, we need to rebuild it with the shifted |
15113 | // constant. |
15114 | if (AddC) { |
15115 | SDValue ShiftedAddC = |
15116 | DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64); |
15117 | if (IsAdd) |
15118 | In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC); |
15119 | else |
15120 | In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In); |
15121 | } |
15122 | |
15123 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In, |
15124 | DAG.getValueType(MVT::i32)); |
15125 | if (ShAmt == 32) |
15126 | return SExt; |
15127 | |
15128 | return DAG.getNode( |
15129 | ISD::SHL, DL, MVT::i64, SExt, |
15130 | DAG.getConstant(32 - ShAmt, DL, MVT::i64)); |
15131 | } |
15132 | |
// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
// the result is used as the condition of a br_cc or select_cc we can invert,
// inverting the setcc is free, and Z is 0/1. Caller will invert the
// br_cc/select_cc.
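// For example, when Z is known to be 0/1:
// (and (setcc X, Y, eq), (xor Z, 1)) -> (or (setcc X, Y, ne), Z)
// and the caller then inverts the br_cc/select_cc consuming the result.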
15137 | static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) { |
15138 | bool IsAnd = Cond.getOpcode() == ISD::AND; |
15139 | if (!IsAnd && Cond.getOpcode() != ISD::OR) |
15140 | return SDValue(); |
15141 | |
15142 | if (!Cond.hasOneUse()) |
15143 | return SDValue(); |
15144 | |
15145 | SDValue Setcc = Cond.getOperand(i: 0); |
15146 | SDValue Xor = Cond.getOperand(i: 1); |
15147 | // Canonicalize setcc to LHS. |
15148 | if (Setcc.getOpcode() != ISD::SETCC) |
15149 | std::swap(a&: Setcc, b&: Xor); |
15150 | // LHS should be a setcc and RHS should be an xor. |
15151 | if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() || |
15152 | Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) |
15153 | return SDValue(); |
15154 | |
15155 | // If the condition is an And, SimplifyDemandedBits may have changed |
15156 | // (xor Z, 1) to (not Z). |
15157 | SDValue Xor1 = Xor.getOperand(i: 1); |
15158 | if (!isOneConstant(V: Xor1) && !(IsAnd && isAllOnesConstant(V: Xor1))) |
15159 | return SDValue(); |
15160 | |
15161 | EVT VT = Cond.getValueType(); |
15162 | SDValue Xor0 = Xor.getOperand(i: 0); |
15163 | |
15164 | // The LHS of the xor needs to be 0/1. |
15165 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1); |
15166 | if (!DAG.MaskedValueIsZero(Op: Xor0, Mask)) |
15167 | return SDValue(); |
15168 | |
15169 | // We can only invert integer setccs. |
15170 | EVT SetCCOpVT = Setcc.getOperand(i: 0).getValueType(); |
15171 | if (!SetCCOpVT.isScalarInteger()) |
15172 | return SDValue(); |
15173 | |
15174 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Setcc.getOperand(i: 2))->get(); |
15175 | if (ISD::isIntEqualitySetCC(Code: CCVal)) { |
15176 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT); |
15177 | Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 0), |
15178 | RHS: Setcc.getOperand(i: 1), Cond: CCVal); |
15179 | } else if (CCVal == ISD::SETLT && isNullConstant(V: Setcc.getOperand(i: 0))) { |
15180 | // Invert (setlt 0, X) by converting to (setlt X, 1). |
15181 | Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 1), |
15182 | RHS: DAG.getConstant(Val: 1, DL: SDLoc(Setcc), VT), Cond: CCVal); |
15183 | } else if (CCVal == ISD::SETLT && isOneConstant(V: Setcc.getOperand(i: 1))) { |
// Invert (setlt X, 1) by converting to (setlt 0, X).
15185 | Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, |
15186 | LHS: DAG.getConstant(Val: 0, DL: SDLoc(Setcc), VT), |
15187 | RHS: Setcc.getOperand(i: 0), Cond: CCVal); |
15188 | } else |
15189 | return SDValue(); |
15190 | |
15191 | unsigned Opc = IsAnd ? ISD::OR : ISD::AND; |
15192 | return DAG.getNode(Opcode: Opc, DL: SDLoc(Cond), VT, N1: Setcc, N2: Xor.getOperand(i: 0)); |
15193 | } |
15194 | |
// Perform common combines for BR_CC and SELECT_CC conditions.
15196 | static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, |
15197 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
15198 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get(); |
15199 | |
// Since an arithmetic right shift always preserves the sign, the shift can
// be omitted.
15202 | // Fold setlt (sra X, N), 0 -> setlt X, 0 and |
15203 | // setge (sra X, N), 0 -> setge X, 0 |
15204 | if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) && |
15205 | LHS.getOpcode() == ISD::SRA) { |
15206 | LHS = LHS.getOperand(i: 0); |
15207 | return true; |
15208 | } |
15209 | |
15210 | if (!ISD::isIntEqualitySetCC(Code: CCVal)) |
15211 | return false; |
15212 | |
15213 | // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) |
15214 | // Sometimes the setcc is introduced after br_cc/select_cc has been formed. |
15215 | if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) && |
15216 | LHS.getOperand(i: 0).getValueType() == Subtarget.getXLenVT()) { |
15217 | // If we're looking for eq 0 instead of ne 0, we need to invert the |
15218 | // condition. |
15219 | bool Invert = CCVal == ISD::SETEQ; |
15220 | CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get(); |
15221 | if (Invert) |
15222 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()); |
15223 | |
15224 | RHS = LHS.getOperand(i: 1); |
15225 | LHS = LHS.getOperand(i: 0); |
15226 | translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG); |
15227 | |
15228 | CC = DAG.getCondCode(Cond: CCVal); |
15229 | return true; |
15230 | } |
15231 | |
15232 | // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) |
15233 | if (LHS.getOpcode() == ISD::XOR && isNullConstant(V: RHS)) { |
15234 | RHS = LHS.getOperand(i: 1); |
15235 | LHS = LHS.getOperand(i: 0); |
15236 | return true; |
15237 | } |
15238 | |
15239 | // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) |
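// This is a single-bit test: shift bit C into the sign position and test the
// sign with ge/lt against zero.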
15240 | if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && |
15241 | LHS.getOperand(i: 1).getOpcode() == ISD::Constant) { |
15242 | SDValue LHS0 = LHS.getOperand(i: 0); |
15243 | if (LHS0.getOpcode() == ISD::AND && |
15244 | LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) { |
15245 | uint64_t Mask = LHS0.getConstantOperandVal(i: 1); |
15246 | uint64_t ShAmt = LHS.getConstantOperandVal(i: 1); |
15247 | if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) { |
15248 | CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; |
15249 | CC = DAG.getCondCode(Cond: CCVal); |
15250 | |
15251 | ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; |
15252 | LHS = LHS0.getOperand(i: 0); |
15253 | if (ShAmt != 0) |
15254 | LHS = |
15255 | DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0), |
15256 | N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType())); |
15257 | return true; |
15258 | } |
15259 | } |
15260 | } |
15261 | |
// Fold (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15263 | // This can occur when legalizing some floating point comparisons. |
15264 | APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1); |
15265 | if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) { |
15266 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()); |
15267 | CC = DAG.getCondCode(Cond: CCVal); |
15268 | RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType()); |
15269 | return true; |
15270 | } |
15271 | |
15272 | if (isNullConstant(V: RHS)) { |
15273 | if (SDValue NewCond = tryDemorganOfBooleanCondition(Cond: LHS, DAG)) { |
15274 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()); |
15275 | CC = DAG.getCondCode(Cond: CCVal); |
15276 | LHS = NewCond; |
15277 | return true; |
15278 | } |
15279 | } |
15280 | |
15281 | return false; |
15282 | } |
15283 | |
15284 | // Fold |
15285 | // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). |
15286 | // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). |
15287 | // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). |
15288 | // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). |
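// For example, (select C, (add Y, X), Y) becomes (add Y, (select C, X, 0));
// selecting between X and the identity value 0 tends to map well onto
// conditional-zero style instructions.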
15289 | static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, |
15290 | SDValue TrueVal, SDValue FalseVal, |
15291 | bool Swapped) { |
15292 | bool Commutative = true; |
15293 | unsigned Opc = TrueVal.getOpcode(); |
15294 | switch (Opc) { |
15295 | default: |
15296 | return SDValue(); |
15297 | case ISD::SHL: |
15298 | case ISD::SRA: |
15299 | case ISD::SRL: |
15300 | case ISD::SUB: |
15301 | Commutative = false; |
15302 | break; |
15303 | case ISD::ADD: |
15304 | case ISD::OR: |
15305 | case ISD::XOR: |
15306 | break; |
15307 | } |
15308 | |
15309 | if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(Val: FalseVal)) |
15310 | return SDValue(); |
15311 | |
15312 | unsigned OpToFold; |
15313 | if (FalseVal == TrueVal.getOperand(i: 0)) |
15314 | OpToFold = 0; |
15315 | else if (Commutative && FalseVal == TrueVal.getOperand(i: 1)) |
15316 | OpToFold = 1; |
15317 | else |
15318 | return SDValue(); |
15319 | |
15320 | EVT VT = N->getValueType(ResNo: 0); |
15321 | SDLoc DL(N); |
15322 | SDValue OtherOp = TrueVal.getOperand(i: 1 - OpToFold); |
15323 | EVT OtherOpVT = OtherOp->getValueType(ResNo: 0); |
15324 | SDValue IdentityOperand = |
15325 | DAG.getNeutralElement(Opcode: Opc, DL, VT: OtherOpVT, Flags: N->getFlags()); |
15326 | if (!Commutative) |
15327 | IdentityOperand = DAG.getConstant(Val: 0, DL, VT: OtherOpVT); |
15328 | assert(IdentityOperand && "No identity operand!" ); |
15329 | |
15330 | if (Swapped) |
15331 | std::swap(a&: OtherOp, b&: IdentityOperand); |
15332 | SDValue NewSel = |
15333 | DAG.getSelect(DL, VT: OtherOpVT, Cond: N->getOperand(Num: 0), LHS: OtherOp, RHS: IdentityOperand); |
15334 | return DAG.getNode(Opcode: TrueVal.getOpcode(), DL, VT, N1: FalseVal, N2: NewSel); |
15335 | } |
15336 | |
// This tries to get rid of `select` and `icmp` that are being used to handle
// targets that do not support `cttz(0)`/`ctlz(0)`.
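// For example, (select (seteq X, 0), 0, (cttz X)) is rewritten to
// (and (cttz X), BitWidth - 1): cttz returns BitWidth for a zero input, and
// masking with BitWidth - 1 folds that case to 0.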
15339 | static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { |
15340 | SDValue Cond = N->getOperand(Num: 0); |
15341 | |
15342 | // This represents either CTTZ or CTLZ instruction. |
15343 | SDValue CountZeroes; |
15344 | |
15345 | SDValue ValOnZero; |
15346 | |
15347 | if (Cond.getOpcode() != ISD::SETCC) |
15348 | return SDValue(); |
15349 | |
15350 | if (!isNullConstant(V: Cond->getOperand(Num: 1))) |
15351 | return SDValue(); |
15352 | |
15353 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond->getOperand(Num: 2))->get(); |
15354 | if (CCVal == ISD::CondCode::SETEQ) { |
15355 | CountZeroes = N->getOperand(Num: 2); |
15356 | ValOnZero = N->getOperand(Num: 1); |
15357 | } else if (CCVal == ISD::CondCode::SETNE) { |
15358 | CountZeroes = N->getOperand(Num: 1); |
15359 | ValOnZero = N->getOperand(Num: 2); |
15360 | } else { |
15361 | return SDValue(); |
15362 | } |
15363 | |
15364 | if (CountZeroes.getOpcode() == ISD::TRUNCATE || |
15365 | CountZeroes.getOpcode() == ISD::ZERO_EXTEND) |
15366 | CountZeroes = CountZeroes.getOperand(i: 0); |
15367 | |
15368 | if (CountZeroes.getOpcode() != ISD::CTTZ && |
15369 | CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF && |
15370 | CountZeroes.getOpcode() != ISD::CTLZ && |
15371 | CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF) |
15372 | return SDValue(); |
15373 | |
15374 | if (!isNullConstant(V: ValOnZero)) |
15375 | return SDValue(); |
15376 | |
15377 | SDValue CountZeroesArgument = CountZeroes->getOperand(Num: 0); |
15378 | if (Cond->getOperand(Num: 0) != CountZeroesArgument) |
15379 | return SDValue(); |
15380 | |
15381 | if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { |
15382 | CountZeroes = DAG.getNode(Opcode: ISD::CTTZ, DL: SDLoc(CountZeroes), |
15383 | VT: CountZeroes.getValueType(), Operand: CountZeroesArgument); |
15384 | } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { |
15385 | CountZeroes = DAG.getNode(Opcode: ISD::CTLZ, DL: SDLoc(CountZeroes), |
15386 | VT: CountZeroes.getValueType(), Operand: CountZeroesArgument); |
15387 | } |
15388 | |
15389 | unsigned BitWidth = CountZeroes.getValueSizeInBits(); |
15390 | SDValue BitWidthMinusOne = |
15391 | DAG.getConstant(Val: BitWidth - 1, DL: SDLoc(N), VT: CountZeroes.getValueType()); |
15392 | |
15393 | auto AndNode = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: CountZeroes.getValueType(), |
15394 | N1: CountZeroes, N2: BitWidthMinusOne); |
15395 | return DAG.getZExtOrTrunc(Op: AndNode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0)); |
15396 | } |
15397 | |
15398 | static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, |
15399 | const RISCVSubtarget &Subtarget) { |
15400 | SDValue Cond = N->getOperand(Num: 0); |
15401 | SDValue True = N->getOperand(Num: 1); |
15402 | SDValue False = N->getOperand(Num: 2); |
15403 | SDLoc DL(N); |
15404 | EVT VT = N->getValueType(ResNo: 0); |
15405 | EVT CondVT = Cond.getValueType(); |
15406 | |
15407 | if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) |
15408 | return SDValue(); |
15409 | |
// Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
// BEXTI, where C is a power of 2.
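// For example, (select (seteq (and X, 4096), 0), T, F) becomes
// (select (setne (and X, 4096), 0), F, T); 4096 does not fit in a simm12,
// so the inverted test can be selected as a BEXTI of bit 12.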
15412 | if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && |
15413 | (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { |
15414 | SDValue LHS = Cond.getOperand(i: 0); |
15415 | SDValue RHS = Cond.getOperand(i: 1); |
15416 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get(); |
15417 | if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && |
15418 | isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) && isNullConstant(V: RHS)) { |
15419 | const APInt &MaskVal = LHS.getConstantOperandAPInt(i: 1); |
15420 | if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(N: 12)) |
15421 | return DAG.getSelect(DL, VT, |
15422 | Cond: DAG.getSetCC(DL, VT: CondVT, LHS, RHS, Cond: ISD::SETNE), |
15423 | LHS: False, RHS: True); |
15424 | } |
15425 | } |
15426 | return SDValue(); |
15427 | } |
15428 | |
15429 | static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, |
15430 | const RISCVSubtarget &Subtarget) { |
15431 | if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG)) |
15432 | return Folded; |
15433 | |
15434 | if (SDValue V = useInversedSetcc(N, DAG, Subtarget)) |
15435 | return V; |
15436 | |
15437 | if (Subtarget.hasConditionalMoveFusion()) |
15438 | return SDValue(); |
15439 | |
15440 | SDValue TrueVal = N->getOperand(Num: 1); |
15441 | SDValue FalseVal = N->getOperand(Num: 2); |
15442 | if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false)) |
15443 | return V; |
15444 | return tryFoldSelectIntoOp(N, DAG, TrueVal: FalseVal, FalseVal: TrueVal, /*Swapped*/true); |
15445 | } |
15446 | |
15447 | /// If we have a build_vector where each lane is binop X, C, where C |
15448 | /// is a constant (but not necessarily the same constant on all lanes), |
15449 | /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..). |
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
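/// For example, (build_vector (add X1, 1), (add X2, 2)) becomes
/// (add (build_vector X1, X2), (build_vector 1, 2)).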
15452 | static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, |
15453 | const RISCVSubtarget &Subtarget, |
15454 | const RISCVTargetLowering &TLI) { |
15455 | SDLoc DL(N); |
15456 | EVT VT = N->getValueType(ResNo: 0); |
15457 | |
15458 | assert(!VT.isScalableVector() && "unexpected build vector" ); |
15459 | |
15460 | if (VT.getVectorNumElements() == 1) |
15461 | return SDValue(); |
15462 | |
15463 | const unsigned Opcode = N->op_begin()->getNode()->getOpcode(); |
15464 | if (!TLI.isBinOp(Opcode)) |
15465 | return SDValue(); |
15466 | |
15467 | if (!TLI.isOperationLegalOrCustom(Op: Opcode, VT) || !TLI.isTypeLegal(VT)) |
15468 | return SDValue(); |
15469 | |
15470 | // This BUILD_VECTOR involves an implicit truncation, and sinking |
15471 | // truncates through binops is non-trivial. |
15472 | if (N->op_begin()->getValueType() != VT.getVectorElementType()) |
15473 | return SDValue(); |
15474 | |
15475 | SmallVector<SDValue> LHSOps; |
15476 | SmallVector<SDValue> RHSOps; |
15477 | for (SDValue Op : N->ops()) { |
15478 | if (Op.isUndef()) { |
15479 | // We can't form a divide or remainder from undef. |
15480 | if (!DAG.isSafeToSpeculativelyExecute(Opcode)) |
15481 | return SDValue(); |
15482 | |
15483 | LHSOps.push_back(Elt: Op); |
15484 | RHSOps.push_back(Elt: Op); |
15485 | continue; |
15486 | } |
15487 | |
// TODO: We can handle operations which have a neutral RHS value
// (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
// of profitability in a more explicit manner.
15491 | if (Op.getOpcode() != Opcode || !Op.hasOneUse()) |
15492 | return SDValue(); |
15493 | |
15494 | LHSOps.push_back(Elt: Op.getOperand(i: 0)); |
15495 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)) && |
15496 | !isa<ConstantFPSDNode>(Val: Op.getOperand(i: 1))) |
15497 | return SDValue(); |
15498 | // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may |
15499 | // have different LHS and RHS types. |
15500 | if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType()) |
15501 | return SDValue(); |
15502 | |
15503 | RHSOps.push_back(Elt: Op.getOperand(i: 1)); |
15504 | } |
15505 | |
15506 | return DAG.getNode(Opcode, DL, VT, N1: DAG.getBuildVector(VT, DL, Ops: LHSOps), |
15507 | N2: DAG.getBuildVector(VT, DL, Ops: RHSOps)); |
15508 | } |
15509 | |
15510 | static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, |
15511 | const RISCVSubtarget &Subtarget, |
15512 | const RISCVTargetLowering &TLI) { |
15513 | SDValue InVec = N->getOperand(Num: 0); |
15514 | SDValue InVal = N->getOperand(Num: 1); |
15515 | SDValue EltNo = N->getOperand(Num: 2); |
15516 | SDLoc DL(N); |
15517 | |
15518 | EVT VT = InVec.getValueType(); |
15519 | if (VT.isScalableVector()) |
15520 | return SDValue(); |
15521 | |
15522 | if (!InVec.hasOneUse()) |
15523 | return SDValue(); |
15524 | |
15525 | // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt |
15526 | // move the insert_vector_elts into the arms of the binop. Note that |
15527 | // the new RHS must be a constant. |
15528 | const unsigned InVecOpcode = InVec->getOpcode(); |
15529 | if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(Opcode: InVecOpcode) && |
15530 | InVal.hasOneUse()) { |
15531 | SDValue InVecLHS = InVec->getOperand(Num: 0); |
15532 | SDValue InVecRHS = InVec->getOperand(Num: 1); |
15533 | SDValue InValLHS = InVal->getOperand(Num: 0); |
15534 | SDValue InValRHS = InVal->getOperand(Num: 1); |
15535 | |
15536 | if (!ISD::isBuildVectorOfConstantSDNodes(N: InVecRHS.getNode())) |
15537 | return SDValue(); |
15538 | if (!isa<ConstantSDNode>(Val: InValRHS) && !isa<ConstantFPSDNode>(Val: InValRHS)) |
15539 | return SDValue(); |
15540 | // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may |
15541 | // have different LHS and RHS types. |
15542 | if (InVec.getOperand(i: 0).getValueType() != InVec.getOperand(i: 1).getValueType()) |
15543 | return SDValue(); |
15544 | SDValue LHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, |
15545 | N1: InVecLHS, N2: InValLHS, N3: EltNo); |
15546 | SDValue RHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, |
15547 | N1: InVecRHS, N2: InValRHS, N3: EltNo); |
15548 | return DAG.getNode(Opcode: InVecOpcode, DL, VT, N1: LHS, N2: RHS); |
15549 | } |
15550 | |
15551 | // Given insert_vector_elt (concat_vectors ...), InVal, Elt |
15552 | // move the insert_vector_elt to the source operand of the concat_vector. |
15553 | if (InVec.getOpcode() != ISD::CONCAT_VECTORS) |
15554 | return SDValue(); |
15555 | |
15556 | auto *IndexC = dyn_cast<ConstantSDNode>(Val&: EltNo); |
15557 | if (!IndexC) |
15558 | return SDValue(); |
15559 | unsigned Elt = IndexC->getZExtValue(); |
15560 | |
15561 | EVT ConcatVT = InVec.getOperand(i: 0).getValueType(); |
15562 | if (ConcatVT.getVectorElementType() != InVal.getValueType()) |
15563 | return SDValue(); |
15564 | unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); |
15565 | SDValue NewIdx = DAG.getVectorIdxConstant(Val: Elt % ConcatNumElts, DL); |
15566 | |
15567 | unsigned ConcatOpIdx = Elt / ConcatNumElts; |
15568 | SDValue ConcatOp = InVec.getOperand(i: ConcatOpIdx); |
15569 | ConcatOp = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ConcatVT, |
15570 | N1: ConcatOp, N2: InVal, N3: NewIdx); |
15571 | |
15572 | SmallVector<SDValue> ConcatOps; |
15573 | ConcatOps.append(in_start: InVec->op_begin(), in_end: InVec->op_end()); |
15574 | ConcatOps[ConcatOpIdx] = ConcatOp; |
15575 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps); |
15576 | } |
15577 | |
15578 | // If we're concatenating a series of vector loads like |
15579 | // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... |
15580 | // Then we can turn this into a strided load by widening the vector elements |
15581 | // vlse32 p, stride=n |
15582 | static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, |
15583 | const RISCVSubtarget &Subtarget, |
15584 | const RISCVTargetLowering &TLI) { |
15585 | SDLoc DL(N); |
15586 | EVT VT = N->getValueType(ResNo: 0); |
15587 | |
15588 | // Only perform this combine on legal MVTs. |
15589 | if (!TLI.isTypeLegal(VT)) |
15590 | return SDValue(); |
15591 | |
15592 | // TODO: Potentially extend this to scalable vectors |
15593 | if (VT.isScalableVector()) |
15594 | return SDValue(); |
15595 | |
15596 | auto *BaseLd = dyn_cast<LoadSDNode>(Val: N->getOperand(Num: 0)); |
15597 | if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(N: BaseLd) || |
15598 | !SDValue(BaseLd, 0).hasOneUse()) |
15599 | return SDValue(); |
15600 | |
15601 | EVT BaseLdVT = BaseLd->getValueType(ResNo: 0); |
15602 | |
15603 | // Go through the loads and check that they're strided |
15604 | SmallVector<LoadSDNode *> Lds; |
15605 | Lds.push_back(Elt: BaseLd); |
15606 | Align Align = BaseLd->getAlign(); |
15607 | for (SDValue Op : N->ops().drop_front()) { |
15608 | auto *Ld = dyn_cast<LoadSDNode>(Val&: Op); |
15609 | if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || |
15610 | Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(N: Ld) || |
15611 | Ld->getValueType(ResNo: 0) != BaseLdVT) |
15612 | return SDValue(); |
15613 | |
15614 | Lds.push_back(Elt: Ld); |
15615 | |
15616 | // The common alignment is the most restrictive (smallest) of all the loads |
15617 | Align = std::min(a: Align, b: Ld->getAlign()); |
15618 | } |
15619 | |
15620 | using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>; |
15621 | auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, |
15622 | LoadSDNode *Ld2) -> std::optional<PtrDiff> { |
15623 | // If the load ptrs can be decomposed into a common (Base + Index) with a |
15624 | // common constant stride, then return the constant stride. |
15625 | BaseIndexOffset BIO1 = BaseIndexOffset::match(N: Ld1, DAG); |
15626 | BaseIndexOffset BIO2 = BaseIndexOffset::match(N: Ld2, DAG); |
15627 | if (BIO1.equalBaseIndex(Other: BIO2, DAG)) |
15628 | return {{BIO2.getOffset() - BIO1.getOffset(), false}}; |
15629 | |
15630 | // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) |
15631 | SDValue P1 = Ld1->getBasePtr(); |
15632 | SDValue P2 = Ld2->getBasePtr(); |
15633 | if (P2.getOpcode() == ISD::ADD && P2.getOperand(i: 0) == P1) |
15634 | return {{P2.getOperand(i: 1), false}}; |
15635 | if (P1.getOpcode() == ISD::ADD && P1.getOperand(i: 0) == P2) |
15636 | return {{P1.getOperand(i: 1), true}}; |
15637 | |
15638 | return std::nullopt; |
15639 | }; |
15640 | |
15641 | // Get the distance between the first and second loads |
15642 | auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); |
15643 | if (!BaseDiff) |
15644 | return SDValue(); |
15645 | |
15646 | // Check all the loads are the same distance apart |
15647 | for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) |
15648 | if (GetPtrDiff(*It, *std::next(x: It)) != BaseDiff) |
15649 | return SDValue(); |
15650 | |
15651 | // TODO: At this point, we've successfully matched a generalized gather |
15652 | // load. Maybe we should emit that, and then move the specialized |
15653 | // matchers above and below into a DAG combine? |
15654 | |
15655 | // Get the widened scalar type, e.g. v4i8 -> i64 |
15656 | unsigned WideScalarBitWidth = |
15657 | BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements(); |
15658 | MVT WideScalarVT = MVT::getIntegerVT(BitWidth: WideScalarBitWidth); |
15659 | |
15660 | // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64 |
15661 | MVT WideVecVT = MVT::getVectorVT(VT: WideScalarVT, NumElements: N->getNumOperands()); |
15662 | if (!TLI.isTypeLegal(VT: WideVecVT)) |
15663 | return SDValue(); |
15664 | |
15665 | // Check that the operation is legal |
15666 | if (!TLI.isLegalStridedLoadStore(DataType: WideVecVT, Alignment: Align)) |
15667 | return SDValue(); |
15668 | |
15669 | auto [StrideVariant, MustNegateStride] = *BaseDiff; |
15670 | SDValue Stride = std::holds_alternative<SDValue>(v: StrideVariant) |
15671 | ? std::get<SDValue>(v&: StrideVariant) |
15672 | : DAG.getConstant(Val: std::get<int64_t>(v&: StrideVariant), DL, |
15673 | VT: Lds[0]->getOffset().getValueType()); |
15674 | if (MustNegateStride) |
15675 | Stride = DAG.getNegative(Val: Stride, DL, VT: Stride.getValueType()); |
15676 | |
15677 | SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other}); |
15678 | SDValue IntID = |
15679 | DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, |
15680 | Subtarget.getXLenVT()); |
15681 | |
15682 | SDValue AllOneMask = |
15683 | DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL, |
15684 | DAG.getConstant(1, DL, MVT::i1)); |
15685 | |
15686 | SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(VT: WideVecVT), |
15687 | BaseLd->getBasePtr(), Stride, AllOneMask}; |
15688 | |
15689 | uint64_t MemSize; |
15690 | if (auto *ConstStride = dyn_cast<ConstantSDNode>(Val&: Stride); |
15691 | ConstStride && ConstStride->getSExtValue() >= 0) |
15692 | // total size = (elsize * n) + (stride - elsize) * (n-1) |
15693 | // = elsize + stride * (n-1) |
15694 | MemSize = WideScalarVT.getSizeInBits() + |
15695 | ConstStride->getSExtValue() * (N->getNumOperands() - 1); |
15696 | else |
15697 | // If Stride isn't constant, then we can't know how much it will load |
15698 | MemSize = MemoryLocation::UnknownSize; |
15699 | |
15700 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
15701 | PtrInfo: BaseLd->getPointerInfo(), F: BaseLd->getMemOperand()->getFlags(), Size: MemSize, |
15702 | BaseAlignment: Align); |
15703 | |
15704 | SDValue StridedLoad = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, |
15705 | Ops, MemVT: WideVecVT, MMO); |
15706 | for (SDValue Ld : N->ops()) |
15707 | DAG.makeEquivalentMemoryOrdering(OldLoad: cast<LoadSDNode>(Val&: Ld), NewMemOp: StridedLoad); |
15708 | |
15709 | return DAG.getBitcast(VT: VT.getSimpleVT(), V: StridedLoad); |
15710 | } |
15711 | |
15712 | static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, |
15713 | const RISCVSubtarget &Subtarget) { |
15714 | |
15715 | assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); |
15716 | |
15717 | if (N->getValueType(ResNo: 0).isFixedLengthVector()) |
15718 | return SDValue(); |
15719 | |
15720 | SDValue Addend = N->getOperand(Num: 0); |
15721 | SDValue MulOp = N->getOperand(Num: 1); |
15722 | |
15723 | if (N->getOpcode() == RISCVISD::ADD_VL) { |
15724 | SDValue AddMergeOp = N->getOperand(Num: 2); |
15725 | if (!AddMergeOp.isUndef()) |
15726 | return SDValue(); |
15727 | } |
15728 | |
15729 | auto IsVWMulOpc = [](unsigned Opc) { |
15730 | switch (Opc) { |
15731 | case RISCVISD::VWMUL_VL: |
15732 | case RISCVISD::VWMULU_VL: |
15733 | case RISCVISD::VWMULSU_VL: |
15734 | return true; |
15735 | default: |
15736 | return false; |
15737 | } |
15738 | }; |
15739 | |
15740 | if (!IsVWMulOpc(MulOp.getOpcode())) |
15741 | std::swap(a&: Addend, b&: MulOp); |
15742 | |
15743 | if (!IsVWMulOpc(MulOp.getOpcode())) |
15744 | return SDValue(); |
15745 | |
15746 | SDValue MulMergeOp = MulOp.getOperand(i: 2); |
15747 | |
15748 | if (!MulMergeOp.isUndef()) |
15749 | return SDValue(); |
15750 | |
15751 | auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, |
15752 | const RISCVSubtarget &Subtarget) { |
15753 | if (N->getOpcode() == ISD::ADD) { |
15754 | SDLoc DL(N); |
15755 | return getDefaultScalableVLOps(VecVT: N->getSimpleValueType(ResNo: 0), DL, DAG, |
15756 | Subtarget); |
15757 | } |
15758 | return std::make_pair(x: N->getOperand(Num: 3), y: N->getOperand(Num: 4)); |
15759 | }(N, DAG, Subtarget); |
15760 | |
15761 | SDValue MulMask = MulOp.getOperand(i: 3); |
15762 | SDValue MulVL = MulOp.getOperand(i: 4); |
15763 | |
15764 | if (AddMask != MulMask || AddVL != MulVL) |
15765 | return SDValue(); |
15766 | |
15767 | unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL; |
15768 | static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL, |
15769 | "Unexpected opcode after VWMACC_VL" ); |
15770 | static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL, |
15771 | "Unexpected opcode after VWMACC_VL!" ); |
15772 | static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL, |
15773 | "Unexpected opcode after VWMUL_VL!" ); |
15774 | static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL, |
15775 | "Unexpected opcode after VWMUL_VL!" ); |
15776 | |
15777 | SDLoc DL(N); |
15778 | EVT VT = N->getValueType(ResNo: 0); |
15779 | SDValue Ops[] = {MulOp.getOperand(i: 0), MulOp.getOperand(i: 1), Addend, AddMask, |
15780 | AddVL}; |
15781 | return DAG.getNode(Opcode: Opc, DL, VT, Ops); |
15782 | } |
15783 | |
15784 | static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, |
15785 | ISD::MemIndexType &IndexType, |
15786 | RISCVTargetLowering::DAGCombinerInfo &DCI) { |
15787 | if (!DCI.isBeforeLegalize()) |
15788 | return false; |
15789 | |
15790 | SelectionDAG &DAG = DCI.DAG; |
15791 | const MVT XLenVT = |
15792 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); |
15793 | |
15794 | const EVT IndexVT = Index.getValueType(); |
15795 | |
15796 | // RISC-V indexed loads only support the "unsigned unscaled" addressing |
15797 | // mode, so anything else must be manually legalized. |
15798 | if (!isIndexTypeSigned(IndexType)) |
15799 | return false; |
15800 | |
15801 | if (IndexVT.getVectorElementType().bitsLT(VT: XLenVT)) { |
15802 | // Any index legalization should first promote to XLenVT, so we don't lose |
15803 | // bits when scaling. This may create an illegal index type so we let |
15804 | // LLVM's legalization take care of the splitting. |
15805 | // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. |
15806 | Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, |
15807 | VT: IndexVT.changeVectorElementType(EltVT: XLenVT), Operand: Index); |
15808 | } |
15809 | IndexType = ISD::UNSIGNED_SCALED; |
15810 | return true; |
15811 | } |
15812 | |
15813 | /// Match the index vector of a scatter or gather node as the shuffle mask |
15814 | /// which performs the rearrangement if possible. Will only match if |
15815 | /// all lanes are touched, and thus replacing the scatter or gather with |
15816 | /// a unit strided access and shuffle is legal. |
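/// For example, an all-ones-masked gather of v4i32 with constant byte-offset
/// index (4, 0, 12, 8) touches every lane and matches shuffle mask
/// <1, 0, 3, 2> over a unit-strided load.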
15817 | static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, |
15818 | SmallVector<int> &ShuffleMask) { |
15819 | if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode())) |
15820 | return false; |
15821 | if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode())) |
15822 | return false; |
15823 | |
15824 | const unsigned ElementSize = VT.getScalarStoreSize(); |
15825 | const unsigned NumElems = VT.getVectorNumElements(); |
15826 | |
15827 | // Create the shuffle mask and check all bits active |
15828 | assert(ShuffleMask.empty()); |
15829 | BitVector ActiveLanes(NumElems); |
15830 | for (unsigned i = 0; i < Index->getNumOperands(); i++) { |
15831 | // TODO: We've found an active bit of UB, and could be |
15832 | // more aggressive here if desired. |
15833 | if (Index->getOperand(Num: i)->isUndef()) |
15834 | return false; |
15835 | uint64_t C = Index->getConstantOperandVal(Num: i); |
15836 | if (C % ElementSize != 0) |
15837 | return false; |
15838 | C = C / ElementSize; |
15839 | if (C >= NumElems) |
15840 | return false; |
15841 | ShuffleMask.push_back(Elt: C); |
15842 | ActiveLanes.set(C); |
15843 | } |
15844 | return ActiveLanes.all(); |
15845 | } |
15846 | |
/// Match the index of a gather or scatter operation as an operation
/// with twice the element width and half the number of elements. This is
/// generally profitable (if legal) because these operations are linear
/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
/// come out ahead.
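/// For example, a gather of v4i16 with byte offsets (0, 2, 8, 10) can be
/// performed as a gather of v2i32 with byte offsets (0, 8).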
15852 | static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, |
15853 | Align BaseAlign, const RISCVSubtarget &ST) { |
15854 | if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode())) |
15855 | return false; |
15856 | if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode())) |
15857 | return false; |
15858 | |
// Attempt a doubling. If we can use an element type 4x or 8x in
// size, this will happen via multiple iterations of the transform.
15861 | const unsigned NumElems = VT.getVectorNumElements(); |
15862 | if (NumElems % 2 != 0) |
15863 | return false; |
15864 | |
15865 | const unsigned ElementSize = VT.getScalarStoreSize(); |
15866 | const unsigned WiderElementSize = ElementSize * 2; |
15867 | if (WiderElementSize > ST.getELen()/8) |
15868 | return false; |
15869 | |
15870 | if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize) |
15871 | return false; |
15872 | |
15873 | for (unsigned i = 0; i < Index->getNumOperands(); i++) { |
15874 | // TODO: We've found an active bit of UB, and could be |
15875 | // more aggressive here if desired. |
15876 | if (Index->getOperand(Num: i)->isUndef()) |
15877 | return false; |
15878 | // TODO: This offset check is too strict if we support fully |
15879 | // misaligned memory operations. |
15880 | uint64_t C = Index->getConstantOperandVal(Num: i); |
15881 | if (i % 2 == 0) { |
15882 | if (C % WiderElementSize != 0) |
15883 | return false; |
15884 | continue; |
15885 | } |
15886 | uint64_t Last = Index->getConstantOperandVal(Num: i-1); |
15887 | if (C != Last + ElementSize) |
15888 | return false; |
15889 | } |
15890 | return true; |
15891 | } |
15892 | |
15893 | |
15894 | SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, |
15895 | DAGCombinerInfo &DCI) const { |
15896 | SelectionDAG &DAG = DCI.DAG; |
15897 | const MVT XLenVT = Subtarget.getXLenVT(); |
15898 | SDLoc DL(N); |
15899 | |
15900 | // Helper to call SimplifyDemandedBits on an operand of N where only some low |
15901 | // bits are demanded. N will be added to the Worklist if it was not deleted. |
15902 | // Caller should return SDValue(N, 0) if this returns true. |
15903 | auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { |
15904 | SDValue Op = N->getOperand(Num: OpNo); |
15905 | APInt Mask = APInt::getLowBitsSet(numBits: Op.getValueSizeInBits(), loBitsSet: LowBits); |
15906 | if (!SimplifyDemandedBits(Op, DemandedBits: Mask, DCI)) |
15907 | return false; |
15908 | |
15909 | if (N->getOpcode() != ISD::DELETED_NODE) |
15910 | DCI.AddToWorklist(N); |
15911 | return true; |
15912 | }; |
15913 | |
15914 | switch (N->getOpcode()) { |
15915 | default: |
15916 | break; |
15917 | case RISCVISD::SplitF64: { |
15918 | SDValue Op0 = N->getOperand(Num: 0); |
15919 | // If the input to SplitF64 is just BuildPairF64 then the operation is |
15920 | // redundant. Instead, use BuildPairF64's operands directly. |
15921 | if (Op0->getOpcode() == RISCVISD::BuildPairF64) |
15922 | return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1)); |
15923 | |
15924 | if (Op0->isUndef()) { |
15925 | SDValue Lo = DAG.getUNDEF(MVT::i32); |
15926 | SDValue Hi = DAG.getUNDEF(MVT::i32); |
15927 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
15928 | } |
15929 | |
15930 | // It's cheaper to materialise two 32-bit integers than to load a double |
15931 | // from the constant pool and transfer it to integer registers through the |
15932 | // stack. |
15933 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) { |
15934 | APInt V = C->getValueAPF().bitcastToAPInt(); |
15935 | SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); |
15936 | SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); |
15937 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
15938 | } |
15939 | |
15940 | // This is a target-specific version of a DAGCombine performed in |
15941 | // DAGCombiner::visitBITCAST. It performs the equivalent of: |
15942 | // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) |
15943 | // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) |
15944 | if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || |
15945 | !Op0.getNode()->hasOneUse()) |
15946 | break; |
15947 | SDValue NewSplitF64 = |
15948 | DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), |
15949 | Op0.getOperand(0)); |
15950 | SDValue Lo = NewSplitF64.getValue(R: 0); |
15951 | SDValue Hi = NewSplitF64.getValue(R: 1); |
15952 | APInt SignBit = APInt::getSignMask(BitWidth: 32); |
15953 | if (Op0.getOpcode() == ISD::FNEG) { |
15954 | SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, |
15955 | DAG.getConstant(SignBit, DL, MVT::i32)); |
15956 | return DCI.CombineTo(N, Res0: Lo, Res1: NewHi); |
15957 | } |
15958 | assert(Op0.getOpcode() == ISD::FABS); |
15959 | SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, |
15960 | DAG.getConstant(~SignBit, DL, MVT::i32)); |
15961 | return DCI.CombineTo(N, Res0: Lo, Res1: NewHi); |
15962 | } |
15963 | case RISCVISD::SLLW: |
15964 | case RISCVISD::SRAW: |
15965 | case RISCVISD::SRLW: |
15966 | case RISCVISD::RORW: |
15967 | case RISCVISD::ROLW: { |
15968 | // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. |
15969 | if (SimplifyDemandedLowBitsHelper(0, 32) || |
15970 | SimplifyDemandedLowBitsHelper(1, 5)) |
15971 | return SDValue(N, 0); |
15972 | |
15973 | break; |
15974 | } |
15975 | case RISCVISD::CLZW: |
15976 | case RISCVISD::CTZW: { |
15977 | // Only the lower 32 bits of the first operand are read |
15978 | if (SimplifyDemandedLowBitsHelper(0, 32)) |
15979 | return SDValue(N, 0); |
15980 | break; |
15981 | } |
15982 | case RISCVISD::FMV_W_X_RV64: { |
// If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
// conversion is unnecessary and can be replaced with the
// FMV_X_ANYEXTW_RV64 operand.
15986 | SDValue Op0 = N->getOperand(Num: 0); |
15987 | if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64) |
15988 | return Op0.getOperand(i: 0); |
15989 | break; |
15990 | } |
15991 | case RISCVISD::FMV_X_ANYEXTH: |
15992 | case RISCVISD::FMV_X_ANYEXTW_RV64: { |
15993 | SDLoc DL(N); |
15994 | SDValue Op0 = N->getOperand(Num: 0); |
15995 | MVT VT = N->getSimpleValueType(ResNo: 0); |
15996 | // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the |
15997 | // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 |
15998 | // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. |
15999 | if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && |
16000 | Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || |
16001 | (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && |
16002 | Op0->getOpcode() == RISCVISD::FMV_H_X)) { |
16003 | assert(Op0.getOperand(0).getValueType() == VT && |
16004 | "Unexpected value type!" ); |
16005 | return Op0.getOperand(i: 0); |
16006 | } |
16007 | |
16008 | // This is a target-specific version of a DAGCombine performed in |
16009 | // DAGCombiner::visitBITCAST. It performs the equivalent of: |
16010 | // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) |
16011 | // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) |
16012 | if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || |
16013 | !Op0.getNode()->hasOneUse()) |
16014 | break; |
16015 | SDValue NewFMV = DAG.getNode(Opcode: N->getOpcode(), DL, VT, Operand: Op0.getOperand(i: 0)); |
16016 | unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; |
16017 | APInt SignBit = APInt::getSignMask(BitWidth: FPBits).sext(width: VT.getSizeInBits()); |
16018 | if (Op0.getOpcode() == ISD::FNEG) |
16019 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewFMV, |
16020 | N2: DAG.getConstant(Val: SignBit, DL, VT)); |
16021 | |
16022 | assert(Op0.getOpcode() == ISD::FABS); |
16023 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NewFMV, |
16024 | N2: DAG.getConstant(Val: ~SignBit, DL, VT)); |
16025 | } |
16026 | case ISD::ABS: { |
16027 | EVT VT = N->getValueType(ResNo: 0); |
16028 | SDValue N0 = N->getOperand(Num: 0); |
16029 | // abs (sext) -> zext (abs) |
16030 | // abs (zext) -> zext (handled elsewhere) |
16031 | if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) { |
16032 | SDValue Src = N0.getOperand(i: 0); |
16033 | SDLoc DL(N); |
16034 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, |
16035 | Operand: DAG.getNode(Opcode: ISD::ABS, DL, VT: Src.getValueType(), Operand: Src)); |
16036 | } |
16037 | break; |
16038 | } |
16039 | case ISD::ADD: { |
16040 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16041 | return V; |
16042 | if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) |
16043 | return V; |
16044 | return performADDCombine(N, DAG, Subtarget); |
16045 | } |
16046 | case ISD::SUB: { |
16047 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16048 | return V; |
16049 | return performSUBCombine(N, DAG, Subtarget); |
16050 | } |
16051 | case ISD::AND: |
16052 | return performANDCombine(N, DCI, Subtarget); |
16053 | case ISD::OR: { |
16054 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16055 | return V; |
16056 | return performORCombine(N, DCI, Subtarget); |
16057 | } |
16058 | case ISD::XOR: |
16059 | return performXORCombine(N, DAG, Subtarget); |
16060 | case ISD::MUL: |
16061 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16062 | return V; |
16063 | return performMULCombine(N, DAG, DCI, Subtarget); |
16064 | case ISD::SDIV: |
16065 | case ISD::UDIV: |
16066 | case ISD::SREM: |
16067 | case ISD::UREM: |
16068 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
16069 | return V; |
16070 | break; |
16071 | case ISD::FADD: |
16072 | case ISD::UMAX: |
16073 | case ISD::UMIN: |
16074 | case ISD::SMAX: |
16075 | case ISD::SMIN: |
16076 | case ISD::FMAXNUM: |
16077 | case ISD::FMINNUM: { |
16078 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
16079 | return V; |
16080 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
16081 | return V; |
16082 | return SDValue(); |
16083 | } |
16084 | case ISD::SETCC: |
16085 | return performSETCCCombine(N, DAG, Subtarget); |
16086 | case ISD::SIGN_EXTEND_INREG: |
16087 | return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); |
16088 | case ISD::ZERO_EXTEND: |
16089 | // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during |
16090 | // type legalization. This is safe because fp_to_uint produces poison if |
16091 | // it overflows. |
16092 | if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { |
16093 | SDValue Src = N->getOperand(Num: 0); |
16094 | if (Src.getOpcode() == ISD::FP_TO_UINT && |
16095 | isTypeLegal(Src.getOperand(0).getValueType())) |
16096 | return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, |
16097 | Src.getOperand(0)); |
16098 | if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && |
16099 | isTypeLegal(VT: Src.getOperand(i: 1).getValueType())) { |
16100 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); |
16101 | SDValue Res = DAG.getNode(Opcode: ISD::STRICT_FP_TO_UINT, DL: SDLoc(N), VTList: VTs, |
16102 | N1: Src.getOperand(i: 0), N2: Src.getOperand(i: 1)); |
16103 | DCI.CombineTo(N, Res); |
16104 | DAG.ReplaceAllUsesOfValueWith(From: Src.getValue(R: 1), To: Res.getValue(R: 1)); |
16105 | DCI.recursivelyDeleteUnusedNodes(N: Src.getNode()); |
16106 | return SDValue(N, 0); // Return N so it doesn't get rechecked. |
16107 | } |
16108 | } |
16109 | return SDValue(); |
16110 | case RISCVISD::TRUNCATE_VECTOR_VL: { |
16111 | // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) |
    // This benefits the cases where X and Y are both the same low-precision
    // vector value type. Since the truncate is lowered into n levels of
    // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
    // such a pattern would otherwise be expanded into a series of "vsetvli"
    // and "vnsrl" instructions before reaching this point.
16117 | auto IsTruncNode = [](SDValue V) { |
16118 | if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) |
16119 | return false; |
16120 | SDValue VL = V.getOperand(i: 2); |
16121 | auto *C = dyn_cast<ConstantSDNode>(Val&: VL); |
16122 | // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand |
16123 | bool IsVLMAXForVMSET = (C && C->isAllOnes()) || |
16124 | (isa<RegisterSDNode>(VL) && |
16125 | cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); |
16126 | return V.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL && |
16127 | IsVLMAXForVMSET; |
16128 | }; |
16129 | |
16130 | SDValue Op = N->getOperand(Num: 0); |
16131 | |
    // First walk down to the innermost TRUNCATE_VECTOR_VL node so we can
    // recognize this pattern.
16134 | while (IsTruncNode(Op)) { |
16135 | if (!Op.hasOneUse()) |
16136 | return SDValue(); |
16137 | Op = Op.getOperand(i: 0); |
16138 | } |
16139 | |
16140 | if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) { |
16141 | SDValue N0 = Op.getOperand(i: 0); |
16142 | SDValue N1 = Op.getOperand(i: 1); |
16143 | if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && |
16144 | N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) { |
16145 | SDValue N00 = N0.getOperand(i: 0); |
16146 | SDValue N10 = N1.getOperand(i: 0); |
16147 | if (N00.getValueType().isVector() && |
16148 | N00.getValueType() == N10.getValueType() && |
16149 | N->getValueType(ResNo: 0) == N10.getValueType()) { |
16150 | unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; |
16151 | SDValue SMin = DAG.getNode( |
16152 | Opcode: ISD::SMIN, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0), N1: N10, |
16153 | N2: DAG.getConstant(Val: MaxShAmt, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0))); |
16154 | return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: N00, N2: SMin); |
16155 | } |
16156 | } |
16157 | } |
16158 | break; |
16159 | } |
16160 | case ISD::TRUNCATE: |
16161 | return performTRUNCATECombine(N, DAG, Subtarget); |
16162 | case ISD::SELECT: |
16163 | return performSELECTCombine(N, DAG, Subtarget); |
16164 | case RISCVISD::CZERO_EQZ: |
16165 | case RISCVISD::CZERO_NEZ: |
16166 | // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1. |
16167 | // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1. |
16168 | if (N->getOperand(Num: 1).getOpcode() == ISD::XOR && |
16169 | isOneConstant(V: N->getOperand(Num: 1).getOperand(i: 1))) { |
16170 | SDValue Cond = N->getOperand(Num: 1).getOperand(i: 0); |
16171 | APInt Mask = APInt::getBitsSetFrom(numBits: Cond.getValueSizeInBits(), loBit: 1); |
16172 | if (DAG.MaskedValueIsZero(Op: Cond, Mask)) { |
16173 | unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ |
16174 | ? RISCVISD::CZERO_NEZ |
16175 | : RISCVISD::CZERO_EQZ; |
16176 | return DAG.getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
16177 | N1: N->getOperand(Num: 0), N2: Cond); |
16178 | } |
16179 | } |
16180 | return SDValue(); |
16181 | |
16182 | case RISCVISD::SELECT_CC: { |
    // Try to simplify or canonicalize this select_cc.
16184 | SDValue LHS = N->getOperand(Num: 0); |
16185 | SDValue RHS = N->getOperand(Num: 1); |
16186 | SDValue CC = N->getOperand(Num: 2); |
16187 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get(); |
16188 | SDValue TrueV = N->getOperand(Num: 3); |
16189 | SDValue FalseV = N->getOperand(Num: 4); |
16190 | SDLoc DL(N); |
16191 | EVT VT = N->getValueType(ResNo: 0); |
16192 | |
16193 | // If the True and False values are the same, we don't need a select_cc. |
16194 | if (TrueV == FalseV) |
16195 | return TrueV; |
16196 | |
16197 | // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z |
16198 | // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y |
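    // For example, on RV64 (select (x < 0), 3, 5) can lower to (registers
    // illustrative):
    //   srai a1, a0, 63    # a1 = x < 0 ? -1 : 0
    //   andi a1, a1, -2    # a1 = x < 0 ? (3 - 5) : 0
    //   addi a0, a1, 5     # a0 = x < 0 ? 3 : 5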
16199 | if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(Val: TrueV) && |
16200 | isa<ConstantSDNode>(Val: FalseV) && isNullConstant(V: RHS) && |
16201 | (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { |
16202 | if (CCVal == ISD::CondCode::SETGE) |
16203 | std::swap(a&: TrueV, b&: FalseV); |
16204 | |
16205 | int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue(); |
16206 | int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue(); |
      // Only handle simm12; if a constant is out of this range, it is better
      // left in a register.
16209 | if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) && |
16210 | isInt<12>(x: TrueSImm - FalseSImm)) { |
16211 | SDValue SRA = |
16212 | DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS, |
16213 | N2: DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT)); |
16214 | SDValue AND = |
16215 | DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA, |
16216 | N2: DAG.getConstant(Val: TrueSImm - FalseSImm, DL, VT)); |
16217 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV); |
16218 | } |
16219 | |
16220 | if (CCVal == ISD::CondCode::SETGE) |
16221 | std::swap(a&: TrueV, b&: FalseV); |
16222 | } |
16223 | |
16224 | if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) |
16225 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0), |
16226 | Ops: {LHS, RHS, CC, TrueV, FalseV}); |
16227 | |
16228 | if (!Subtarget.hasConditionalMoveFusion()) { |
16229 | // (select c, -1, y) -> -c | y |
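      // Since c is 0 or 1, -c is 0 or all-ones, so the OR yields y or -1.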
16230 | if (isAllOnesConstant(V: TrueV)) { |
16231 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal); |
16232 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16233 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: FalseV); |
16234 | } |
16235 | // (select c, y, -1) -> -!c | y |
16236 | if (isAllOnesConstant(V: FalseV)) { |
16237 | SDValue C = |
16238 | DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT)); |
16239 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16240 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: TrueV); |
16241 | } |
16242 | |
16243 | // (select c, 0, y) -> -!c & y |
16244 | if (isNullConstant(V: TrueV)) { |
16245 | SDValue C = |
16246 | DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT)); |
16247 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16248 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: FalseV); |
16249 | } |
16250 | // (select c, y, 0) -> -c & y |
16251 | if (isNullConstant(V: FalseV)) { |
16252 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal); |
16253 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16254 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: TrueV); |
16255 | } |
16256 | // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) |
16257 | // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) |
16258 | if (((isOneConstant(V: FalseV) && LHS == TrueV && |
16259 | CCVal == ISD::CondCode::SETNE) || |
16260 | (isOneConstant(V: TrueV) && LHS == FalseV && |
16261 | CCVal == ISD::CondCode::SETEQ)) && |
16262 | isNullConstant(V: RHS)) { |
        // LHS is used by both the setcc and the add; freeze it to be safe.
16264 | LHS = DAG.getFreeze(V: LHS); |
16265 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::CondCode::SETEQ); |
16266 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: LHS, N2: C); |
16267 | } |
16268 | } |
16269 | |
16270 | // If both true/false are an xor with 1, pull through the select. |
16271 | // This can occur after op legalization if both operands are setccs that |
16272 | // require an xor to invert. |
16273 | // FIXME: Generalize to other binary ops with identical operand? |
16274 | if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR && |
16275 | TrueV.getOperand(i: 1) == FalseV.getOperand(i: 1) && |
16276 | isOneConstant(V: TrueV.getOperand(i: 1)) && |
16277 | TrueV.hasOneUse() && FalseV.hasOneUse()) { |
16278 | SDValue NewSel = DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, N1: LHS, N2: RHS, N3: CC, |
16279 | N4: TrueV.getOperand(i: 0), N5: FalseV.getOperand(i: 0)); |
16280 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewSel, N2: TrueV.getOperand(i: 1)); |
16281 | } |
16282 | |
16283 | return SDValue(); |
16284 | } |
16285 | case RISCVISD::BR_CC: { |
16286 | SDValue LHS = N->getOperand(Num: 1); |
16287 | SDValue RHS = N->getOperand(Num: 2); |
16288 | SDValue CC = N->getOperand(Num: 3); |
16289 | SDLoc DL(N); |
16290 | |
16291 | if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) |
16292 | return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: N->getValueType(ResNo: 0), |
16293 | N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4)); |
16294 | |
16295 | return SDValue(); |
16296 | } |
16297 | case ISD::BITREVERSE: |
16298 | return performBITREVERSECombine(N, DAG, Subtarget); |
16299 | case ISD::FP_TO_SINT: |
16300 | case ISD::FP_TO_UINT: |
16301 | return performFP_TO_INTCombine(N, DCI, Subtarget); |
16302 | case ISD::FP_TO_SINT_SAT: |
16303 | case ISD::FP_TO_UINT_SAT: |
16304 | return performFP_TO_INT_SATCombine(N, DCI, Subtarget); |
16305 | case ISD::FCOPYSIGN: { |
16306 | EVT VT = N->getValueType(ResNo: 0); |
16307 | if (!VT.isVector()) |
16308 | break; |
16309 | // There is a form of VFSGNJ which injects the negated sign of its second |
    // operand. Try to bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
    // TRUNC=1.
16313 | SDValue In2 = N->getOperand(Num: 1); |
16314 | // Avoid cases where the extend/round has multiple uses, as duplicating |
16315 | // those is typically more expensive than removing a fneg. |
16316 | if (!In2.hasOneUse()) |
16317 | break; |
16318 | if (In2.getOpcode() != ISD::FP_EXTEND && |
16319 | (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(i: 1) != 0)) |
16320 | break; |
16321 | In2 = In2.getOperand(i: 0); |
16322 | if (In2.getOpcode() != ISD::FNEG) |
16323 | break; |
16324 | SDLoc DL(N); |
16325 | SDValue NewFPExtRound = DAG.getFPExtendOrRound(Op: In2.getOperand(i: 0), DL, VT); |
16326 | return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: N->getOperand(Num: 0), |
16327 | N2: DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: NewFPExtRound)); |
16328 | } |
16329 | case ISD::MGATHER: { |
16330 | const auto *MGN = dyn_cast<MaskedGatherSDNode>(Val: N); |
16331 | const EVT VT = N->getValueType(ResNo: 0); |
16332 | SDValue Index = MGN->getIndex(); |
16333 | SDValue ScaleOp = MGN->getScale(); |
16334 | ISD::MemIndexType IndexType = MGN->getIndexType(); |
16335 | assert(!MGN->isIndexScaled() && |
16336 | "Scaled gather/scatter should not be formed" ); |
16337 | |
16338 | SDLoc DL(N); |
16339 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
16340 | return DAG.getMaskedGather( |
16341 | VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL, |
16342 | Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), |
16343 | MGN->getBasePtr(), Index, ScaleOp}, |
16344 | MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType()); |
16345 | |
16346 | if (narrowIndex(N&: Index, IndexType, DAG)) |
16347 | return DAG.getMaskedGather( |
16348 | VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL, |
16349 | Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), |
16350 | MGN->getBasePtr(), Index, ScaleOp}, |
16351 | MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType()); |
16352 | |
16353 | if (Index.getOpcode() == ISD::BUILD_VECTOR && |
16354 | MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { |
16355 | // The sequence will be XLenVT, not the type of Index. Tell |
16356 | // isSimpleVIDSequence this so we avoid overflow. |
16357 | if (std::optional<VIDSequence> SimpleVID = |
16358 | isSimpleVIDSequence(Op: Index, EltSizeInBits: Subtarget.getXLen()); |
16359 | SimpleVID && SimpleVID->StepDenominator == 1) { |
16360 | const int64_t StepNumerator = SimpleVID->StepNumerator; |
16361 | const int64_t Addend = SimpleVID->Addend; |
16362 | |
16363 | // Note: We don't need to check alignment here since (by assumption |
        // from the existence of the gather), our offsets must be sufficiently
16365 | // aligned. |
16366 | |
16367 | const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
16368 | assert(MGN->getBasePtr()->getValueType(0) == PtrVT); |
16369 | assert(IndexType == ISD::UNSIGNED_SCALED); |
16370 | SDValue BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: MGN->getBasePtr(), |
16371 | N2: DAG.getConstant(Val: Addend, DL, VT: PtrVT)); |
16372 | |
16373 | SDVTList VTs = DAG.getVTList({VT, MVT::Other}); |
16374 | SDValue IntID = |
16375 | DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, |
16376 | XLenVT); |
16377 | SDValue Ops[] = |
16378 | {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr, |
16379 | DAG.getConstant(Val: StepNumerator, DL, VT: XLenVT), MGN->getMask()}; |
16380 | return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, |
16381 | Ops, MemVT: VT, MMO: MGN->getMemOperand()); |
16382 | } |
16383 | } |
16384 | |
16385 | SmallVector<int> ShuffleMask; |
16386 | if (MGN->getExtensionType() == ISD::NON_EXTLOAD && |
16387 | matchIndexAsShuffle(VT, Index, Mask: MGN->getMask(), ShuffleMask)) { |
16388 | SDValue Load = DAG.getMaskedLoad(VT, dl: DL, Chain: MGN->getChain(), |
16389 | Base: MGN->getBasePtr(), Offset: DAG.getUNDEF(VT: XLenVT), |
16390 | Mask: MGN->getMask(), Src0: DAG.getUNDEF(VT), |
16391 | MemVT: MGN->getMemoryVT(), MMO: MGN->getMemOperand(), |
16392 | AM: ISD::UNINDEXED, ISD::NON_EXTLOAD); |
16393 | SDValue Shuffle = |
16394 | DAG.getVectorShuffle(VT, dl: DL, N1: Load, N2: DAG.getUNDEF(VT), Mask: ShuffleMask); |
16395 | return DAG.getMergeValues(Ops: {Shuffle, Load.getValue(R: 1)}, dl: DL); |
16396 | } |
16397 | |
16398 | if (MGN->getExtensionType() == ISD::NON_EXTLOAD && |
16399 | matchIndexAsWiderOp(VT, Index, Mask: MGN->getMask(), |
16400 | BaseAlign: MGN->getMemOperand()->getBaseAlign(), ST: Subtarget)) { |
16401 | SmallVector<SDValue> NewIndices; |
16402 | for (unsigned i = 0; i < Index->getNumOperands(); i += 2) |
16403 | NewIndices.push_back(Elt: Index.getOperand(i)); |
16404 | EVT IndexVT = Index.getValueType() |
16405 | .getHalfNumVectorElementsVT(Context&: *DAG.getContext()); |
16406 | Index = DAG.getBuildVector(VT: IndexVT, DL, Ops: NewIndices); |
16407 | |
16408 | unsigned ElementSize = VT.getScalarStoreSize(); |
16409 | EVT WideScalarVT = MVT::getIntegerVT(BitWidth: ElementSize * 8 * 2); |
16410 | auto EltCnt = VT.getVectorElementCount(); |
16411 | assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!" ); |
16412 | EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideScalarVT, |
16413 | EC: EltCnt.divideCoefficientBy(RHS: 2)); |
16414 | SDValue Passthru = DAG.getBitcast(VT: WideVT, V: MGN->getPassThru()); |
16415 | EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, |
16416 | EltCnt.divideCoefficientBy(2)); |
16417 | SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1)); |
16418 | |
16419 | SDValue Gather = |
16420 | DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL, |
16421 | {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(), |
16422 | Index, ScaleOp}, |
16423 | MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD); |
16424 | SDValue Result = DAG.getBitcast(VT, V: Gather.getValue(R: 0)); |
16425 | return DAG.getMergeValues(Ops: {Result, Gather.getValue(R: 1)}, dl: DL); |
16426 | } |
16427 | break; |
16428 | } |
  case ISD::MSCATTER: {
16430 | const auto *MSN = dyn_cast<MaskedScatterSDNode>(Val: N); |
16431 | SDValue Index = MSN->getIndex(); |
16432 | SDValue ScaleOp = MSN->getScale(); |
16433 | ISD::MemIndexType IndexType = MSN->getIndexType(); |
16434 | assert(!MSN->isIndexScaled() && |
16435 | "Scaled gather/scatter should not be formed" ); |
16436 | |
16437 | SDLoc DL(N); |
16438 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
16439 | return DAG.getMaskedScatter( |
16440 | VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL, |
16441 | Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), |
16442 | Index, ScaleOp}, |
16443 | MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore()); |
16444 | |
16445 | if (narrowIndex(N&: Index, IndexType, DAG)) |
16446 | return DAG.getMaskedScatter( |
16447 | VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL, |
16448 | Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), |
16449 | Index, ScaleOp}, |
16450 | MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore()); |
16451 | |
16452 | EVT VT = MSN->getValue()->getValueType(ResNo: 0); |
16453 | SmallVector<int> ShuffleMask; |
16454 | if (!MSN->isTruncatingStore() && |
16455 | matchIndexAsShuffle(VT, Index, Mask: MSN->getMask(), ShuffleMask)) { |
16456 | SDValue Shuffle = DAG.getVectorShuffle(VT, dl: DL, N1: MSN->getValue(), |
16457 | N2: DAG.getUNDEF(VT), Mask: ShuffleMask); |
16458 | return DAG.getMaskedStore(Chain: MSN->getChain(), dl: DL, Val: Shuffle, Base: MSN->getBasePtr(), |
16459 | Offset: DAG.getUNDEF(VT: XLenVT), Mask: MSN->getMask(), |
16460 | MemVT: MSN->getMemoryVT(), MMO: MSN->getMemOperand(), |
16461 | AM: ISD::UNINDEXED, IsTruncating: false); |
16462 | } |
16463 | break; |
16464 | } |
16465 | case ISD::VP_GATHER: { |
16466 | const auto *VPGN = dyn_cast<VPGatherSDNode>(Val: N); |
16467 | SDValue Index = VPGN->getIndex(); |
16468 | SDValue ScaleOp = VPGN->getScale(); |
16469 | ISD::MemIndexType IndexType = VPGN->getIndexType(); |
16470 | assert(!VPGN->isIndexScaled() && |
16471 | "Scaled gather/scatter should not be formed" ); |
16472 | |
16473 | SDLoc DL(N); |
16474 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
16475 | return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL, |
16476 | Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index, |
16477 | ScaleOp, VPGN->getMask(), |
16478 | VPGN->getVectorLength()}, |
16479 | MMO: VPGN->getMemOperand(), IndexType); |
16480 | |
16481 | if (narrowIndex(N&: Index, IndexType, DAG)) |
16482 | return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL, |
16483 | Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index, |
16484 | ScaleOp, VPGN->getMask(), |
16485 | VPGN->getVectorLength()}, |
16486 | MMO: VPGN->getMemOperand(), IndexType); |
16487 | |
16488 | break; |
16489 | } |
16490 | case ISD::VP_SCATTER: { |
16491 | const auto *VPSN = dyn_cast<VPScatterSDNode>(Val: N); |
16492 | SDValue Index = VPSN->getIndex(); |
16493 | SDValue ScaleOp = VPSN->getScale(); |
16494 | ISD::MemIndexType IndexType = VPSN->getIndexType(); |
16495 | assert(!VPSN->isIndexScaled() && |
16496 | "Scaled gather/scatter should not be formed" ); |
16497 | |
16498 | SDLoc DL(N); |
16499 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
16500 | return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL, |
16501 | Ops: {VPSN->getChain(), VPSN->getValue(), |
16502 | VPSN->getBasePtr(), Index, ScaleOp, |
16503 | VPSN->getMask(), VPSN->getVectorLength()}, |
16504 | MMO: VPSN->getMemOperand(), IndexType); |
16505 | |
16506 | if (narrowIndex(N&: Index, IndexType, DAG)) |
16507 | return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL, |
16508 | Ops: {VPSN->getChain(), VPSN->getValue(), |
16509 | VPSN->getBasePtr(), Index, ScaleOp, |
16510 | VPSN->getMask(), VPSN->getVectorLength()}, |
16511 | MMO: VPSN->getMemOperand(), IndexType); |
16512 | break; |
16513 | } |
16514 | case RISCVISD::SHL_VL: |
16515 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16516 | return V; |
16517 | [[fallthrough]]; |
16518 | case RISCVISD::SRA_VL: |
16519 | case RISCVISD::SRL_VL: { |
16520 | SDValue ShAmt = N->getOperand(Num: 1); |
16521 | if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { |
16522 | // We don't need the upper 32 bits of a 64-bit element for a shift amount. |
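      // (RVV shifts only read the low log2(SEW) bits of the shift amount, so
      // splatting just the low 32 bits is sufficient.)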
16523 | SDLoc DL(N); |
16524 | SDValue VL = N->getOperand(Num: 4); |
16525 | EVT VT = N->getValueType(ResNo: 0); |
16526 | ShAmt = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT), |
16527 | N2: ShAmt.getOperand(i: 1), N3: VL); |
16528 | return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt, |
16529 | N3: N->getOperand(Num: 2), N4: N->getOperand(Num: 3), N5: N->getOperand(Num: 4)); |
16530 | } |
16531 | break; |
16532 | } |
16533 | case ISD::SRA: |
16534 | if (SDValue V = performSRACombine(N, DAG, Subtarget)) |
16535 | return V; |
16536 | [[fallthrough]]; |
16537 | case ISD::SRL: |
16538 | case ISD::SHL: { |
16539 | if (N->getOpcode() == ISD::SHL) { |
16540 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16541 | return V; |
16542 | } |
16543 | SDValue ShAmt = N->getOperand(Num: 1); |
16544 | if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { |
16545 | // We don't need the upper 32 bits of a 64-bit element for a shift amount. |
16546 | SDLoc DL(N); |
16547 | EVT VT = N->getValueType(ResNo: 0); |
16548 | ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), |
16549 | ShAmt.getOperand(1), |
16550 | DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); |
16551 | return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt); |
16552 | } |
16553 | break; |
16554 | } |
16555 | case RISCVISD::ADD_VL: |
16556 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16557 | return V; |
16558 | return combineToVWMACC(N, DAG, Subtarget); |
16559 | case RISCVISD::VWADD_W_VL: |
16560 | case RISCVISD::VWADDU_W_VL: |
16561 | case RISCVISD::VWSUB_W_VL: |
16562 | case RISCVISD::VWSUBU_W_VL: |
16563 | return performVWADDSUBW_VLCombine(N, DCI, Subtarget); |
16564 | case RISCVISD::SUB_VL: |
16565 | case RISCVISD::MUL_VL: |
16566 | return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); |
16567 | case RISCVISD::VFMADD_VL: |
16568 | case RISCVISD::VFNMADD_VL: |
16569 | case RISCVISD::VFMSUB_VL: |
16570 | case RISCVISD::VFNMSUB_VL: |
16571 | case RISCVISD::STRICT_VFMADD_VL: |
16572 | case RISCVISD::STRICT_VFNMADD_VL: |
16573 | case RISCVISD::STRICT_VFMSUB_VL: |
16574 | case RISCVISD::STRICT_VFNMSUB_VL: |
16575 | return performVFMADD_VLCombine(N, DAG, Subtarget); |
16576 | case RISCVISD::FADD_VL: |
16577 | case RISCVISD::FSUB_VL: |
16578 | case RISCVISD::FMUL_VL: |
16579 | case RISCVISD::VFWADD_W_VL: |
16580 | case RISCVISD::VFWSUB_W_VL: { |
16581 | if (N->getValueType(0).isScalableVector() && |
16582 | N->getValueType(0).getVectorElementType() == MVT::f32 && |
16583 | (Subtarget.hasVInstructionsF16Minimal() && |
16584 | !Subtarget.hasVInstructionsF16())) |
16585 | return SDValue(); |
16586 | return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); |
16587 | } |
16588 | case ISD::LOAD: |
16589 | case ISD::STORE: { |
16590 | if (DCI.isAfterLegalizeDAG()) |
16591 | if (SDValue V = performMemPairCombine(N, DCI)) |
16592 | return V; |
16593 | |
16594 | if (N->getOpcode() != ISD::STORE) |
16595 | break; |
16596 | |
16597 | auto *Store = cast<StoreSDNode>(Val: N); |
16598 | SDValue Chain = Store->getChain(); |
16599 | EVT MemVT = Store->getMemoryVT(); |
16600 | SDValue Val = Store->getValue(); |
16601 | SDLoc DL(N); |
16602 | |
16603 | bool IsScalarizable = |
16604 | MemVT.isFixedLengthVector() && ISD::isNormalStore(N: Store) && |
16605 | Store->isSimple() && |
16606 | MemVT.getVectorElementType().bitsLE(VT: Subtarget.getXLenVT()) && |
16607 | isPowerOf2_64(Value: MemVT.getSizeInBits()) && |
16608 | MemVT.getSizeInBits() <= Subtarget.getXLen(); |
16609 | |
16610 | // If sufficiently aligned we can scalarize stores of constant vectors of |
16611 | // any power-of-two size up to XLen bits, provided that they aren't too |
16612 | // expensive to materialize. |
16613 | // vsetivli zero, 2, e8, m1, ta, ma |
16614 | // vmv.v.i v8, 4 |
    // vse8.v v8, (a0)
16616 | // -> |
16617 | // li a1, 1028 |
16618 | // sh a1, 0(a0) |
16619 | if (DCI.isBeforeLegalize() && IsScalarizable && |
16620 | ISD::isBuildVectorOfConstantSDNodes(N: Val.getNode())) { |
16621 | // Get the constant vector bits |
16622 | APInt NewC(Val.getValueSizeInBits(), 0); |
16623 | uint64_t EltSize = Val.getScalarValueSizeInBits(); |
16624 | for (unsigned i = 0; i < Val.getNumOperands(); i++) { |
16625 | if (Val.getOperand(i).isUndef()) |
16626 | continue; |
16627 | NewC.insertBits(SubBits: Val.getConstantOperandAPInt(i).trunc(width: EltSize), |
16628 | bitPosition: i * EltSize); |
16629 | } |
16630 | MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits()); |
16631 | |
16632 | if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget, |
16633 | true) <= 2 && |
16634 | allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
16635 | VT: NewVT, MMO: *Store->getMemOperand())) { |
16636 | SDValue NewV = DAG.getConstant(Val: NewC, DL, VT: NewVT); |
16637 | return DAG.getStore(Chain, dl: DL, Val: NewV, Ptr: Store->getBasePtr(), |
16638 | PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(), |
16639 | MMOFlags: Store->getMemOperand()->getFlags()); |
16640 | } |
16641 | } |
16642 | |
16643 | // Similarly, if sufficiently aligned we can scalarize vector copies, e.g. |
16644 | // vsetivli zero, 2, e16, m1, ta, ma |
16645 | // vle16.v v8, (a0) |
16646 | // vse16.v v8, (a1) |
16647 | if (auto *L = dyn_cast<LoadSDNode>(Val); |
16648 | L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() && |
16649 | L->hasNUsesOfValue(NUses: 1, Value: 0) && L->hasNUsesOfValue(NUses: 1, Value: 1) && |
16650 | Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(N: L) && |
16651 | L->getMemoryVT() == MemVT) { |
16652 | MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits()); |
16653 | if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
16654 | VT: NewVT, MMO: *Store->getMemOperand()) && |
16655 | allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
16656 | VT: NewVT, MMO: *L->getMemOperand())) { |
16657 | SDValue NewL = DAG.getLoad(VT: NewVT, dl: DL, Chain: L->getChain(), Ptr: L->getBasePtr(), |
16658 | PtrInfo: L->getPointerInfo(), Alignment: L->getOriginalAlign(), |
16659 | MMOFlags: L->getMemOperand()->getFlags()); |
16660 | return DAG.getStore(Chain, dl: DL, Val: NewL, Ptr: Store->getBasePtr(), |
16661 | PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(), |
16662 | MMOFlags: Store->getMemOperand()->getFlags()); |
16663 | } |
16664 | } |
16665 | |
16666 | // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. |
16667 | // vfmv.f.s is represented as extract element from 0. Match it late to avoid |
16668 | // any illegal types. |
16669 | if (Val.getOpcode() == RISCVISD::VMV_X_S || |
16670 | (DCI.isAfterLegalizeDAG() && |
16671 | Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
16672 | isNullConstant(V: Val.getOperand(i: 1)))) { |
16673 | SDValue Src = Val.getOperand(i: 0); |
16674 | MVT VecVT = Src.getSimpleValueType(); |
16675 | // VecVT should be scalable and memory VT should match the element type. |
16676 | if (!Store->isIndexed() && VecVT.isScalableVector() && |
16677 | MemVT == VecVT.getVectorElementType()) { |
16678 | SDLoc DL(N); |
16679 | MVT MaskVT = getMaskTypeFor(VecVT); |
16680 | return DAG.getStoreVP( |
16681 | Chain: Store->getChain(), dl: DL, Val: Src, Ptr: Store->getBasePtr(), Offset: Store->getOffset(), |
16682 | Mask: DAG.getConstant(Val: 1, DL, VT: MaskVT), |
16683 | EVL: DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT()), MemVT, |
16684 | MMO: Store->getMemOperand(), AM: Store->getAddressingMode(), |
16685 | IsTruncating: Store->isTruncatingStore(), /*IsCompress*/ IsCompressing: false); |
16686 | } |
16687 | } |
16688 | |
16689 | break; |
16690 | } |
16691 | case ISD::SPLAT_VECTOR: { |
16692 | EVT VT = N->getValueType(ResNo: 0); |
16693 | // Only perform this combine on legal MVT types. |
16694 | if (!isTypeLegal(VT)) |
16695 | break; |
16696 | if (auto Gather = matchSplatAsGather(SplatVal: N->getOperand(Num: 0), VT: VT.getSimpleVT(), DL: N, |
16697 | DAG, Subtarget)) |
16698 | return Gather; |
16699 | break; |
16700 | } |
16701 | case ISD::BUILD_VECTOR: |
16702 | if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, TLI: *this)) |
16703 | return V; |
16704 | break; |
16705 | case ISD::CONCAT_VECTORS: |
16706 | if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, TLI: *this)) |
16707 | return V; |
16708 | break; |
16709 | case ISD::INSERT_VECTOR_ELT: |
16710 | if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, TLI: *this)) |
16711 | return V; |
16712 | break; |
16713 | case RISCVISD::VFMV_V_F_VL: { |
16714 | const MVT VT = N->getSimpleValueType(ResNo: 0); |
16715 | SDValue Passthru = N->getOperand(Num: 0); |
16716 | SDValue Scalar = N->getOperand(Num: 1); |
16717 | SDValue VL = N->getOperand(Num: 2); |
16718 | |
16719 | // If VL is 1, we can use vfmv.s.f. |
16720 | if (isOneConstant(V: VL)) |
16721 | return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
16722 | break; |
16723 | } |
16724 | case RISCVISD::VMV_V_X_VL: { |
16725 | const MVT VT = N->getSimpleValueType(ResNo: 0); |
16726 | SDValue Passthru = N->getOperand(Num: 0); |
16727 | SDValue Scalar = N->getOperand(Num: 1); |
16728 | SDValue VL = N->getOperand(Num: 2); |
16729 | |
16730 | // Tail agnostic VMV.V.X only demands the vector element bitwidth from the |
16731 | // scalar input. |
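    // For example, splatting an i64 scalar into a vector of i8 elements only
    // reads the low 8 bits of the scalar, so wider bits can be simplified.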
16732 | unsigned ScalarSize = Scalar.getValueSizeInBits(); |
16733 | unsigned EltWidth = VT.getScalarSizeInBits(); |
16734 | if (ScalarSize > EltWidth && Passthru.isUndef()) |
16735 | if (SimplifyDemandedLowBitsHelper(1, EltWidth)) |
16736 | return SDValue(N, 0); |
16737 | |
    // If VL is 1 and the scalar value won't benefit from an immediate, we
    // can use vmv.s.x.
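    // (A nonzero simm5 splat is better left as-is, since vmv.v.i encodes the
    // immediate directly; zero can instead use vmv.s.x with the x0 register.)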
16740 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar); |
16741 | if (isOneConstant(V: VL) && |
16742 | (!Const || Const->isZero() || |
16743 | !Const->getAPIntValue().sextOrTrunc(width: EltWidth).isSignedIntN(N: 5))) |
16744 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
16745 | |
16746 | break; |
16747 | } |
16748 | case RISCVISD::VFMV_S_F_VL: { |
16749 | SDValue Src = N->getOperand(Num: 1); |
16750 | // Try to remove vector->scalar->vector if the scalar->vector is inserting |
16751 | // into an undef vector. |
16752 | // TODO: Could use a vslide or vmv.v.v for non-undef. |
16753 | if (N->getOperand(Num: 0).isUndef() && |
16754 | Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
16755 | isNullConstant(V: Src.getOperand(i: 1)) && |
16756 | Src.getOperand(i: 0).getValueType().isScalableVector()) { |
16757 | EVT VT = N->getValueType(ResNo: 0); |
16758 | EVT SrcVT = Src.getOperand(i: 0).getValueType(); |
16759 | assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); |
16760 | // Widths match, just return the original vector. |
16761 | if (SrcVT == VT) |
16762 | return Src.getOperand(i: 0); |
16763 | // TODO: Use insert_subvector/extract_subvector to change widen/narrow? |
16764 | } |
16765 | [[fallthrough]]; |
16766 | } |
16767 | case RISCVISD::VMV_S_X_VL: { |
16768 | const MVT VT = N->getSimpleValueType(ResNo: 0); |
16769 | SDValue Passthru = N->getOperand(Num: 0); |
16770 | SDValue Scalar = N->getOperand(Num: 1); |
16771 | SDValue VL = N->getOperand(Num: 2); |
16772 | |
    // Use M1 or smaller to avoid over-constraining register allocation.
16774 | const MVT M1VT = getLMUL1VT(VT); |
16775 | if (M1VT.bitsLT(VT)) { |
16776 | SDValue M1Passthru = |
16777 | DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Passthru, |
16778 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
16779 | SDValue Result = |
16780 | DAG.getNode(Opcode: N->getOpcode(), DL, VT: M1VT, N1: M1Passthru, N2: Scalar, N3: VL); |
16781 | Result = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru, N2: Result, |
16782 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
16783 | return Result; |
16784 | } |
16785 | |
    // We use a vmv.v.i if possible. We limit this to LMUL1: LMUL2 or
    // higher would overly constrain the register allocator for no purpose.
16789 | if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar); |
16790 | Const && !Const->isZero() && isInt<5>(x: Const->getSExtValue()) && |
16791 | VT.bitsLE(VT: getLMUL1VT(VT)) && Passthru.isUndef()) |
16792 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
16793 | |
16794 | break; |
16795 | } |
16796 | case RISCVISD::VMV_X_S: { |
16797 | SDValue Vec = N->getOperand(Num: 0); |
16798 | MVT VecVT = N->getOperand(Num: 0).getSimpleValueType(); |
16799 | const MVT M1VT = getLMUL1VT(VT: VecVT); |
16800 | if (M1VT.bitsLT(VT: VecVT)) { |
16801 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec, |
16802 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
16803 | return DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: N->getSimpleValueType(ResNo: 0), Operand: Vec); |
16804 | } |
16805 | break; |
16806 | } |
16807 | case ISD::INTRINSIC_VOID: |
16808 | case ISD::INTRINSIC_W_CHAIN: |
16809 | case ISD::INTRINSIC_WO_CHAIN: { |
16810 | unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; |
16811 | unsigned IntNo = N->getConstantOperandVal(Num: IntOpNo); |
16812 | switch (IntNo) { |
16813 | // By default we do not combine any intrinsic. |
16814 | default: |
16815 | return SDValue(); |
16816 | case Intrinsic::riscv_masked_strided_load: { |
16817 | MVT VT = N->getSimpleValueType(ResNo: 0); |
16818 | auto *Load = cast<MemIntrinsicSDNode>(Val: N); |
16819 | SDValue PassThru = N->getOperand(Num: 2); |
16820 | SDValue Base = N->getOperand(Num: 3); |
16821 | SDValue Stride = N->getOperand(Num: 4); |
16822 | SDValue Mask = N->getOperand(Num: 5); |
16823 | |
16824 | // If the stride is equal to the element size in bytes, we can use |
16825 | // a masked.load. |
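      // For example, an e32 strided load with a stride of 4 bytes reads
      // consecutive words, which is just a unit-stride masked load.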
16826 | const unsigned ElementSize = VT.getScalarStoreSize(); |
16827 | if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride); |
16828 | StrideC && StrideC->getZExtValue() == ElementSize) |
16829 | return DAG.getMaskedLoad(VT, dl: DL, Chain: Load->getChain(), Base, |
16830 | Offset: DAG.getUNDEF(VT: XLenVT), Mask, Src0: PassThru, |
16831 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand(), |
16832 | AM: ISD::UNINDEXED, ISD::NON_EXTLOAD); |
16833 | return SDValue(); |
16834 | } |
16835 | case Intrinsic::riscv_masked_strided_store: { |
16836 | auto *Store = cast<MemIntrinsicSDNode>(Val: N); |
16837 | SDValue Value = N->getOperand(Num: 2); |
16838 | SDValue Base = N->getOperand(Num: 3); |
16839 | SDValue Stride = N->getOperand(Num: 4); |
16840 | SDValue Mask = N->getOperand(Num: 5); |
16841 | |
16842 | // If the stride is equal to the element size in bytes, we can use |
16843 | // a masked.store. |
16844 | const unsigned ElementSize = Value.getValueType().getScalarStoreSize(); |
16845 | if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride); |
16846 | StrideC && StrideC->getZExtValue() == ElementSize) |
16847 | return DAG.getMaskedStore(Chain: Store->getChain(), dl: DL, Val: Value, Base, |
16848 | Offset: DAG.getUNDEF(VT: XLenVT), Mask, |
16849 | MemVT: Value.getValueType(), MMO: Store->getMemOperand(), |
16850 | AM: ISD::UNINDEXED, IsTruncating: false); |
16851 | return SDValue(); |
16852 | } |
16853 | case Intrinsic::riscv_vcpop: |
16854 | case Intrinsic::riscv_vcpop_mask: |
16855 | case Intrinsic::riscv_vfirst: |
16856 | case Intrinsic::riscv_vfirst_mask: { |
16857 | SDValue VL = N->getOperand(Num: 2); |
16858 | if (IntNo == Intrinsic::riscv_vcpop_mask || |
16859 | IntNo == Intrinsic::riscv_vfirst_mask) |
16860 | VL = N->getOperand(Num: 3); |
16861 | if (!isNullConstant(V: VL)) |
16862 | return SDValue(); |
16863 | // If VL is 0, vcpop -> li 0, vfirst -> li -1. |
16864 | SDLoc DL(N); |
16865 | EVT VT = N->getValueType(ResNo: 0); |
16866 | if (IntNo == Intrinsic::riscv_vfirst || |
16867 | IntNo == Intrinsic::riscv_vfirst_mask) |
16868 | return DAG.getConstant(Val: -1, DL, VT); |
16869 | return DAG.getConstant(Val: 0, DL, VT); |
16870 | } |
16871 | } |
16872 | } |
16873 | case ISD::BITCAST: { |
16874 | assert(Subtarget.useRVVForFixedLengthVectors()); |
16875 | SDValue N0 = N->getOperand(Num: 0); |
16876 | EVT VT = N->getValueType(ResNo: 0); |
16877 | EVT SrcVT = N0.getValueType(); |
16878 | // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer |
16879 | // type, widen both sides to avoid a trip through memory. |
16880 | if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && |
16881 | VT.isScalarInteger()) { |
16882 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
16883 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT: SrcVT)); |
16884 | Ops[0] = N0; |
16885 | SDLoc DL(N); |
16886 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops); |
16887 | N0 = DAG.getBitcast(MVT::i8, N0); |
16888 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0); |
16889 | } |
16890 | |
16891 | return SDValue(); |
16892 | } |
16893 | } |
16894 | |
16895 | return SDValue(); |
16896 | } |
16897 | |
16898 | bool RISCVTargetLowering::shouldTransformSignedTruncationCheck( |
16899 | EVT XVT, unsigned KeptBits) const { |
  // For vectors, we don't have a preference.
16901 | if (XVT.isVector()) |
16902 | return false; |
16903 | |
16904 | if (XVT != MVT::i32 && XVT != MVT::i64) |
16905 | return false; |
16906 | |
16907 | // We can use sext.w for RV64 or an srai 31 on RV32. |
16908 | if (KeptBits == 32 || KeptBits == 64) |
16909 | return true; |
16910 | |
16911 | // With Zbb we can use sext.h/sext.b. |
16912 | return Subtarget.hasStdExtZbb() && |
16913 | ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) || |
16914 | KeptBits == 16); |
16915 | } |
16916 | |
16917 | bool RISCVTargetLowering::isDesirableToCommuteWithShift( |
16918 | const SDNode *N, CombineLevel Level) const { |
16919 | assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || |
16920 | N->getOpcode() == ISD::SRL) && |
16921 | "Expected shift op" ); |
16922 | |
16923 | // The following folds are only desirable if `(OP _, c1 << c2)` can be |
16924 | // materialised in fewer instructions than `(OP _, c1)`: |
16925 | // |
16926 | // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) |
16927 | // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) |
16928 | SDValue N0 = N->getOperand(Num: 0); |
16929 | EVT Ty = N0.getValueType(); |
16930 | if (Ty.isScalarInteger() && |
16931 | (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { |
16932 | auto *C1 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
16933 | auto *C2 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
16934 | if (C1 && C2) { |
16935 | const APInt &C1Int = C1->getAPIntValue(); |
16936 | APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); |
16937 | |
16938 | // We can materialise `c1 << c2` into an add immediate, so it's "free", |
16939 | // and the combine should happen, to potentially allow further combines |
16940 | // later. |
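      // For example, for (shl (add x, 3), 4) the shifted constant 3 << 4 = 48
      // still fits in simm12, so rewriting to (add (shl x, 4), 48) is free.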
16941 | if (ShiftedC1Int.getSignificantBits() <= 64 && |
16942 | isLegalAddImmediate(Imm: ShiftedC1Int.getSExtValue())) |
16943 | return true; |
16944 | |
16945 | // We can materialise `c1` in an add immediate, so it's "free", and the |
16946 | // combine should be prevented. |
16947 | if (C1Int.getSignificantBits() <= 64 && |
16948 | isLegalAddImmediate(Imm: C1Int.getSExtValue())) |
16949 | return false; |
16950 | |
16951 | // Neither constant will fit into an immediate, so find materialisation |
16952 | // costs. |
16953 | int C1Cost = |
16954 | RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget, |
16955 | /*CompressionCost*/ true); |
16956 | int ShiftedC1Cost = RISCVMatInt::getIntMatCost( |
16957 | ShiftedC1Int, Ty.getSizeInBits(), Subtarget, |
16958 | /*CompressionCost*/ true); |
16959 | |
16960 | // Materialising `c1` is cheaper than materialising `c1 << c2`, so the |
16961 | // combine should be prevented. |
16962 | if (C1Cost < ShiftedC1Cost) |
16963 | return false; |
16964 | } |
16965 | } |
16966 | return true; |
16967 | } |
16968 | |
16969 | bool RISCVTargetLowering::targetShrinkDemandedConstant( |
16970 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
16971 | TargetLoweringOpt &TLO) const { |
16972 | // Delay this optimization as late as possible. |
16973 | if (!TLO.LegalOps) |
16974 | return false; |
16975 | |
16976 | EVT VT = Op.getValueType(); |
16977 | if (VT.isVector()) |
16978 | return false; |
16979 | |
16980 | unsigned Opcode = Op.getOpcode(); |
16981 | if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) |
16982 | return false; |
16983 | |
16984 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1)); |
16985 | if (!C) |
16986 | return false; |
16987 | |
16988 | const APInt &Mask = C->getAPIntValue(); |
16989 | |
16990 | // Clear all non-demanded bits initially. |
16991 | APInt ShrunkMask = Mask & DemandedBits; |
16992 | |
16993 | // Try to make a smaller immediate by setting undemanded bits. |
16994 | |
16995 | APInt ExpandedMask = Mask | ~DemandedBits; |
16996 | |
16997 | auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { |
16998 | return ShrunkMask.isSubsetOf(RHS: Mask) && Mask.isSubsetOf(RHS: ExpandedMask); |
16999 | }; |
17000 | auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { |
17001 | if (NewMask == Mask) |
17002 | return true; |
17003 | SDLoc DL(Op); |
17004 | SDValue NewC = TLO.DAG.getConstant(Val: NewMask, DL, VT: Op.getValueType()); |
17005 | SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
17006 | N1: Op.getOperand(i: 0), N2: NewC); |
17007 | return TLO.CombineTo(O: Op, N: NewOp); |
17008 | }; |
17009 | |
17010 | // If the shrunk mask fits in sign extended 12 bits, let the target |
17011 | // independent code apply it. |
17012 | if (ShrunkMask.isSignedIntN(N: 12)) |
17013 | return false; |
17014 | |
  // AND has a few special cases for zext.
17016 | if (Opcode == ISD::AND) { |
17017 | // Preserve (and X, 0xffff), if zext.h exists use zext.h, |
17018 | // otherwise use SLLI + SRLI. |
17019 | APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); |
17020 | if (IsLegalMask(NewMask)) |
17021 | return UseMask(NewMask); |
17022 | |
17023 | // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. |
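    // (With Zba this is a single zext.w, i.e. add.uw with x0; otherwise it
    // lowers to SLLI + SRLI.)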
17024 | if (VT == MVT::i64) { |
17025 | APInt NewMask = APInt(64, 0xffffffff); |
17026 | if (IsLegalMask(NewMask)) |
17027 | return UseMask(NewMask); |
17028 | } |
17029 | } |
17030 | |
17031 | // For the remaining optimizations, we need to be able to make a negative |
17032 | // number through a combination of mask and undemanded bits. |
17033 | if (!ExpandedMask.isNegative()) |
17034 | return false; |
17035 | |
  // Compute the fewest number of bits needed to represent the negative
  // number.
17037 | unsigned MinSignedBits = ExpandedMask.getSignificantBits(); |
17038 | |
17039 | // Try to make a 12 bit negative immediate. If that fails try to make a 32 |
17040 | // bit negative immediate unless the shrunk immediate already fits in 32 bits. |
17041 | // If we can't create a simm12, we shouldn't change opaque constants. |
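  // For example, a ShrunkMask of 0xffff0000 whose upper 32 bits are not
  // demanded becomes 0xffffffffffff0000 after setBitsFrom(31), which a
  // single LUI can materialize on RV64.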
17042 | APInt NewMask = ShrunkMask; |
17043 | if (MinSignedBits <= 12) |
17044 | NewMask.setBitsFrom(11); |
17045 | else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(N: 32)) |
17046 | NewMask.setBitsFrom(31); |
17047 | else |
17048 | return false; |
17049 | |
17050 | // Check that our new mask is a subset of the demanded mask. |
17051 | assert(IsLegalMask(NewMask)); |
17052 | return UseMask(NewMask); |
17053 | } |
17054 | |
17055 | static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { |
17056 | static const uint64_t GREVMasks[] = { |
17057 | 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, |
17058 | 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; |
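  // Each stage conditionally swaps adjacent bit groups of width (1 << Stage);
  // for GORC the original bits are kept as well. For example, orc.b
  // (ShAmt = 7) maps each nonzero byte to 0xFF:
  //   0x0102030400000000 -> 0xFFFFFFFF00000000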
17059 | |
17060 | for (unsigned Stage = 0; Stage != 6; ++Stage) { |
17061 | unsigned Shift = 1 << Stage; |
17062 | if (ShAmt & Shift) { |
17063 | uint64_t Mask = GREVMasks[Stage]; |
17064 | uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); |
17065 | if (IsGORC) |
17066 | Res |= x; |
17067 | x = Res; |
17068 | } |
17069 | } |
17070 | |
17071 | return x; |
17072 | } |
17073 | |
17074 | void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
17075 | KnownBits &Known, |
17076 | const APInt &DemandedElts, |
17077 | const SelectionDAG &DAG, |
17078 | unsigned Depth) const { |
17079 | unsigned BitWidth = Known.getBitWidth(); |
17080 | unsigned Opc = Op.getOpcode(); |
17081 | assert((Opc >= ISD::BUILTIN_OP_END || |
17082 | Opc == ISD::INTRINSIC_WO_CHAIN || |
17083 | Opc == ISD::INTRINSIC_W_CHAIN || |
17084 | Opc == ISD::INTRINSIC_VOID) && |
17085 | "Should use MaskedValueIsZero if you don't know whether Op" |
17086 | " is a target node!" ); |
17087 | |
17088 | Known.resetAll(); |
17089 | switch (Opc) { |
17090 | default: break; |
17091 | case RISCVISD::SELECT_CC: { |
17092 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 4), Depth: Depth + 1); |
17093 | // If we don't know any bits, early out. |
17094 | if (Known.isUnknown()) |
17095 | break; |
17096 | KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 3), Depth: Depth + 1); |
17097 | |
17098 | // Only known if known in both the LHS and RHS. |
17099 | Known = Known.intersectWith(RHS: Known2); |
17100 | break; |
17101 | } |
17102 | case RISCVISD::CZERO_EQZ: |
17103 | case RISCVISD::CZERO_NEZ: |
17104 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17105 | // Result is either all zero or operand 0. We can propagate zeros, but not |
17106 | // ones. |
17107 | Known.One.clearAllBits(); |
17108 | break; |
17109 | case RISCVISD::REMUW: { |
17110 | KnownBits Known2; |
17111 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17112 | Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
17113 | // We only care about the lower 32 bits. |
17114 | Known = KnownBits::urem(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32)); |
17115 | // Restore the original width by sign extending. |
17116 | Known = Known.sext(BitWidth); |
17117 | break; |
17118 | } |
17119 | case RISCVISD::DIVUW: { |
17120 | KnownBits Known2; |
17121 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17122 | Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
17123 | // We only care about the lower 32 bits. |
17124 | Known = KnownBits::udiv(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32)); |
17125 | // Restore the original width by sign extending. |
17126 | Known = Known.sext(BitWidth); |
17127 | break; |
17128 | } |
17129 | case RISCVISD::SLLW: { |
17130 | KnownBits Known2; |
17131 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17132 | Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
17133 | Known = KnownBits::shl(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 5).zext(BitWidth: 32)); |
17134 | // Restore the original width by sign extending. |
17135 | Known = Known.sext(BitWidth); |
17136 | break; |
17137 | } |
17138 | case RISCVISD::CTZW: { |
17139 | KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17140 | unsigned PossibleTZ = Known2.trunc(BitWidth: 32).countMaxTrailingZeros(); |
17141 | unsigned LowBits = llvm::bit_width(Value: PossibleTZ); |
17142 | Known.Zero.setBitsFrom(LowBits); |
17143 | break; |
17144 | } |
17145 | case RISCVISD::CLZW: { |
17146 | KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17147 | unsigned PossibleLZ = Known2.trunc(BitWidth: 32).countMaxLeadingZeros(); |
17148 | unsigned LowBits = llvm::bit_width(Value: PossibleLZ); |
17149 | Known.Zero.setBitsFrom(LowBits); |
17150 | break; |
17151 | } |
17152 | case RISCVISD::BREV8: |
17153 | case RISCVISD::ORC_B: { |
17154 | // FIXME: This is based on the non-ratified Zbp GREV and GORC where a |
17155 | // control value of 7 is equivalent to brev8 and orc.b. |
17156 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17157 | bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; |
17158 | // To compute zeros, we need to invert the value and invert it back after. |
17159 | Known.Zero = |
17160 | ~computeGREVOrGORC(x: ~Known.Zero.getZExtValue(), ShAmt: 7, IsGORC); |
17161 | Known.One = computeGREVOrGORC(x: Known.One.getZExtValue(), ShAmt: 7, IsGORC); |
17162 | break; |
17163 | } |
17164 | case RISCVISD::READ_VLENB: { |
17165 | // We can use the minimum and maximum VLEN values to bound VLENB. We |
17166 | // know VLEN must be a power of two. |
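    // For example, with a 128-bit minimum and a 512-bit maximum VLEN, VLENB
    // is a power of two in [16, 64]: the low 4 bits and all bits above bit 6
    // are known zero.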
17167 | const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; |
17168 | const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; |
17169 | assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?" ); |
17170 | Known.Zero.setLowBits(Log2_32(Value: MinVLenB)); |
17171 | Known.Zero.setBitsFrom(Log2_32(Value: MaxVLenB)+1); |
17172 | if (MaxVLenB == MinVLenB) |
17173 | Known.One.setBit(Log2_32(Value: MinVLenB)); |
17174 | break; |
17175 | } |
17176 | case RISCVISD::FCLASS: { |
17177 | // fclass will only set one of the low 10 bits. |
17178 | Known.Zero.setBitsFrom(10); |
17179 | break; |
17180 | } |
17181 | case ISD::INTRINSIC_W_CHAIN: |
17182 | case ISD::INTRINSIC_WO_CHAIN: { |
17183 | unsigned IntNo = |
17184 | Op.getConstantOperandVal(i: Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); |
17185 | switch (IntNo) { |
17186 | default: |
17187 | // We can't do anything for most intrinsics. |
17188 | break; |
17189 | case Intrinsic::riscv_vsetvli: |
17190 | case Intrinsic::riscv_vsetvlimax: { |
17191 | bool HasAVL = IntNo == Intrinsic::riscv_vsetvli; |
17192 | unsigned VSEW = Op.getConstantOperandVal(i: HasAVL + 1); |
17193 | RISCVII::VLMUL VLMUL = |
17194 | static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(i: HasAVL + 2)); |
17195 | unsigned SEW = RISCVVType::decodeVSEW(VSEW); |
17196 | auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL); |
17197 | uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW; |
17198 | MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul; |
17199 | |
      // The result of vsetvli must not be larger than AVL.
17201 | if (HasAVL && isa<ConstantSDNode>(Val: Op.getOperand(i: 1))) |
17202 | MaxVL = std::min(a: MaxVL, b: Op.getConstantOperandVal(i: 1)); |
17203 | |
17204 | unsigned KnownZeroFirstBit = Log2_32(Value: MaxVL) + 1; |
17205 | if (BitWidth > KnownZeroFirstBit) |
17206 | Known.Zero.setBitsFrom(KnownZeroFirstBit); |
17207 | break; |
17208 | } |
17209 | } |
17210 | break; |
17211 | } |
17212 | } |
17213 | } |
17214 | |
17215 | unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( |
17216 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
17217 | unsigned Depth) const { |
17218 | switch (Op.getOpcode()) { |
17219 | default: |
17220 | break; |
17221 | case RISCVISD::SELECT_CC: { |
17222 | unsigned Tmp = |
17223 | DAG.ComputeNumSignBits(Op: Op.getOperand(i: 3), DemandedElts, Depth: Depth + 1); |
17224 | if (Tmp == 1) return 1; // Early out. |
17225 | unsigned Tmp2 = |
17226 | DAG.ComputeNumSignBits(Op: Op.getOperand(i: 4), DemandedElts, Depth: Depth + 1); |
17227 | return std::min(a: Tmp, b: Tmp2); |
17228 | } |
17229 | case RISCVISD::CZERO_EQZ: |
17230 | case RISCVISD::CZERO_NEZ: |
17231 | // Output is either all zero or operand 0. We can propagate sign bit count |
17232 | // from operand 0. |
17233 | return DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17234 | case RISCVISD::ABSW: { |
17235 | // We expand this at isel to negw+max. The result will have 33 sign bits |
17236 | // if the input has at least 33 sign bits. |
17237 | unsigned Tmp = |
17238 | DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17239 | if (Tmp < 33) return 1; |
17240 | return 33; |
17241 | } |
17242 | case RISCVISD::SLLW: |
17243 | case RISCVISD::SRAW: |
17244 | case RISCVISD::SRLW: |
17245 | case RISCVISD::DIVW: |
17246 | case RISCVISD::DIVUW: |
17247 | case RISCVISD::REMUW: |
17248 | case RISCVISD::ROLW: |
17249 | case RISCVISD::RORW: |
17250 | case RISCVISD::FCVT_W_RV64: |
17251 | case RISCVISD::FCVT_WU_RV64: |
17252 | case RISCVISD::STRICT_FCVT_W_RV64: |
17253 | case RISCVISD::STRICT_FCVT_WU_RV64: |
17254 | // TODO: As the result is sign-extended, this is conservatively correct. A |
17255 | // more precise answer could be calculated for SRAW depending on known |
17256 | // bits in the shift amount. |
17257 | return 33; |
17258 | case RISCVISD::VMV_X_S: { |
17259 | // The number of sign bits of the scalar result is computed by obtaining the |
17260 | // element type of the input vector operand, subtracting its width from the |
17261 | // XLEN, and then adding one (sign bit within the element type). If the |
17262 | // element type is wider than XLen, the least-significant XLEN bits are |
17263 | // taken. |
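    // For example, on RV64 (XLen = 64) with an i32 element type, the result
    // has 64 - 32 + 1 = 33 sign bits.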
17264 | unsigned XLen = Subtarget.getXLen(); |
17265 | unsigned EltBits = Op.getOperand(i: 0).getScalarValueSizeInBits(); |
17266 | if (EltBits <= XLen) |
17267 | return XLen - EltBits + 1; |
17268 | break; |
17269 | } |
17270 | case ISD::INTRINSIC_W_CHAIN: { |
17271 | unsigned IntNo = Op.getConstantOperandVal(i: 1); |
17272 | switch (IntNo) { |
17273 | default: |
17274 | break; |
17275 | case Intrinsic::riscv_masked_atomicrmw_xchg_i64: |
17276 | case Intrinsic::riscv_masked_atomicrmw_add_i64: |
17277 | case Intrinsic::riscv_masked_atomicrmw_sub_i64: |
17278 | case Intrinsic::riscv_masked_atomicrmw_nand_i64: |
17279 | case Intrinsic::riscv_masked_atomicrmw_max_i64: |
17280 | case Intrinsic::riscv_masked_atomicrmw_min_i64: |
17281 | case Intrinsic::riscv_masked_atomicrmw_umax_i64: |
17282 | case Intrinsic::riscv_masked_atomicrmw_umin_i64: |
17283 | case Intrinsic::riscv_masked_cmpxchg_i64: |
17284 | // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated |
17285 | // narrow atomic operation. These are implemented using atomic |
17286 | // operations at the minimum supported atomicrmw/cmpxchg width whose |
      // result is then sign extended to XLEN. With the A extension, the
      // minimum width is 32 on both RV64 and RV32.
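      // For example, an i8 atomicrmw on RV64 is emulated with a 32-bit AMO,
      // so its sign-extended result has at least 33 sign bits.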
17289 | assert(Subtarget.getXLen() == 64); |
17290 | assert(getMinCmpXchgSizeInBits() == 32); |
17291 | assert(Subtarget.hasStdExtA()); |
17292 | return 33; |
17293 | } |
17294 | break; |
17295 | } |
17296 | } |
17297 | |
17298 | return 1; |
17299 | } |
17300 | |
17301 | bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode( |
17302 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
17303 | bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { |
17304 | |
17305 | // TODO: Add more target nodes. |
17306 | switch (Op.getOpcode()) { |
17307 | case RISCVISD::SELECT_CC: |
17308 | // Integer select_cc cannot create poison. |
17309 | // TODO: What are the FP poison semantics? |
17310 | // TODO: This instruction blocks poison from the unselected operand, can |
17311 | // we do anything with that? |
17312 | return !Op.getValueType().isInteger(); |
17313 | } |
17314 | return TargetLowering::canCreateUndefOrPoisonForTargetNode( |
17315 | Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth); |
17316 | } |
17317 | |
17318 | const Constant * |
17319 | RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { |
17320 | assert(Ld && "Unexpected null LoadSDNode" ); |
17321 | if (!ISD::isNormalLoad(N: Ld)) |
17322 | return nullptr; |
17323 | |
17324 | SDValue Ptr = Ld->getBasePtr(); |
17325 | |
17326 | // Only constant pools with no offset are supported. |
17327 | auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { |
17328 | auto *CNode = dyn_cast<ConstantPoolSDNode>(Val&: Ptr); |
17329 | if (!CNode || CNode->isMachineConstantPoolEntry() || |
17330 | CNode->getOffset() != 0) |
17331 | return nullptr; |
17332 | |
17333 | return CNode; |
17334 | }; |
17335 | |
17336 | // Simple case, LLA. |
17337 | if (Ptr.getOpcode() == RISCVISD::LLA) { |
17338 | auto *CNode = GetSupportedConstantPool(Ptr); |
17339 | if (!CNode || CNode->getTargetFlags() != 0) |
17340 | return nullptr; |
17341 | |
17342 | return CNode->getConstVal(); |
17343 | } |
17344 | |
17345 | // Look for a HI and ADD_LO pair. |
17346 | if (Ptr.getOpcode() != RISCVISD::ADD_LO || |
17347 | Ptr.getOperand(i: 0).getOpcode() != RISCVISD::HI) |
17348 | return nullptr; |
17349 | |
17350 | auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(i: 1)); |
17351 | auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(i: 0).getOperand(i: 0)); |
17352 | |
17353 | if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || |
17354 | !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) |
17355 | return nullptr; |
17356 | |
17357 | if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) |
17358 | return nullptr; |
17359 | |
17360 | return CNodeLo->getConstVal(); |
17361 | } |
17362 | |
17363 | static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI, |
17364 | MachineBasicBlock *BB) { |
17365 | assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction" ); |
17366 | |
17367 | // To read a 64-bit counter CSR on a 32-bit target, we read the two halves. |
17368 | // Should the count have wrapped while it was being read, we need to try |
17369 | // again. |
17370 | // For example: |
17371 | // ``` |
17372 | // read: |
17373 | // csrrs x3, counterh # load high word of counter |
17374 | // csrrs x2, counter # load low word of counter |
17375 | // csrrs x4, counterh # load high word of counter |
17376 | // bne x3, x4, read # check if high word reads match, otherwise try again |
17377 | // ``` |
17378 | |
17379 | MachineFunction &MF = *BB->getParent(); |
17380 | const BasicBlock *LLVMBB = BB->getBasicBlock(); |
17381 | MachineFunction::iterator It = ++BB->getIterator(); |
17382 | |
17383 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVMBB); |
17384 | MF.insert(MBBI: It, MBB: LoopMBB); |
17385 | |
17386 | MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(BB: LLVMBB); |
17387 | MF.insert(MBBI: It, MBB: DoneMBB); |
17388 | |
17389 | // Transfer the remainder of BB and its successor edges to DoneMBB. |
17390 | DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, |
17391 | From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end()); |
17392 | DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
17393 | |
17394 | BB->addSuccessor(Succ: LoopMBB); |
17395 | |
17396 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
17397 | Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); |
17398 | Register LoReg = MI.getOperand(i: 0).getReg(); |
17399 | Register HiReg = MI.getOperand(i: 1).getReg(); |
17400 | int64_t LoCounter = MI.getOperand(i: 2).getImm(); |
17401 | int64_t HiCounter = MI.getOperand(i: 3).getImm(); |
17402 | DebugLoc DL = MI.getDebugLoc(); |
17403 | |
17404 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
17405 | BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) |
17406 | .addImm(HiCounter) |
17407 | .addReg(RISCV::X0); |
17408 | BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) |
17409 | .addImm(LoCounter) |
17410 | .addReg(RISCV::X0); |
17411 | BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) |
17412 | .addImm(HiCounter) |
17413 | .addReg(RISCV::X0); |
17414 | |
17415 | BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) |
17416 | .addReg(HiReg) |
17417 | .addReg(ReadAgainReg) |
17418 | .addMBB(LoopMBB); |
17419 | |
17420 | LoopMBB->addSuccessor(Succ: LoopMBB); |
17421 | LoopMBB->addSuccessor(Succ: DoneMBB); |
17422 | |
17423 | MI.eraseFromParent(); |
17424 | |
17425 | return DoneMBB; |
17426 | } |
17427 | |
17428 | static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, |
17429 | MachineBasicBlock *BB, |
17430 | const RISCVSubtarget &Subtarget) { |
17431 | assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction" ); |
17432 | |
17433 | MachineFunction &MF = *BB->getParent(); |
17434 | DebugLoc DL = MI.getDebugLoc(); |
17435 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
17436 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
17437 | Register LoReg = MI.getOperand(i: 0).getReg(); |
17438 | Register HiReg = MI.getOperand(i: 1).getReg(); |
17439 | Register SrcReg = MI.getOperand(i: 2).getReg(); |
17440 | |
17441 | const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; |
17442 | int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); |
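  // The f64 source is spilled to a stack slot and reloaded as two i32 halves.
  // Conceptually, the emitted sequence is (illustrative RV32D registers):
  //   fsd fa0, 0(sp)   # store the FPR64 source
  //   lw  a0,  0(sp)   # load the low 32 bits
  //   lw  a1,  4(sp)   # load the high 32 bits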
17443 | |
17444 | TII.storeRegToStackSlot(MBB&: *BB, MI, SrcReg, isKill: MI.getOperand(i: 2).isKill(), FrameIndex: FI, RC: SrcRC, |
17445 | TRI: RI, VReg: Register()); |
17446 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
17447 | MachineMemOperand *MMOLo = |
17448 | MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8)); |
17449 | MachineMemOperand *MMOHi = MF.getMachineMemOperand( |
17450 | PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8)); |
17451 | BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) |
17452 | .addFrameIndex(FI) |
17453 | .addImm(0) |
17454 | .addMemOperand(MMOLo); |
17455 | BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) |
17456 | .addFrameIndex(FI) |
17457 | .addImm(4) |
17458 | .addMemOperand(MMOHi); |
17459 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
17460 | return BB; |
17461 | } |
17462 | |
17463 | static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, |
17464 | MachineBasicBlock *BB, |
17465 | const RISCVSubtarget &Subtarget) { |
17466 | assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && |
17467 | "Unexpected instruction" ); |
17468 | |
17469 | MachineFunction &MF = *BB->getParent(); |
17470 | DebugLoc DL = MI.getDebugLoc(); |
17471 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
17472 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
17473 | Register DstReg = MI.getOperand(i: 0).getReg(); |
17474 | Register LoReg = MI.getOperand(i: 1).getReg(); |
17475 | Register HiReg = MI.getOperand(i: 2).getReg(); |
17476 | |
17477 | const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; |
17478 | int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); |
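  // The two i32 halves are stored to a stack slot and reloaded as one f64.
  // Conceptually, the emitted sequence is (illustrative RV32D registers):
  //   sw  a0,  0(sp)   # store the low 32 bits
  //   sw  a1,  4(sp)   # store the high 32 bits
  //   fld fa0, 0(sp)   # reload as an FPR64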
17479 | |
17480 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
17481 | MachineMemOperand *MMOLo = |
17482 | MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8)); |
17483 | MachineMemOperand *MMOHi = MF.getMachineMemOperand( |
17484 | PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8)); |
17485 | BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) |
17486 | .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) |
17487 | .addFrameIndex(FI) |
17488 | .addImm(0) |
17489 | .addMemOperand(MMOLo); |
17490 | BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) |
17491 | .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) |
17492 | .addFrameIndex(FI) |
17493 | .addImm(4) |
17494 | .addMemOperand(MMOHi); |
17495 | TII.loadRegFromStackSlot(MBB&: *BB, MI, DestReg: DstReg, FrameIndex: FI, RC: DstRC, TRI: RI, VReg: Register()); |
17496 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
17497 | return BB; |
17498 | } |
17499 | |
17500 | static bool isSelectPseudo(MachineInstr &MI) { |
17501 | switch (MI.getOpcode()) { |
17502 | default: |
17503 | return false; |
17504 | case RISCV::Select_GPR_Using_CC_GPR: |
17505 | case RISCV::Select_FPR16_Using_CC_GPR: |
17506 | case RISCV::Select_FPR16INX_Using_CC_GPR: |
17507 | case RISCV::Select_FPR32_Using_CC_GPR: |
17508 | case RISCV::Select_FPR32INX_Using_CC_GPR: |
17509 | case RISCV::Select_FPR64_Using_CC_GPR: |
17510 | case RISCV::Select_FPR64INX_Using_CC_GPR: |
17511 | case RISCV::Select_FPR64IN32X_Using_CC_GPR: |
17512 | return true; |
17513 | } |
17514 | } |
17515 | |
17516 | static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, |
17517 | unsigned RelOpcode, unsigned EqOpcode, |
17518 | const RISCVSubtarget &Subtarget) { |
17519 | DebugLoc DL = MI.getDebugLoc(); |
17520 | Register DstReg = MI.getOperand(i: 0).getReg(); |
17521 | Register Src1Reg = MI.getOperand(i: 1).getReg(); |
17522 | Register Src2Reg = MI.getOperand(i: 2).getReg(); |
17523 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
17524 | Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
17525 | const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); |
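  // FLT/FLE are signaling comparisons that raise the invalid exception on
  // any NaN input, so to get quiet semantics we save and restore FFLAGS
  // around the comparison and then issue a quiet FEQ, which raises invalid
  // only for signaling NaNs. Illustrative FLT_S expansion:
  //   csrr  t0, fflags        # save FFLAGS
  //   flt.s dst, src1, src2   # signaling compare
  //   csrw  fflags, t0        # restore FFLAGS
  //   feq.s x0, src1, src2    # raise invalid only for signaling NaNs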
17526 | |
17527 | // Save the current FFLAGS. |
17528 | BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags); |
17529 | |
17530 | auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RelOpcode), DestReg: DstReg) |
17531 | .addReg(RegNo: Src1Reg) |
17532 | .addReg(RegNo: Src2Reg); |
17533 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
17534 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
17535 | |
17536 | // Restore the FFLAGS. |
17537 | BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) |
17538 | .addReg(SavedFFlags, RegState::Kill); |
17539 | |
  // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
17541 | auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0) |
17542 | .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill())) |
17543 | .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill())); |
17544 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
17545 | MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept); |
17546 | |
17547 | // Erase the pseudoinstruction. |
17548 | MI.eraseFromParent(); |
17549 | return BB; |
17550 | } |
17551 | |
17552 | static MachineBasicBlock * |
17553 | EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, |
17554 | MachineBasicBlock *ThisMBB, |
17555 | const RISCVSubtarget &Subtarget) { |
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17557 | // Without this, custom-inserter would have generated: |
17558 | // |
17559 | // A |
17560 | // | \ |
17561 | // | B |
17562 | // | / |
17563 | // C |
17564 | // | \ |
17565 | // | D |
17566 | // | / |
17567 | // E |
17568 | // |
17569 | // A: X = ...; Y = ... |
17570 | // B: empty |
17571 | // C: Z = PHI [X, A], [Y, B] |
17572 | // D: empty |
17573 | // E: PHI [X, C], [Z, D] |
17574 | // |
17575 | // If we lower both Select_FPRX_ in a single step, we can instead generate: |
17576 | // |
17577 | // A |
17578 | // | \ |
17579 | // | C |
17580 | // | /| |
17581 | // |/ | |
17582 | // | | |
17583 | // | D |
17584 | // | / |
17585 | // E |
17586 | // |
17587 | // A: X = ...; Y = ... |
17588 | // D: empty |
17589 | // E: PHI [X, A], [X, C], [Y, D] |
17590 | |
17591 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
17592 | const DebugLoc &DL = First.getDebugLoc(); |
17593 | const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); |
17594 | MachineFunction *F = ThisMBB->getParent(); |
17595 | MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
17596 | MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
17597 | MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
17598 | MachineFunction::iterator It = ++ThisMBB->getIterator(); |
17599 | F->insert(MBBI: It, MBB: FirstMBB); |
17600 | F->insert(MBBI: It, MBB: SecondMBB); |
17601 | F->insert(MBBI: It, MBB: SinkMBB); |
17602 | |
17603 | // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. |
17604 | SinkMBB->splice(Where: SinkMBB->begin(), Other: ThisMBB, |
17605 | From: std::next(x: MachineBasicBlock::iterator(First)), |
17606 | To: ThisMBB->end()); |
17607 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: ThisMBB); |
17608 | |
17609 | // Fallthrough block for ThisMBB. |
17610 | ThisMBB->addSuccessor(Succ: FirstMBB); |
17611 | // Fallthrough block for FirstMBB. |
17612 | FirstMBB->addSuccessor(Succ: SecondMBB); |
17613 | ThisMBB->addSuccessor(Succ: SinkMBB); |
17614 | FirstMBB->addSuccessor(Succ: SinkMBB); |
  // SecondMBB falls through to SinkMBB.
17616 | SecondMBB->addSuccessor(Succ: SinkMBB); |
17617 | |
17618 | auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(i: 3).getImm()); |
17619 | Register FLHS = First.getOperand(i: 1).getReg(); |
17620 | Register FRHS = First.getOperand(i: 2).getReg(); |
17621 | // Insert appropriate branch. |
17622 | BuildMI(BB: FirstMBB, MIMD: DL, MCID: TII.getBrCond(CC: FirstCC)) |
17623 | .addReg(RegNo: FLHS) |
17624 | .addReg(RegNo: FRHS) |
17625 | .addMBB(MBB: SinkMBB); |
17626 | |
17627 | Register SLHS = Second.getOperand(i: 1).getReg(); |
17628 | Register SRHS = Second.getOperand(i: 2).getReg(); |
17629 | Register Op1Reg4 = First.getOperand(i: 4).getReg(); |
17630 | Register Op1Reg5 = First.getOperand(i: 5).getReg(); |
17631 | |
17632 | auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(i: 3).getImm()); |
17633 | // Insert appropriate branch. |
17634 | BuildMI(BB: ThisMBB, MIMD: DL, MCID: TII.getBrCond(CC: SecondCC)) |
17635 | .addReg(RegNo: SLHS) |
17636 | .addReg(RegNo: SRHS) |
17637 | .addMBB(MBB: SinkMBB); |
17638 | |
17639 | Register DestReg = Second.getOperand(i: 0).getReg(); |
17640 | Register Op2Reg4 = Second.getOperand(i: 4).getReg(); |
17641 | BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg) |
17642 | .addReg(Op2Reg4) |
17643 | .addMBB(ThisMBB) |
17644 | .addReg(Op1Reg4) |
17645 | .addMBB(FirstMBB) |
17646 | .addReg(Op1Reg5) |
17647 | .addMBB(SecondMBB); |
17648 | |
17649 | // Now remove the Select_FPRX_s. |
17650 | First.eraseFromParent(); |
17651 | Second.eraseFromParent(); |
17652 | return SinkMBB; |
17653 | } |
17654 | |
17655 | static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, |
17656 | MachineBasicBlock *BB, |
17657 | const RISCVSubtarget &Subtarget) { |
17658 | // To "insert" Select_* instructions, we actually have to insert the triangle |
17659 | // control-flow pattern. The incoming instructions know the destination vreg |
17660 | // to set, the condition code register to branch on, the true/false values to |
17661 | // select between, and the condcode to use to select the appropriate branch. |
17662 | // |
17663 | // We produce the following control flow: |
17664 | // HeadMBB |
17665 | // | \ |
17666 | // | IfFalseMBB |
17667 | // | / |
17668 | // TailMBB |
17669 | // |
17670 | // When we find a sequence of selects we attempt to optimize their emission |
17671 | // by sharing the control flow. Currently we only handle cases where we have |
17672 | // multiple selects with the exact same condition (same LHS, RHS and CC). |
17673 | // The selects may be interleaved with other instructions if the other |
17674 | // instructions meet some requirements we deem safe: |
17675 | // - They are not pseudo instructions. |
  // - They are debug instructions, or otherwise
  // - They do not have side-effects, do not access memory, and their inputs
  //   do not depend on the results of the select pseudo-instructions.
17679 | // The TrueV/FalseV operands of the selects cannot depend on the result of |
17680 | // previous selects in the sequence. |
17681 | // These conditions could be further relaxed. See the X86 target for a |
17682 | // related approach and more information. |
17683 | // |
17684 | // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)) |
17685 | // is checked here and handled by a separate function - |
17686 | // EmitLoweredCascadedSelect. |
17687 | Register LHS = MI.getOperand(i: 1).getReg(); |
17688 | Register RHS = MI.getOperand(i: 2).getReg(); |
17689 | auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm()); |
17690 | |
17691 | SmallVector<MachineInstr *, 4> SelectDebugValues; |
17692 | SmallSet<Register, 4> SelectDests; |
17693 | SelectDests.insert(V: MI.getOperand(i: 0).getReg()); |
17694 | |
17695 | MachineInstr *LastSelectPseudo = &MI; |
17696 | auto Next = next_nodbg(It: MI.getIterator(), End: BB->instr_end()); |
17697 | if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() && |
17698 | Next->getOpcode() == MI.getOpcode() && |
17699 | Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && |
17700 | Next->getOperand(5).isKill()) { |
17701 | return EmitLoweredCascadedSelect(First&: MI, Second&: *Next, ThisMBB: BB, Subtarget); |
17702 | } |
17703 | |
17704 | for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); |
17705 | SequenceMBBI != E; ++SequenceMBBI) { |
17706 | if (SequenceMBBI->isDebugInstr()) |
17707 | continue; |
17708 | if (isSelectPseudo(MI&: *SequenceMBBI)) { |
17709 | if (SequenceMBBI->getOperand(i: 1).getReg() != LHS || |
17710 | SequenceMBBI->getOperand(i: 2).getReg() != RHS || |
17711 | SequenceMBBI->getOperand(i: 3).getImm() != CC || |
17712 | SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) || |
17713 | SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg())) |
17714 | break; |
17715 | LastSelectPseudo = &*SequenceMBBI; |
17716 | SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues); |
17717 | SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg()); |
17718 | continue; |
17719 | } |
17720 | if (SequenceMBBI->hasUnmodeledSideEffects() || |
17721 | SequenceMBBI->mayLoadOrStore() || |
17722 | SequenceMBBI->usesCustomInsertionHook()) |
17723 | break; |
17724 | if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) { |
17725 | return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg()); |
17726 | })) |
17727 | break; |
17728 | } |
17729 | |
17730 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
17731 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
17732 | DebugLoc DL = MI.getDebugLoc(); |
17733 | MachineFunction::iterator I = ++BB->getIterator(); |
17734 | |
17735 | MachineBasicBlock *HeadMBB = BB; |
17736 | MachineFunction *F = BB->getParent(); |
17737 | MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
17738 | MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
17739 | |
17740 | F->insert(MBBI: I, MBB: IfFalseMBB); |
17741 | F->insert(MBBI: I, MBB: TailMBB); |
17742 | |
17743 | // Transfer debug instructions associated with the selects to TailMBB. |
17744 | for (MachineInstr *DebugInstr : SelectDebugValues) { |
17745 | TailMBB->push_back(MI: DebugInstr->removeFromParent()); |
17746 | } |
17747 | |
17748 | // Move all instructions after the sequence to TailMBB. |
17749 | TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB, |
17750 | From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end()); |
17751 | // Update machine-CFG edges by transferring all successors of the current |
17752 | // block to the new block which will contain the Phi nodes for the selects. |
17753 | TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB); |
17754 | // Set the successors for HeadMBB. |
17755 | HeadMBB->addSuccessor(Succ: IfFalseMBB); |
17756 | HeadMBB->addSuccessor(Succ: TailMBB); |
17757 | |
17758 | // Insert appropriate branch. |
17759 | BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.getBrCond(CC)) |
17760 | .addReg(RegNo: LHS) |
17761 | .addReg(RegNo: RHS) |
17762 | .addMBB(MBB: TailMBB); |
17763 | |
17764 | // IfFalseMBB just falls through to TailMBB. |
17765 | IfFalseMBB->addSuccessor(Succ: TailMBB); |
17766 | |
17767 | // Create PHIs for all of the select pseudo-instructions. |
17768 | auto SelectMBBI = MI.getIterator(); |
17769 | auto SelectEnd = std::next(x: LastSelectPseudo->getIterator()); |
17770 | auto InsertionPoint = TailMBB->begin(); |
17771 | while (SelectMBBI != SelectEnd) { |
17772 | auto Next = std::next(x: SelectMBBI); |
17773 | if (isSelectPseudo(MI&: *SelectMBBI)) { |
17774 | // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] |
17775 | BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), |
17776 | TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) |
17777 | .addReg(SelectMBBI->getOperand(4).getReg()) |
17778 | .addMBB(HeadMBB) |
17779 | .addReg(SelectMBBI->getOperand(5).getReg()) |
17780 | .addMBB(IfFalseMBB); |
17781 | SelectMBBI->eraseFromParent(); |
17782 | } |
17783 | SelectMBBI = Next; |
17784 | } |
17785 | |
17786 | F->getProperties().reset(P: MachineFunctionProperties::Property::NoPHIs); |
17787 | return TailMBB; |
17788 | } |
17789 | |
// Helper to find the masked pseudo instruction corresponding to an MC
// instruction, LMUL, and SEW.
17791 | static const RISCV::RISCVMaskedPseudoInfo * |
17792 | lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) { |
17793 | const RISCVVInversePseudosTable::PseudoInfo *Inverse = |
17794 | RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW); |
17795 | assert(Inverse && "Unexpected LMUL and SEW pair for instruction" ); |
17796 | const RISCV::RISCVMaskedPseudoInfo *Masked = |
17797 | RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo); |
17798 | assert(Masked && "Could not find masked instruction for LMUL and SEW pair" ); |
17799 | return Masked; |
17800 | } |
17801 | |
17802 | static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, |
17803 | MachineBasicBlock *BB, |
17804 | unsigned CVTXOpc) { |
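  // The NOEXCEPT pseudo is expanded as: save FFLAGS, a masked VFCVT_X_F to
  // round to integer, a masked VFCVT_F_X to convert back, and a restore of
  // FFLAGS, so the rounding leaves no floating-point exception flags set.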
17805 | DebugLoc DL = MI.getDebugLoc(); |
17806 | |
17807 | const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); |
17808 | |
17809 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
17810 | Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
17811 | |
17812 | // Save the old value of FFLAGS. |
17813 | BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS); |
17814 | |
17815 | assert(MI.getNumOperands() == 7); |
17816 | |
17817 | // Emit a VFCVT_X_F |
17818 | const TargetRegisterInfo *TRI = |
17819 | BB->getParent()->getSubtarget().getRegisterInfo(); |
17820 | const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx: 0, TII: &TII, TRI); |
17821 | Register Tmp = MRI.createVirtualRegister(RegClass: RC); |
17822 | BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp) |
17823 | .add(MI.getOperand(1)) |
17824 | .add(MI.getOperand(2)) |
17825 | .add(MI.getOperand(3)) |
17826 | .add(MachineOperand::CreateImm(7)) // frm = DYN |
17827 | .add(MI.getOperand(4)) |
17828 | .add(MI.getOperand(5)) |
17829 | .add(MI.getOperand(6)) |
17830 | .add(MachineOperand::CreateReg(RISCV::FRM, |
17831 | /*IsDef*/ false, |
17832 | /*IsImp*/ true)); |
17833 | |
17834 | // Emit a VFCVT_F_X |
17835 | RISCVII::VLMUL LMul = RISCVII::getLMul(TSFlags: MI.getDesc().TSFlags); |
17836 | unsigned Log2SEW = MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm(); |
17837 | // There is no E8 variant for VFCVT_F_X. |
17838 | assert(Log2SEW >= 4); |
17839 | unsigned CVTFOpc = |
17840 | lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW) |
17841 | ->MaskedPseudo; |
17842 | |
17843 | BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) |
17844 | .add(MI.getOperand(0)) |
17845 | .add(MI.getOperand(1)) |
17846 | .addReg(Tmp) |
17847 | .add(MI.getOperand(3)) |
17848 | .add(MachineOperand::CreateImm(7)) // frm = DYN |
17849 | .add(MI.getOperand(4)) |
17850 | .add(MI.getOperand(5)) |
17851 | .add(MI.getOperand(6)) |
17852 | .add(MachineOperand::CreateReg(RISCV::FRM, |
17853 | /*IsDef*/ false, |
17854 | /*IsImp*/ true)); |
17855 | |
17856 | // Restore FFLAGS. |
17857 | BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) |
17858 | .addReg(SavedFFLAGS, RegState::Kill); |
17859 | |
17860 | // Erase the pseudoinstruction. |
17861 | MI.eraseFromParent(); |
17862 | return BB; |
17863 | } |
17864 | |
17865 | static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, |
17866 | const RISCVSubtarget &Subtarget) { |
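  // PseudoFROUND expands to a compare-and-branch diamond: if |Src| compares
  // less than MaxReg (a magnitude threshold above which values have no
  // fractional part), round by converting to an integer and back and restore
  // the original sign; otherwise Src is already integral (or NaN) and is
  // passed through unchanged.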
17867 | unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; |
17868 | const TargetRegisterClass *RC; |
17869 | switch (MI.getOpcode()) { |
17870 | default: |
17871 | llvm_unreachable("Unexpected opcode" ); |
17872 | case RISCV::PseudoFROUND_H: |
17873 | CmpOpc = RISCV::FLT_H; |
17874 | F2IOpc = RISCV::FCVT_W_H; |
17875 | I2FOpc = RISCV::FCVT_H_W; |
17876 | FSGNJOpc = RISCV::FSGNJ_H; |
17877 | FSGNJXOpc = RISCV::FSGNJX_H; |
17878 | RC = &RISCV::FPR16RegClass; |
17879 | break; |
17880 | case RISCV::PseudoFROUND_H_INX: |
17881 | CmpOpc = RISCV::FLT_H_INX; |
17882 | F2IOpc = RISCV::FCVT_W_H_INX; |
17883 | I2FOpc = RISCV::FCVT_H_W_INX; |
17884 | FSGNJOpc = RISCV::FSGNJ_H_INX; |
17885 | FSGNJXOpc = RISCV::FSGNJX_H_INX; |
17886 | RC = &RISCV::GPRF16RegClass; |
17887 | break; |
17888 | case RISCV::PseudoFROUND_S: |
17889 | CmpOpc = RISCV::FLT_S; |
17890 | F2IOpc = RISCV::FCVT_W_S; |
17891 | I2FOpc = RISCV::FCVT_S_W; |
17892 | FSGNJOpc = RISCV::FSGNJ_S; |
17893 | FSGNJXOpc = RISCV::FSGNJX_S; |
17894 | RC = &RISCV::FPR32RegClass; |
17895 | break; |
17896 | case RISCV::PseudoFROUND_S_INX: |
17897 | CmpOpc = RISCV::FLT_S_INX; |
17898 | F2IOpc = RISCV::FCVT_W_S_INX; |
17899 | I2FOpc = RISCV::FCVT_S_W_INX; |
17900 | FSGNJOpc = RISCV::FSGNJ_S_INX; |
17901 | FSGNJXOpc = RISCV::FSGNJX_S_INX; |
17902 | RC = &RISCV::GPRF32RegClass; |
17903 | break; |
17904 | case RISCV::PseudoFROUND_D: |
17905 | assert(Subtarget.is64Bit() && "Expected 64-bit GPR." ); |
17906 | CmpOpc = RISCV::FLT_D; |
17907 | F2IOpc = RISCV::FCVT_L_D; |
17908 | I2FOpc = RISCV::FCVT_D_L; |
17909 | FSGNJOpc = RISCV::FSGNJ_D; |
17910 | FSGNJXOpc = RISCV::FSGNJX_D; |
17911 | RC = &RISCV::FPR64RegClass; |
17912 | break; |
17913 | case RISCV::PseudoFROUND_D_INX: |
17914 | assert(Subtarget.is64Bit() && "Expected 64-bit GPR." ); |
17915 | CmpOpc = RISCV::FLT_D_INX; |
17916 | F2IOpc = RISCV::FCVT_L_D_INX; |
17917 | I2FOpc = RISCV::FCVT_D_L_INX; |
17918 | FSGNJOpc = RISCV::FSGNJ_D_INX; |
17919 | FSGNJXOpc = RISCV::FSGNJX_D_INX; |
17920 | RC = &RISCV::GPRRegClass; |
17921 | break; |
17922 | } |
17923 | |
17924 | const BasicBlock *BB = MBB->getBasicBlock(); |
17925 | DebugLoc DL = MI.getDebugLoc(); |
17926 | MachineFunction::iterator I = ++MBB->getIterator(); |
17927 | |
17928 | MachineFunction *F = MBB->getParent(); |
17929 | MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); |
17930 | MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); |
17931 | |
17932 | F->insert(MBBI: I, MBB: CvtMBB); |
17933 | F->insert(MBBI: I, MBB: DoneMBB); |
17934 | // Move all instructions after the sequence to DoneMBB. |
17935 | DoneMBB->splice(Where: DoneMBB->end(), Other: MBB, From: MachineBasicBlock::iterator(MI), |
17936 | To: MBB->end()); |
17937 | // Update machine-CFG edges by transferring all successors of the current |
17938 | // block to the new block which will contain the Phi nodes for the selects. |
17939 | DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
17940 | // Set the successors for MBB. |
17941 | MBB->addSuccessor(Succ: CvtMBB); |
17942 | MBB->addSuccessor(Succ: DoneMBB); |
17943 | |
17944 | Register DstReg = MI.getOperand(i: 0).getReg(); |
17945 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
17946 | Register MaxReg = MI.getOperand(i: 2).getReg(); |
17947 | int64_t FRM = MI.getOperand(i: 3).getImm(); |
17948 | |
17949 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
17950 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
17951 | |
17952 | Register FabsReg = MRI.createVirtualRegister(RegClass: RC); |
17953 | BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); |
17954 | |
17955 | // Compare the FP value to the max value. |
17956 | Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
17957 | auto MIB = |
17958 | BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); |
17959 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
17960 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
17961 | |
17962 | // Insert branch. |
17963 | BuildMI(MBB, DL, TII.get(RISCV::BEQ)) |
17964 | .addReg(CmpReg) |
17965 | .addReg(RISCV::X0) |
17966 | .addMBB(DoneMBB); |
17967 | |
17968 | CvtMBB->addSuccessor(Succ: DoneMBB); |
17969 | |
17970 | // Convert to integer. |
17971 | Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
17972 | MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); |
17973 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
17974 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
17975 | |
17976 | // Convert back to FP. |
17977 | Register I2FReg = MRI.createVirtualRegister(RegClass: RC); |
17978 | MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); |
17979 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
17980 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
17981 | |
17982 | // Restore the sign bit. |
17983 | Register CvtReg = MRI.createVirtualRegister(RegClass: RC); |
17984 | BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); |
17985 | |
17986 | // Merge the results. |
17987 | BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) |
17988 | .addReg(SrcReg) |
17989 | .addMBB(MBB) |
17990 | .addReg(CvtReg) |
17991 | .addMBB(CvtMBB); |
17992 | |
17993 | MI.eraseFromParent(); |
17994 | return DoneMBB; |
17995 | } |
17996 | |
17997 | MachineBasicBlock * |
17998 | RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
17999 | MachineBasicBlock *BB) const { |
18000 | switch (MI.getOpcode()) { |
18001 | default: |
18002 | llvm_unreachable("Unexpected instr type to insert" ); |
18003 | case RISCV::ReadCounterWide: |
18004 | assert(!Subtarget.is64Bit() && |
18005 | "ReadCounterWide is only to be used on riscv32" ); |
18006 | return emitReadCounterWidePseudo(MI, BB); |
18007 | case RISCV::Select_GPR_Using_CC_GPR: |
18008 | case RISCV::Select_FPR16_Using_CC_GPR: |
18009 | case RISCV::Select_FPR16INX_Using_CC_GPR: |
18010 | case RISCV::Select_FPR32_Using_CC_GPR: |
18011 | case RISCV::Select_FPR32INX_Using_CC_GPR: |
18012 | case RISCV::Select_FPR64_Using_CC_GPR: |
18013 | case RISCV::Select_FPR64INX_Using_CC_GPR: |
18014 | case RISCV::Select_FPR64IN32X_Using_CC_GPR: |
18015 | return emitSelectPseudo(MI, BB, Subtarget); |
18016 | case RISCV::BuildPairF64Pseudo: |
18017 | return emitBuildPairF64Pseudo(MI, BB, Subtarget); |
18018 | case RISCV::SplitF64Pseudo: |
18019 | return emitSplitF64Pseudo(MI, BB, Subtarget); |
18020 | case RISCV::PseudoQuietFLE_H: |
18021 | return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget); |
18022 | case RISCV::PseudoQuietFLE_H_INX: |
18023 | return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget); |
18024 | case RISCV::PseudoQuietFLT_H: |
18025 | return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget); |
18026 | case RISCV::PseudoQuietFLT_H_INX: |
18027 | return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget); |
18028 | case RISCV::PseudoQuietFLE_S: |
18029 | return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget); |
18030 | case RISCV::PseudoQuietFLE_S_INX: |
18031 | return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget); |
18032 | case RISCV::PseudoQuietFLT_S: |
18033 | return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget); |
18034 | case RISCV::PseudoQuietFLT_S_INX: |
18035 | return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget); |
18036 | case RISCV::PseudoQuietFLE_D: |
18037 | return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget); |
18038 | case RISCV::PseudoQuietFLE_D_INX: |
18039 | return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget); |
18040 | case RISCV::PseudoQuietFLE_D_IN32X: |
18041 | return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X, |
18042 | Subtarget); |
18043 | case RISCV::PseudoQuietFLT_D: |
18044 | return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget); |
18045 | case RISCV::PseudoQuietFLT_D_INX: |
18046 | return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget); |
18047 | case RISCV::PseudoQuietFLT_D_IN32X: |
18048 | return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X, |
18049 | Subtarget); |
18050 | |
18051 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: |
18052 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK); |
18053 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: |
18054 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK); |
18055 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: |
18056 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK); |
18057 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: |
18058 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK); |
18059 | case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: |
18060 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK); |
18061 | case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: |
18062 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK); |
18063 | case RISCV::PseudoFROUND_H: |
18064 | case RISCV::PseudoFROUND_H_INX: |
18065 | case RISCV::PseudoFROUND_S: |
18066 | case RISCV::PseudoFROUND_S_INX: |
18067 | case RISCV::PseudoFROUND_D: |
18068 | case RISCV::PseudoFROUND_D_INX: |
18069 | case RISCV::PseudoFROUND_D_IN32X: |
18070 | return emitFROUND(MI, MBB: BB, Subtarget); |
18071 | case TargetOpcode::STATEPOINT: |
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
    // while the jal call instruction (to which the statepoint will
    // eventually be lowered) has an implicit def. This def is early-clobber
    // as it will be set at the moment of the call, earlier than any use is
    // read. Add this implicit dead def here as a workaround.
18077 | MI.addOperand(*MI.getMF(), |
18078 | MachineOperand::CreateReg( |
18079 | RISCV::X1, /*isDef*/ true, |
18080 | /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, |
18081 | /*isUndef*/ false, /*isEarlyClobber*/ true)); |
18082 | [[fallthrough]]; |
18083 | case TargetOpcode::STACKMAP: |
18084 | case TargetOpcode::PATCHPOINT: |
18085 | if (!Subtarget.is64Bit()) |
18086 | report_fatal_error(reason: "STACKMAP, PATCHPOINT and STATEPOINT are only " |
18087 | "supported on 64-bit targets" ); |
18088 | return emitPatchPoint(MI, MBB: BB); |
18089 | } |
18090 | } |
18091 | |
18092 | void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, |
18093 | SDNode *Node) const { |
18094 | // Add FRM dependency to any instructions with dynamic rounding mode. |
18095 | int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm); |
18096 | if (Idx < 0) { |
18097 | // Vector pseudos have FRM index indicated by TSFlags. |
18098 | Idx = RISCVII::getFRMOpNum(Desc: MI.getDesc()); |
18099 | if (Idx < 0) |
18100 | return; |
18101 | } |
18102 | if (MI.getOperand(i: Idx).getImm() != RISCVFPRndMode::DYN) |
18103 | return; |
18104 | // If the instruction already reads FRM, don't add another read. |
18105 | if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) |
18106 | return; |
18107 | MI.addOperand( |
18108 | MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); |
18109 | } |
18110 | |
18111 | // Calling Convention Implementation. |
18112 | // The expectations for frontend ABI lowering vary from target to target. |
18113 | // Ideally, an LLVM frontend would be able to avoid worrying about many ABI |
18114 | // details, but this is a longer term goal. For now, we simply try to keep the |
18115 | // role of the frontend as simple and well-defined as possible. The rules can |
18116 | // be summarised as: |
18117 | // * Never split up large scalar arguments. We handle them here. |
18118 | // * If a hardfloat calling convention is being used, and the struct may be |
18119 | // passed in a pair of registers (fp+fp, int+fp), and both registers are |
18120 | // available, then pass as two separate arguments. If either the GPRs or FPRs |
18121 | // are exhausted, then pass according to the rule below. |
18122 | // * If a struct could never be passed in registers or directly in a stack |
18123 | // slot (as it is larger than 2*XLEN and the floating point rules don't |
18124 | // apply), then pass it using a pointer with the byval attribute. |
18125 | // * If a struct is less than 2*XLEN, then coerce to either a two-element |
18126 | // word-sized array or a 2*XLEN scalar (depending on alignment). |
18127 | // * The frontend can determine whether a struct is returned by reference or |
18128 | // not based on its size and fields. If it will be returned by reference, the |
18129 | // frontend must modify the prototype so a pointer with the sret annotation is |
18130 | // passed as the first argument. This is not necessary for large scalar |
18131 | // returns. |
18132 | // * Struct return values and varargs should be coerced to structs containing |
18133 | // register-size fields in the same situations they would be for fixed |
18134 | // arguments. |
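// For example, under the hard-float LP64D ABI a struct such as
// { double d; int i; } may be passed as two separate arguments (one FPR and
// one GPR) while both register classes have free registers, whereas a struct
// larger than 2*XLEN with no floating-point eligibility is passed byval via
// a pointer.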
18135 | |
18136 | static const MCPhysReg ArgFPR16s[] = { |
18137 | RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, |
18138 | RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H |
18139 | }; |
18140 | static const MCPhysReg ArgFPR32s[] = { |
18141 | RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, |
18142 | RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F |
18143 | }; |
18144 | static const MCPhysReg ArgFPR64s[] = { |
18145 | RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, |
18146 | RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D |
18147 | }; |
18148 | // This is an interim calling convention and it may be changed in the future. |
18149 | static const MCPhysReg ArgVRs[] = { |
18150 | RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, |
18151 | RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, |
18152 | RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; |
18153 | static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, |
18154 | RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, |
18155 | RISCV::V20M2, RISCV::V22M2}; |
18156 | static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, |
18157 | RISCV::V20M4}; |
18158 | static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; |
18159 | |
18160 | ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { |
  // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
  // the ILP32E/LP64E ABIs.
18163 | static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
18164 | RISCV::X13, RISCV::X14, RISCV::X15, |
18165 | RISCV::X16, RISCV::X17}; |
  // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18167 | static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
18168 | RISCV::X13, RISCV::X14, RISCV::X15}; |
18169 | |
18170 | if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) |
18171 | return ArrayRef(ArgEGPRs); |
18172 | |
18173 | return ArrayRef(ArgIGPRs); |
18174 | } |
18175 | |
18176 | static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { |
  // The GPRs used for passing arguments in the FastCC. X5 and X6 might be
  // used for the save-restore libcalls, so we don't use them.
18179 | static const MCPhysReg FastCCIGPRs[] = { |
18180 | RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, |
18181 | RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, |
18182 | RISCV::X29, RISCV::X30, RISCV::X31}; |
18183 | |
  // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
18185 | static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
18186 | RISCV::X13, RISCV::X14, RISCV::X15, |
18187 | RISCV::X7}; |
18188 | |
18189 | if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) |
18190 | return ArrayRef(FastCCEGPRs); |
18191 | |
18192 | return ArrayRef(FastCCIGPRs); |
18193 | } |
18194 | |
18195 | // Pass a 2*XLEN argument that has been split into two XLEN values through |
18196 | // registers or the stack as necessary. |
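// For example, an i64 argument on RV32 may be passed in a pair of GPRs, with
// the low half in the last free GPR and the high half on the stack, or
// entirely on the stack once the argument GPRs are exhausted.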
18197 | static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, |
18198 | ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, |
18199 | MVT ValVT2, MVT LocVT2, |
18200 | ISD::ArgFlagsTy ArgFlags2, bool EABI) { |
18201 | unsigned XLenInBytes = XLen / 8; |
18202 | const RISCVSubtarget &STI = |
18203 | State.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
18204 | ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI: STI.getTargetABI()); |
18205 | |
18206 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
18207 | // At least one half can be passed via register. |
18208 | State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg, |
18209 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
18210 | } else { |
18211 | // Both halves must be passed on the stack, with proper alignment. |
18212 | // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte |
18213 | // alignment. This behavior may be changed when RV32E/ILP32E is ratified. |
18214 | Align StackAlign(XLenInBytes); |
18215 | if (!EABI || XLen != 32) |
18216 | StackAlign = std::max(a: StackAlign, b: ArgFlags1.getNonZeroOrigAlign()); |
18217 | State.addLoc( |
18218 | V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), |
18219 | Offset: State.AllocateStack(Size: XLenInBytes, Alignment: StackAlign), |
18220 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
18221 | State.addLoc(V: CCValAssign::getMem( |
18222 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)), |
18223 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
18224 | return false; |
18225 | } |
18226 | |
18227 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
18228 | // The second half can also be passed via register. |
18229 | State.addLoc( |
18230 | V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full)); |
18231 | } else { |
18232 | // The second half is passed via the stack, without additional alignment. |
18233 | State.addLoc(V: CCValAssign::getMem( |
18234 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)), |
18235 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
18236 | } |
18237 | |
18238 | return false; |
18239 | } |
18240 | |
18241 | // Implements the RISC-V calling convention. Returns true upon failure. |
18242 | bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, |
18243 | MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, |
18244 | ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, |
18245 | bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, |
18246 | RVVArgDispatcher &RVVDispatcher) { |
18247 | unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); |
18248 | assert(XLen == 32 || XLen == 64); |
18249 | MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; |
18250 | |
  // The static chain parameter must not be passed in normal argument
  // registers, so we assign t2 to it, as done in GCC's
  // __builtin_call_with_static_chain.
18253 | if (ArgFlags.isNest()) { |
18254 | if (unsigned Reg = State.AllocateReg(RISCV::X7)) { |
18255 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18256 | return false; |
18257 | } |
18258 | } |
18259 | |
  // Any return value split into more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
18262 | if (!LocVT.isVector() && IsRet && ValNo > 1) |
18263 | return true; |
18264 | |
  // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
  // passing a variadic argument, or if no F16/F32 argument registers are
  // available.
18267 | bool UseGPRForF16_F32 = true; |
  // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if
  // passing a variadic argument, or if no F64 argument registers are
  // available.
18270 | bool UseGPRForF64 = true; |
18271 | |
18272 | switch (ABI) { |
18273 | default: |
18274 | llvm_unreachable("Unexpected ABI" ); |
18275 | case RISCVABI::ABI_ILP32: |
18276 | case RISCVABI::ABI_ILP32E: |
18277 | case RISCVABI::ABI_LP64: |
18278 | case RISCVABI::ABI_LP64E: |
18279 | break; |
18280 | case RISCVABI::ABI_ILP32F: |
18281 | case RISCVABI::ABI_LP64F: |
18282 | UseGPRForF16_F32 = !IsFixed; |
18283 | break; |
18284 | case RISCVABI::ABI_ILP32D: |
18285 | case RISCVABI::ABI_LP64D: |
18286 | UseGPRForF16_F32 = !IsFixed; |
18287 | UseGPRForF64 = !IsFixed; |
18288 | break; |
18289 | } |
18290 | |
18291 | // FPR16, FPR32, and FPR64 alias each other. |
18292 | if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) { |
18293 | UseGPRForF16_F32 = true; |
18294 | UseGPRForF64 = true; |
18295 | } |
18296 | |
18297 | // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and |
18298 | // similar local variables rather than directly checking against the target |
18299 | // ABI. |
18300 | |
18301 | if (UseGPRForF16_F32 && |
18302 | (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) { |
18303 | LocVT = XLenVT; |
18304 | LocInfo = CCValAssign::BCvt; |
18305 | } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { |
18306 | LocVT = MVT::i64; |
18307 | LocInfo = CCValAssign::BCvt; |
18308 | } |
18309 | |
18310 | ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); |
18311 | |
18312 | // If this is a variadic argument, the RISC-V calling convention requires |
18313 | // that it is assigned an 'even' or 'aligned' register if it has 8-byte |
18314 | // alignment (RV32) or 16-byte alignment (RV64). An aligned register should |
18315 | // be used regardless of whether the original argument was split during |
18316 | // legalisation or not. The argument will not be passed by registers if the |
18317 | // original type is larger than 2*XLEN, so the register alignment rule does |
18318 | // not apply. |
18319 | // TODO: To be compatible with GCC's behaviors, we don't align registers |
18320 | // currently if we are using ILP32E calling convention. This behavior may be |
18321 | // changed when RV32E/ILP32E is ratified. |
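  // For example, a variadic double on RV32 (size and alignment both 8 bytes)
  // whose next free register would be a1 skips a1 and is passed in the
  // aligned pair a2/a3.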
18322 | unsigned TwoXLenInBytes = (2 * XLen) / 8; |
18323 | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && |
18324 | DL.getTypeAllocSize(Ty: OrigTy) == TwoXLenInBytes && |
18325 | ABI != RISCVABI::ABI_ILP32E) { |
18326 | unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs); |
18327 | // Skip 'odd' register if necessary. |
18328 | if (RegIdx != std::size(cont: ArgGPRs) && RegIdx % 2 == 1) |
18329 | State.AllocateReg(Regs: ArgGPRs); |
18330 | } |
18331 | |
18332 | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
18333 | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
18334 | State.getPendingArgFlags(); |
18335 | |
18336 | assert(PendingLocs.size() == PendingArgFlags.size() && |
18337 | "PendingLocs and PendingArgFlags out of sync" ); |
18338 | |
18339 | // Handle passing f64 on RV32D with a soft float ABI or when floating point |
18340 | // registers are exhausted. |
18341 | if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { |
18342 | assert(PendingLocs.empty() && "Can't lower f64 if it is split" ); |
    // Depending on the available argument GPRs, f64 may be passed in a pair of
18344 | // GPRs, split between a GPR and the stack, or passed completely on the |
18345 | // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these |
18346 | // cases. |
18347 | Register Reg = State.AllocateReg(Regs: ArgGPRs); |
18348 | if (!Reg) { |
18349 | unsigned StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8)); |
18350 | State.addLoc( |
18351 | V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
18352 | return false; |
18353 | } |
18354 | LocVT = MVT::i32; |
18355 | State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18356 | Register HiReg = State.AllocateReg(Regs: ArgGPRs); |
18357 | if (HiReg) { |
18358 | State.addLoc( |
18359 | V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: HiReg, LocVT, HTP: LocInfo)); |
18360 | } else { |
18361 | unsigned StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4)); |
18362 | State.addLoc( |
18363 | V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
18364 | } |
18365 | return false; |
18366 | } |
18367 | |
18368 | // Fixed-length vectors are located in the corresponding scalable-vector |
18369 | // container types. |
18370 | if (ValVT.isFixedLengthVector()) |
18371 | LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT); |
18372 | |
18373 | // Split arguments might be passed indirectly, so keep track of the pending |
18374 | // values. Split vectors are passed via a mix of registers and indirectly, so |
18375 | // treat them as we would any other argument. |
18376 | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
18377 | LocVT = XLenVT; |
18378 | LocInfo = CCValAssign::Indirect; |
18379 | PendingLocs.push_back( |
18380 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
18381 | PendingArgFlags.push_back(Elt: ArgFlags); |
18382 | if (!ArgFlags.isSplitEnd()) { |
18383 | return false; |
18384 | } |
18385 | } |
18386 | |
18387 | // If the split argument only had two elements, it should be passed directly |
18388 | // in registers or on the stack. |
18389 | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
18390 | PendingLocs.size() <= 2) { |
18391 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" ); |
18392 | // Apply the normal calling convention rules to the first half of the |
18393 | // split argument. |
18394 | CCValAssign VA = PendingLocs[0]; |
18395 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
18396 | PendingLocs.clear(); |
18397 | PendingArgFlags.clear(); |
18398 | return CC_RISCVAssign2XLen( |
18399 | XLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, ArgFlags2: ArgFlags, |
18400 | EABI: ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); |
18401 | } |
18402 | |
18403 | // Allocate to a register if possible, or else a stack slot. |
18404 | Register Reg; |
18405 | unsigned StoreSizeBytes = XLen / 8; |
18406 | Align StackAlign = Align(XLen / 8); |
18407 | |
18408 | if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32) |
18409 | Reg = State.AllocateReg(ArgFPR16s); |
18410 | else if (ValVT == MVT::f32 && !UseGPRForF16_F32) |
18411 | Reg = State.AllocateReg(ArgFPR32s); |
18412 | else if (ValVT == MVT::f64 && !UseGPRForF64) |
18413 | Reg = State.AllocateReg(ArgFPR64s); |
18414 | else if (ValVT.isVector()) { |
18415 | Reg = RVVDispatcher.getNextPhysReg(); |
18416 | if (!Reg) { |
18417 | // For return values, the vector must be passed fully via registers or |
18418 | // via the stack. |
18419 | // FIXME: The proposed vector ABI only mandates v8-v15 for return values, |
18420 | // but we're using all of them. |
18421 | if (IsRet) |
18422 | return true; |
18423 | // Try using a GPR to pass the address |
18424 | if ((Reg = State.AllocateReg(Regs: ArgGPRs))) { |
18425 | LocVT = XLenVT; |
18426 | LocInfo = CCValAssign::Indirect; |
18427 | } else if (ValVT.isScalableVector()) { |
18428 | LocVT = XLenVT; |
18429 | LocInfo = CCValAssign::Indirect; |
18430 | } else { |
18431 | // Pass fixed-length vectors on the stack. |
18432 | LocVT = ValVT; |
18433 | StoreSizeBytes = ValVT.getStoreSize(); |
18434 | // Align vectors to their element sizes, being careful for vXi1 |
18435 | // vectors. |
18436 | StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); |
18437 | } |
18438 | } |
18439 | } else { |
18440 | Reg = State.AllocateReg(Regs: ArgGPRs); |
18441 | } |
18442 | |
18443 | unsigned StackOffset = |
18444 | Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign); |
18445 | |
18446 | // If we reach this point and PendingLocs is non-empty, we must be at the |
18447 | // end of a split argument that must be passed indirectly. |
18448 | if (!PendingLocs.empty()) { |
18449 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" ); |
18450 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" ); |
18451 | |
18452 | for (auto &It : PendingLocs) { |
18453 | if (Reg) |
18454 | It.convertToReg(RegNo: Reg); |
18455 | else |
18456 | It.convertToMem(Offset: StackOffset); |
18457 | State.addLoc(V: It); |
18458 | } |
18459 | PendingLocs.clear(); |
18460 | PendingArgFlags.clear(); |
18461 | return false; |
18462 | } |
18463 | |
18464 | assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || |
18465 | (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && |
18466 | "Expected an XLenVT or vector types at this stage" ); |
18467 | |
18468 | if (Reg) { |
18469 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18470 | return false; |
18471 | } |
18472 | |
18473 | // When a scalar floating-point value is passed on the stack, no |
18474 | // bit-conversion is needed. |
18475 | if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) { |
18476 | assert(!ValVT.isVector()); |
18477 | LocVT = ValVT; |
18478 | LocInfo = CCValAssign::Full; |
18479 | } |
18480 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
18481 | return false; |
18482 | } |
18483 | |
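// Returns the index of the first vector-of-i1 (mask) argument, if any. As an
// illustrative example, for
//   void f(<vscale x 4 x i1> %m, <vscale x 4 x i32> %v)
// this returns 0; the RVV calling convention can then steer the first mask
// argument towards the mask register v0, with data vectors allocated from v8
// onwards.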
18484 | template <typename ArgTy> |
18485 | static std::optional<unsigned> preAssignMask(const ArgTy &Args) { |
18486 | for (const auto &ArgIdx : enumerate(Args)) { |
18487 | MVT ArgVT = ArgIdx.value().VT; |
18488 | if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) |
18489 | return ArgIdx.index(); |
18490 | } |
18491 | return std::nullopt; |
18492 | } |
18493 | |
18494 | void RISCVTargetLowering::analyzeInputArgs( |
18495 | MachineFunction &MF, CCState &CCInfo, |
18496 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
18497 | RISCVCCAssignFn Fn) const { |
18498 | unsigned NumArgs = Ins.size(); |
18499 | FunctionType *FType = MF.getFunction().getFunctionType(); |
18500 | |
18501 | RVVArgDispatcher Dispatcher; |
18502 | if (IsRet) { |
18503 | Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)}; |
18504 | } else { |
18505 | SmallVector<Type *, 4> TypeList; |
18506 | for (const Argument &Arg : MF.getFunction().args()) |
18507 | TypeList.push_back(Elt: Arg.getType()); |
18508 | Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)}; |
18509 | } |
18510 | |
18511 | for (unsigned i = 0; i != NumArgs; ++i) { |
18512 | MVT ArgVT = Ins[i].VT; |
18513 | ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; |
18514 | |
18515 | Type *ArgTy = nullptr; |
18516 | if (IsRet) |
18517 | ArgTy = FType->getReturnType(); |
18518 | else if (Ins[i].isOrigArg()) |
18519 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
18520 | |
18521 | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
18522 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, |
18523 | ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, |
18524 | Dispatcher)) { |
18525 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " |
18526 | << ArgVT << '\n'); |
18527 | llvm_unreachable(nullptr); |
18528 | } |
18529 | } |
18530 | } |
18531 | |
18532 | void RISCVTargetLowering::analyzeOutputArgs( |
18533 | MachineFunction &MF, CCState &CCInfo, |
18534 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
18535 | CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { |
18536 | unsigned NumArgs = Outs.size(); |
18537 | |
18538 | SmallVector<Type *, 4> TypeList; |
18539 | if (IsRet) |
18540 | TypeList.push_back(Elt: MF.getFunction().getReturnType()); |
18541 | else if (CLI) |
18542 | for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs()) |
18543 | TypeList.push_back(Elt: Arg.Ty); |
18544 | RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)}; |
18545 | |
18546 | for (unsigned i = 0; i != NumArgs; i++) { |
18547 | MVT ArgVT = Outs[i].VT; |
18548 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
18549 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
18550 | |
18551 | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
18552 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, |
18553 | ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, |
18554 | Dispatcher)) { |
18555 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " |
18556 | << ArgVT << "\n" ); |
18557 | llvm_unreachable(nullptr); |
18558 | } |
18559 | } |
18560 | } |
18561 | |
18562 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
18563 | // values. |
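// Example (illustrative): on RV64 with the lp64 ABI but hardware F, an f32
// value travels bit-converted in an i64 GPR; the BCvt case below rebuilds
// the f32 via FMV_W_X_RV64 (or a truncate plus bitcast when the experimental
// RV64LegalI32 mode is enabled).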
18564 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
18565 | const CCValAssign &VA, const SDLoc &DL, |
18566 | const RISCVSubtarget &Subtarget) { |
18567 | switch (VA.getLocInfo()) { |
18568 | default: |
18569 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
18570 | case CCValAssign::Full: |
18571 | if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) |
18572 | Val = convertFromScalableVector(VT: VA.getValVT(), V: Val, DAG, Subtarget); |
18573 | break; |
18574 | case CCValAssign::BCvt: |
18575 | if (VA.getLocVT().isInteger() && |
18576 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { |
18577 | Val = DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: VA.getValVT(), Operand: Val); |
18578 | } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { |
18579 | if (RV64LegalI32) { |
18580 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val); |
18581 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); |
18582 | } else { |
18583 | Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); |
18584 | } |
18585 | } else { |
18586 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
18587 | } |
18588 | break; |
18589 | } |
18590 | return Val; |
18591 | } |
18592 | |
18593 | // The caller is responsible for loading the full value if the argument is |
18594 | // passed with CCValAssign::Indirect. |
18595 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
18596 | const CCValAssign &VA, const SDLoc &DL, |
18597 | const ISD::InputArg &In, |
18598 | const RISCVTargetLowering &TLI) { |
18599 | MachineFunction &MF = DAG.getMachineFunction(); |
18600 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
18601 | EVT LocVT = VA.getLocVT(); |
18602 | SDValue Val; |
18603 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
18604 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
18605 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
18606 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
18607 | |
18608 | // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. |
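  // For example (illustrative), on RV64 a `signext i32` argument is
  // guaranteed by the ABI to arrive sign-extended to 64 bits, so recording
  // the vreg lets that pass drop redundant sext.w instructions on its uses.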
18609 | if (In.isOrigArg()) { |
18610 | Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex()); |
18611 | if (OrigArg->getType()->isIntegerTy()) { |
18612 | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
      // An input zero-extended from a type narrower than i32 (e.g. i31) has
      // bit 31 clear, so it can also be considered sign-extended.
18614 | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
18615 | (BitWidth < 32 && In.Flags.isZExt())) { |
18616 | RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); |
18617 | RVFI->addSExt32Register(Reg: VReg); |
18618 | } |
18619 | } |
18620 | } |
18621 | |
18622 | if (VA.getLocInfo() == CCValAssign::Indirect) |
18623 | return Val; |
18624 | |
18625 | return convertLocVTToValVT(DAG, Val, VA, DL, Subtarget: TLI.getSubtarget()); |
18626 | } |
18627 | |
18628 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
18629 | const CCValAssign &VA, const SDLoc &DL, |
18630 | const RISCVSubtarget &Subtarget) { |
18631 | EVT LocVT = VA.getLocVT(); |
18632 | |
18633 | switch (VA.getLocInfo()) { |
18634 | default: |
18635 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
18636 | case CCValAssign::Full: |
18637 | if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) |
18638 | Val = convertToScalableVector(VT: LocVT, V: Val, DAG, Subtarget); |
18639 | break; |
18640 | case CCValAssign::BCvt: |
18641 | if (LocVT.isInteger() && |
18642 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { |
18643 | Val = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: LocVT, Operand: Val); |
18644 | } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) { |
18645 | if (RV64LegalI32) { |
18646 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); |
18647 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val); |
18648 | } else { |
18649 | Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); |
18650 | } |
18651 | } else { |
18652 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
18653 | } |
18654 | break; |
18655 | } |
18656 | return Val; |
18657 | } |
18658 | |
18659 | // The caller is responsible for loading the full value if the argument is |
18660 | // passed with CCValAssign::Indirect. |
18661 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
18662 | const CCValAssign &VA, const SDLoc &DL) { |
18663 | MachineFunction &MF = DAG.getMachineFunction(); |
18664 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
18665 | EVT LocVT = VA.getLocVT(); |
18666 | EVT ValVT = VA.getValVT(); |
18667 | EVT PtrVT = MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)); |
18668 | if (ValVT.isScalableVector()) { |
    // When the value is a scalable vector, what lives in the stack slot is a
    // pointer to the vector value, so treat ValVT as the pointer type rather
    // than the scalable vector type.
18672 | ValVT = LocVT; |
18673 | } |
18674 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
18675 | /*IsImmutable=*/true); |
18676 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
18677 | SDValue Val; |
18678 | |
18679 | ISD::LoadExtType ExtType; |
18680 | switch (VA.getLocInfo()) { |
18681 | default: |
18682 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
18683 | case CCValAssign::Full: |
18684 | case CCValAssign::Indirect: |
18685 | case CCValAssign::BCvt: |
18686 | ExtType = ISD::NON_EXTLOAD; |
18687 | break; |
18688 | } |
18689 | Val = DAG.getExtLoad( |
18690 | ExtType, dl: DL, VT: LocVT, Chain, Ptr: FIN, |
18691 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
18692 | return Val; |
18693 | } |
18694 | |
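// Unpack an f64 passed per the RV32 soft-float ABI while the D extension is
// available: the low half arrives in a GPR and the high half in either the
// next GPR or a stack slot. Example (illustrative): for
// `double f(int a, double d)` under ilp32, d's low half arrives in a1 and
// its high half in a2.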
18695 | static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, |
18696 | const CCValAssign &VA, |
18697 | const CCValAssign &HiVA, |
18698 | const SDLoc &DL) { |
18699 | assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && |
18700 | "Unexpected VA" ); |
18701 | MachineFunction &MF = DAG.getMachineFunction(); |
18702 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
18703 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
18704 | |
18705 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
18706 | |
18707 | Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); |
18708 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg); |
18709 | SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); |
18710 | SDValue Hi; |
18711 | if (HiVA.isMemLoc()) { |
18712 | // Second half of f64 is passed on the stack. |
18713 | int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(), |
18714 | /*IsImmutable=*/true); |
18715 | SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); |
18716 | Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, |
18717 | MachinePointerInfo::getFixedStack(MF, FI)); |
18718 | } else { |
18719 | // Second half of f64 is passed in another GPR. |
18720 | Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); |
18721 | RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg); |
18722 | Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); |
18723 | } |
18724 | return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); |
18725 | } |
18726 | |
// FastCC yields less than a 1% performance improvement on some particular
// benchmarks, but it may theoretically benefit other cases.
18729 | bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, |
18730 | unsigned ValNo, MVT ValVT, MVT LocVT, |
18731 | CCValAssign::LocInfo LocInfo, |
18732 | ISD::ArgFlagsTy ArgFlags, CCState &State, |
18733 | bool IsFixed, bool IsRet, Type *OrigTy, |
18734 | const RISCVTargetLowering &TLI, |
18735 | RVVArgDispatcher &RVVDispatcher) { |
18736 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
18737 | if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) { |
18738 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18739 | return false; |
18740 | } |
18741 | } |
18742 | |
18743 | const RISCVSubtarget &Subtarget = TLI.getSubtarget(); |
18744 | |
18745 | if (LocVT == MVT::f16 && |
18746 | (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { |
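    // These fastcc lists use fa0-fa7 first (matching the standard CC), then
    // the temporaries ft0-ft7 and ft8-ft11; the callee-saved fs* registers
    // are deliberately not used for arguments.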
18747 | static const MCPhysReg FPR16List[] = { |
18748 | RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, |
18749 | RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, |
18750 | RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, |
18751 | RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; |
18752 | if (unsigned Reg = State.AllocateReg(FPR16List)) { |
18753 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18754 | return false; |
18755 | } |
18756 | } |
18757 | |
18758 | if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { |
18759 | static const MCPhysReg FPR32List[] = { |
18760 | RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, |
18761 | RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, |
18762 | RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, |
18763 | RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; |
18764 | if (unsigned Reg = State.AllocateReg(FPR32List)) { |
18765 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18766 | return false; |
18767 | } |
18768 | } |
18769 | |
18770 | if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { |
18771 | static const MCPhysReg FPR64List[] = { |
18772 | RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, |
18773 | RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, |
18774 | RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, |
18775 | RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; |
18776 | if (unsigned Reg = State.AllocateReg(FPR64List)) { |
18777 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18778 | return false; |
18779 | } |
18780 | } |
18781 | |
18782 | // Check if there is an available GPR before hitting the stack. |
18783 | if ((LocVT == MVT::f16 && |
18784 | (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || |
18785 | (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || |
18786 | (LocVT == MVT::f64 && Subtarget.is64Bit() && |
18787 | Subtarget.hasStdExtZdinx())) { |
18788 | if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) { |
18789 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18790 | return false; |
18791 | } |
18792 | } |
18793 | |
18794 | if (LocVT == MVT::f16) { |
18795 | unsigned Offset2 = State.AllocateStack(Size: 2, Alignment: Align(2)); |
18796 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset2, LocVT, HTP: LocInfo)); |
18797 | return false; |
18798 | } |
18799 | |
18800 | if (LocVT == MVT::i32 || LocVT == MVT::f32) { |
18801 | unsigned Offset4 = State.AllocateStack(Size: 4, Alignment: Align(4)); |
18802 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset4, LocVT, HTP: LocInfo)); |
18803 | return false; |
18804 | } |
18805 | |
18806 | if (LocVT == MVT::i64 || LocVT == MVT::f64) { |
18807 | unsigned Offset5 = State.AllocateStack(Size: 8, Alignment: Align(8)); |
18808 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset5, LocVT, HTP: LocInfo)); |
18809 | return false; |
18810 | } |
18811 | |
18812 | if (LocVT.isVector()) { |
18813 | MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg(); |
18814 | if (AllocatedVReg) { |
18815 | // Fixed-length vectors are located in the corresponding scalable-vector |
18816 | // container types. |
18817 | if (ValVT.isFixedLengthVector()) |
18818 | LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT); |
18819 | State.addLoc( |
18820 | V: CCValAssign::getReg(ValNo, ValVT, RegNo: AllocatedVReg, LocVT, HTP: LocInfo)); |
18821 | } else { |
18822 | // Try and pass the address via a "fast" GPR. |
18823 | if (unsigned GPRReg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) { |
18824 | LocInfo = CCValAssign::Indirect; |
18825 | LocVT = TLI.getSubtarget().getXLenVT(); |
18826 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: GPRReg, LocVT, HTP: LocInfo)); |
18827 | } else if (ValVT.isFixedLengthVector()) { |
18828 | auto StackAlign = |
18829 | MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); |
18830 | unsigned StackOffset = |
18831 | State.AllocateStack(Size: ValVT.getStoreSize(), Alignment: StackAlign); |
18832 | State.addLoc( |
18833 | V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
18834 | } else { |
18835 | // Can't pass scalable vectors on the stack. |
18836 | return true; |
18837 | } |
18838 | } |
18839 | |
18840 | return false; |
18841 | } |
18842 | |
18843 | return true; // CC didn't match. |
18844 | } |
18845 | |
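// The GHC calling convention pins each STG machine register to a fixed
// callee-saved RISC-V register (see the comments on the lists below), so the
// STG registers survive ordinary calls. There is no stack fallback: running
// out of registers is a fatal error.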
18846 | bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
18847 | CCValAssign::LocInfo LocInfo, |
18848 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
18849 | if (ArgFlags.isNest()) { |
18850 | report_fatal_error( |
18851 | reason: "Attribute 'nest' is not supported in GHC calling convention" ); |
18852 | } |
18853 | |
18854 | static const MCPhysReg GPRList[] = { |
18855 | RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, |
18856 | RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; |
18857 | |
18858 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
18859 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim |
18860 | // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 |
18861 | if (unsigned Reg = State.AllocateReg(GPRList)) { |
18862 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18863 | return false; |
18864 | } |
18865 | } |
18866 | |
18867 | const RISCVSubtarget &Subtarget = |
18868 | State.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
18869 | |
18870 | if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { |
18871 | // Pass in STG registers: F1, ..., F6 |
18872 | // fs0 ... fs5 |
18873 | static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, |
18874 | RISCV::F18_F, RISCV::F19_F, |
18875 | RISCV::F20_F, RISCV::F21_F}; |
18876 | if (unsigned Reg = State.AllocateReg(FPR32List)) { |
18877 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18878 | return false; |
18879 | } |
18880 | } |
18881 | |
18882 | if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { |
18883 | // Pass in STG registers: D1, ..., D6 |
18884 | // fs6 ... fs11 |
18885 | static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, |
18886 | RISCV::F24_D, RISCV::F25_D, |
18887 | RISCV::F26_D, RISCV::F27_D}; |
18888 | if (unsigned Reg = State.AllocateReg(FPR64List)) { |
18889 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18890 | return false; |
18891 | } |
18892 | } |
18893 | |
18894 | if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || |
18895 | (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && |
18896 | Subtarget.is64Bit())) { |
18897 | if (unsigned Reg = State.AllocateReg(GPRList)) { |
18898 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
18899 | return false; |
18900 | } |
18901 | } |
18902 | |
18903 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
18904 | return true; |
18905 | } |
18906 | |
18907 | // Transform physical registers into virtual registers. |
18908 | SDValue RISCVTargetLowering::LowerFormalArguments( |
18909 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
18910 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
18911 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
18912 | |
18913 | MachineFunction &MF = DAG.getMachineFunction(); |
18914 | |
18915 | switch (CallConv) { |
18916 | default: |
18917 | report_fatal_error(reason: "Unsupported calling convention" ); |
18918 | case CallingConv::C: |
18919 | case CallingConv::Fast: |
18920 | case CallingConv::SPIR_KERNEL: |
18921 | case CallingConv::GRAAL: |
18922 | case CallingConv::RISCV_VectorCall: |
18923 | break; |
18924 | case CallingConv::GHC: |
18925 | if (Subtarget.hasStdExtE()) |
18926 | report_fatal_error(reason: "GHC calling convention is not supported on RVE!" ); |
18927 | if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) |
18928 | report_fatal_error(reason: "GHC calling convention requires the (Zfinx/F) and " |
18929 | "(Zdinx/D) instruction set extensions" ); |
18930 | } |
18931 | |
18932 | const Function &Func = MF.getFunction(); |
18933 | if (Func.hasFnAttribute(Kind: "interrupt" )) { |
18934 | if (!Func.arg_empty()) |
18935 | report_fatal_error( |
18936 | reason: "Functions with the interrupt attribute cannot have arguments!" ); |
18937 | |
18938 | StringRef Kind = |
18939 | MF.getFunction().getFnAttribute(Kind: "interrupt" ).getValueAsString(); |
18940 | |
18941 | if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine" )) |
18942 | report_fatal_error( |
18943 | reason: "Function interrupt attribute argument not supported!" ); |
18944 | } |
18945 | |
18946 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
18947 | MVT XLenVT = Subtarget.getXLenVT(); |
18948 | unsigned XLenInBytes = Subtarget.getXLen() / 8; |
  // Used with varargs to accumulate store chains.
18950 | std::vector<SDValue> OutChains; |
18951 | |
18952 | // Assign locations to all of the incoming arguments. |
18953 | SmallVector<CCValAssign, 16> ArgLocs; |
18954 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
18955 | |
18956 | if (CallConv == CallingConv::GHC) |
18957 | CCInfo.AnalyzeFormalArguments(Ins, Fn: RISCV::CC_RISCV_GHC); |
18958 | else |
18959 | analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, |
18960 | Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC |
18961 | : RISCV::CC_RISCV); |
18962 | |
18963 | for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { |
18964 | CCValAssign &VA = ArgLocs[i]; |
18965 | SDValue ArgValue; |
18966 | // Passing f64 on RV32D with a soft float ABI must be handled as a special |
18967 | // case. |
18968 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
18969 | assert(VA.needsCustom()); |
18970 | ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL); |
18971 | } else if (VA.isRegLoc()) |
18972 | ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this); |
18973 | else |
18974 | ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); |
18975 | |
18976 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
18977 | // If the original argument was split and passed by reference (e.g. i128 |
18978 | // on RV32), we need to load all parts of it here (using the same |
18979 | // address). Vectors may be partly split to registers and partly to the |
18980 | // stack, in which case the base address is partly offset and subsequent |
18981 | // stores are relative to that. |
18982 | InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue, |
18983 | PtrInfo: MachinePointerInfo())); |
18984 | unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; |
18985 | unsigned ArgPartOffset = Ins[InsIdx].PartOffset; |
18986 | assert(VA.getValVT().isVector() || ArgPartOffset == 0); |
18987 | while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { |
18988 | CCValAssign &PartVA = ArgLocs[i + 1]; |
18989 | unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; |
18990 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
18991 | if (PartVA.getValVT().isScalableVector()) |
18992 | Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset); |
18993 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset); |
18994 | InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address, |
18995 | PtrInfo: MachinePointerInfo())); |
18996 | ++i; |
18997 | ++InsIdx; |
18998 | } |
18999 | continue; |
19000 | } |
19001 | InVals.push_back(Elt: ArgValue); |
19002 | } |
19003 | |
19004 | if (any_of(Range&: ArgLocs, |
19005 | P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) |
19006 | MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); |
19007 | |
19008 | if (IsVarArg) { |
19009 | ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(ABI: Subtarget.getTargetABI()); |
19010 | unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs); |
19011 | const TargetRegisterClass *RC = &RISCV::GPRRegClass; |
19012 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
19013 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
19014 | RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); |
19015 | |
19016 | // Size of the vararg save area. For now, the varargs save area is either |
19017 | // zero or large enough to hold a0-a7. |
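    // For example (illustrative): in a varargs function with a single named
    // XLEN-sized argument, a0 is already allocated, so a1-a7 are saved
    // (7 * XLenInBytes), and the padding slot created below brings the save
    // area to 8 * XLenInBytes.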
19018 | int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); |
19019 | int FI; |
19020 | |
19021 | // If all registers are allocated, then all varargs must be passed on the |
19022 | // stack and we don't need to save any argregs. |
19023 | if (VarArgsSaveSize == 0) { |
19024 | int VaArgOffset = CCInfo.getStackSize(); |
19025 | FI = MFI.CreateFixedObject(Size: XLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
19026 | } else { |
19027 | int VaArgOffset = -VarArgsSaveSize; |
19028 | FI = MFI.CreateFixedObject(Size: VarArgsSaveSize, SPOffset: VaArgOffset, IsImmutable: true); |
19029 | |
      // If saving an odd number of registers then create an extra stack slot to
      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
      // offsets to even-numbered registers remain 2*XLEN-aligned.
19033 | if (Idx % 2) { |
19034 | MFI.CreateFixedObject( |
19035 | Size: XLenInBytes, SPOffset: VaArgOffset - static_cast<int>(XLenInBytes), IsImmutable: true); |
19036 | VarArgsSaveSize += XLenInBytes; |
19037 | } |
19038 | |
19039 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
19040 | |
19041 | // Copy the integer registers that may have been used for passing varargs |
19042 | // to the vararg save area. |
19043 | for (unsigned I = Idx; I < ArgRegs.size(); ++I) { |
19044 | const Register Reg = RegInfo.createVirtualRegister(RegClass: RC); |
19045 | RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg); |
19046 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: XLenVT); |
19047 | SDValue Store = DAG.getStore( |
19048 | Chain, dl: DL, Val: ArgValue, Ptr: FIN, |
19049 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset: (I - Idx) * XLenInBytes)); |
19050 | OutChains.push_back(x: Store); |
19051 | FIN = |
19052 | DAG.getMemBasePlusOffset(Base: FIN, Offset: TypeSize::getFixed(ExactSize: XLenInBytes), DL); |
19053 | } |
19054 | } |
19055 | |
    // Record the frame index of the first variable argument,
    // which is needed to lower VASTART.
19058 | RVFI->setVarArgsFrameIndex(FI); |
19059 | RVFI->setVarArgsSaveSize(VarArgsSaveSize); |
19060 | } |
19061 | |
  // All stores are grouped in one TokenFactor node so that the sizes of Ins
  // and InVals still match up. This only happens for vararg functions.
19064 | if (!OutChains.empty()) { |
19065 | OutChains.push_back(x: Chain); |
19066 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); |
19067 | } |
19068 | |
19069 | return Chain; |
19070 | } |
19071 | |
19072 | /// isEligibleForTailCallOptimization - Check whether the call is eligible |
19073 | /// for tail call optimization. |
19074 | /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. |
19075 | bool RISCVTargetLowering::isEligibleForTailCallOptimization( |
19076 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
19077 | const SmallVector<CCValAssign, 16> &ArgLocs) const { |
19078 | |
19079 | auto CalleeCC = CLI.CallConv; |
19080 | auto &Outs = CLI.Outs; |
19081 | auto &Caller = MF.getFunction(); |
19082 | auto CallerCC = Caller.getCallingConv(); |
19083 | |
19084 | // Exception-handling functions need a special set of instructions to |
19085 | // indicate a return to the hardware. Tail-calling another function would |
19086 | // probably break this. |
19087 | // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This |
19088 | // should be expanded as new function attributes are introduced. |
19089 | if (Caller.hasFnAttribute(Kind: "interrupt" )) |
19090 | return false; |
19091 | |
19092 | // Do not tail call opt if the stack is used to pass parameters. |
19093 | if (CCInfo.getStackSize() != 0) |
19094 | return false; |
19095 | |
  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack.
  // Passing indirectly often requires allocating stack space to store the
  // value, so the CCInfo.getStackSize() != 0 check above is not enough; we
  // also need to check whether any of the CCValAssigns in ArgLocs are passed
  // CCValAssign::Indirect.
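  // For example (illustrative), a caller forwarding an i128 argument on RV32
  // cannot be tail-call optimized even when the stack size is zero, whereas
  // `long f(long x) { return g(x); }` passes both checks.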
19104 | for (auto &VA : ArgLocs) |
19105 | if (VA.getLocInfo() == CCValAssign::Indirect) |
19106 | return false; |
19107 | |
19108 | // Do not tail call opt if either caller or callee uses struct return |
19109 | // semantics. |
19110 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
19111 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
19112 | if (IsCallerStructRet || IsCalleeStructRet) |
19113 | return false; |
19114 | |
19115 | // The callee has to preserve all registers the caller needs to preserve. |
19116 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
19117 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
19118 | if (CalleeCC != CallerCC) { |
19119 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
19120 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
19121 | return false; |
19122 | } |
19123 | |
19124 | // Byval parameters hand the function a pointer directly into the stack area |
19125 | // we want to reuse during a tail call. Working around this *is* possible |
19126 | // but less efficient and uglier in LowerCall. |
19127 | for (auto &Arg : Outs) |
19128 | if (Arg.Flags.isByVal()) |
19129 | return false; |
19130 | |
19131 | return true; |
19132 | } |
19133 | |
19134 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
19135 | return DAG.getDataLayout().getPrefTypeAlign( |
19136 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
19137 | } |
19138 | |
19139 | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
19140 | // and output parameter nodes. |
19141 | SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, |
19142 | SmallVectorImpl<SDValue> &InVals) const { |
19143 | SelectionDAG &DAG = CLI.DAG; |
19144 | SDLoc &DL = CLI.DL; |
19145 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
19146 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
19147 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
19148 | SDValue Chain = CLI.Chain; |
19149 | SDValue Callee = CLI.Callee; |
19150 | bool &IsTailCall = CLI.IsTailCall; |
19151 | CallingConv::ID CallConv = CLI.CallConv; |
19152 | bool IsVarArg = CLI.IsVarArg; |
19153 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
19154 | MVT XLenVT = Subtarget.getXLenVT(); |
19155 | |
19156 | MachineFunction &MF = DAG.getMachineFunction(); |
19157 | |
19158 | // Analyze the operands of the call, assigning locations to each operand. |
19159 | SmallVector<CCValAssign, 16> ArgLocs; |
19160 | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
19161 | |
19162 | if (CallConv == CallingConv::GHC) { |
19163 | if (Subtarget.hasStdExtE()) |
19164 | report_fatal_error(reason: "GHC calling convention is not supported on RVE!" ); |
19165 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: RISCV::CC_RISCV_GHC); |
19166 | } else |
19167 | analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, |
19168 | Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC |
19169 | : RISCV::CC_RISCV); |
19170 | |
19171 | // Check if it's really possible to do a tail call. |
19172 | if (IsTailCall) |
19173 | IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs); |
19174 | |
19175 | if (IsTailCall) |
19176 | ++NumTailCalls; |
19177 | else if (CLI.CB && CLI.CB->isMustTailCall()) |
19178 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
19179 | "site marked musttail" ); |
19180 | |
19181 | // Get a count of how many bytes are to be pushed on the stack. |
19182 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
19183 | |
19184 | // Create local copies for byval args |
19185 | SmallVector<SDValue, 8> ByValArgs; |
19186 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
19187 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
19188 | if (!Flags.isByVal()) |
19189 | continue; |
19190 | |
19191 | SDValue Arg = OutVals[i]; |
19192 | unsigned Size = Flags.getByValSize(); |
19193 | Align Alignment = Flags.getNonZeroByValAlign(); |
19194 | |
19195 | int FI = |
19196 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false); |
19197 | SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
19198 | SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: XLenVT); |
19199 | |
19200 | Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment, |
19201 | /*IsVolatile=*/isVol: false, |
19202 | /*AlwaysInline=*/false, isTailCall: IsTailCall, |
19203 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
19204 | ByValArgs.push_back(Elt: FIPtr); |
19205 | } |
19206 | |
19207 | if (!IsTailCall) |
19208 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL); |
19209 | |
19210 | // Copy argument values to their designated locations. |
19211 | SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; |
19212 | SmallVector<SDValue, 8> MemOpChains; |
19213 | SDValue StackPtr; |
19214 | for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; |
19215 | ++i, ++OutIdx) { |
19216 | CCValAssign &VA = ArgLocs[i]; |
19217 | SDValue ArgValue = OutVals[OutIdx]; |
19218 | ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; |
19219 | |
19220 | // Handle passing f64 on RV32D with a soft float ABI as a special case. |
19221 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
19222 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
19223 | assert(VA.needsCustom()); |
19224 | SDValue SplitF64 = DAG.getNode( |
19225 | RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); |
19226 | SDValue Lo = SplitF64.getValue(R: 0); |
19227 | SDValue Hi = SplitF64.getValue(R: 1); |
19228 | |
19229 | Register RegLo = VA.getLocReg(); |
19230 | RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo)); |
19231 | |
19232 | // Get the CCValAssign for the Hi part. |
19233 | CCValAssign &HiVA = ArgLocs[++i]; |
19234 | |
19235 | if (HiVA.isMemLoc()) { |
19236 | // Second half of f64 is passed on the stack. |
19237 | if (!StackPtr.getNode()) |
19238 | StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); |
19239 | SDValue Address = |
19240 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
19241 | N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL)); |
19242 | // Emit the store. |
19243 | MemOpChains.push_back( |
19244 | Elt: DAG.getStore(Chain, dl: DL, Val: Hi, Ptr: Address, PtrInfo: MachinePointerInfo())); |
19245 | } else { |
19246 | // Second half of f64 is passed in another GPR. |
19247 | Register RegHigh = HiVA.getLocReg(); |
19248 | RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi)); |
19249 | } |
19250 | continue; |
19251 | } |
19252 | |
19253 | // Promote the value if needed. |
19254 | // For now, only handle fully promoted and indirect arguments. |
19255 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
19256 | // Store the argument in a stack slot and pass its address. |
19257 | Align StackAlign = |
19258 | std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG), |
19259 | b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG)); |
19260 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
19261 | // If the original argument was split (e.g. i128), we need |
19262 | // to store the required parts of it here (and pass just one address). |
19263 | // Vectors may be partly split to registers and partly to the stack, in |
19264 | // which case the base address is partly offset and subsequent stores are |
19265 | // relative to that. |
19266 | unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; |
19267 | unsigned ArgPartOffset = Outs[OutIdx].PartOffset; |
19268 | assert(VA.getValVT().isVector() || ArgPartOffset == 0); |
      // Calculate the total size to store. We don't know the individual part
      // sizes up front, so walk the remaining parts of the split argument
      // and accumulate them.
19272 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
19273 | while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { |
19274 | SDValue PartValue = OutVals[OutIdx + 1]; |
19275 | unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; |
19276 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
19277 | EVT PartVT = PartValue.getValueType(); |
19278 | if (PartVT.isScalableVector()) |
19279 | Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset); |
19280 | StoredSize += PartVT.getStoreSize(); |
19281 | StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG)); |
19282 | Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset)); |
19283 | ++i; |
19284 | ++OutIdx; |
19285 | } |
19286 | SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign); |
19287 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
19288 | MemOpChains.push_back( |
19289 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
19290 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
19291 | for (const auto &Part : Parts) { |
19292 | SDValue PartValue = Part.first; |
19293 | SDValue PartOffset = Part.second; |
19294 | SDValue Address = |
19295 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset); |
19296 | MemOpChains.push_back( |
19297 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
19298 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
19299 | } |
19300 | ArgValue = SpillSlot; |
19301 | } else { |
19302 | ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL, Subtarget); |
19303 | } |
19304 | |
19305 | // Use local copy if it is a byval arg. |
19306 | if (Flags.isByVal()) |
19307 | ArgValue = ByValArgs[j++]; |
19308 | |
19309 | if (VA.isRegLoc()) { |
19310 | // Queue up the argument copies and emit them at the end. |
19311 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
19312 | } else { |
19313 | assert(VA.isMemLoc() && "Argument not register or memory" ); |
19314 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
19315 | "for passing parameters" ); |
19316 | |
19317 | // Work out the address of the stack slot. |
19318 | if (!StackPtr.getNode()) |
19319 | StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); |
19320 | SDValue Address = |
19321 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
19322 | N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL)); |
19323 | |
19324 | // Emit the store. |
19325 | MemOpChains.push_back( |
19326 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
19327 | } |
19328 | } |
19329 | |
19330 | // Join the stores, which are independent of one another. |
19331 | if (!MemOpChains.empty()) |
19332 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); |
19333 | |
19334 | SDValue Glue; |
19335 | |
19336 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
19337 | for (auto &Reg : RegsToPass) { |
19338 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue); |
19339 | Glue = Chain.getValue(R: 1); |
19340 | } |
19341 | |
  // Validate that none of the argument registers have been marked as
  // reserved; if so, report an error. Do the same for the return address if
  // this is not a tailcall.
19345 | validateCCReservedRegs(Regs: RegsToPass, MF); |
19346 | if (!IsTailCall && |
19347 | MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) |
19348 | MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
19349 | MF.getFunction(), |
19350 | "Return address register required, but has been reserved." }); |
19351 | |
  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and so the direct call can be matched by PseudoCALL.
19355 | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
19356 | const GlobalValue *GV = S->getGlobal(); |
19357 | Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: RISCVII::MO_CALL); |
19358 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
19359 | Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: RISCVII::MO_CALL); |
19360 | } |
19361 | |
19362 | // The first call operand is the chain and the second is the target address. |
19363 | SmallVector<SDValue, 8> Ops; |
19364 | Ops.push_back(Elt: Chain); |
19365 | Ops.push_back(Elt: Callee); |
19366 | |
19367 | // Add argument registers to the end of the list so that they are |
19368 | // known live into the call. |
19369 | for (auto &Reg : RegsToPass) |
19370 | Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType())); |
19371 | |
19372 | if (!IsTailCall) { |
19373 | // Add a register mask operand representing the call-preserved registers. |
19374 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
19375 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
19376 | assert(Mask && "Missing call preserved mask for calling convention" ); |
19377 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
19378 | } |
19379 | |
19380 | // Glue the call to the argument copies, if any. |
19381 | if (Glue.getNode()) |
19382 | Ops.push_back(Elt: Glue); |
19383 | |
19384 | assert((!CLI.CFIType || CLI.CB->isIndirectCall()) && |
19385 | "Unexpected CFI type for a direct call" ); |
19386 | |
19387 | // Emit the call. |
19388 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
19389 | |
19390 | if (IsTailCall) { |
19391 | MF.getFrameInfo().setHasTailCall(); |
19392 | SDValue Ret = DAG.getNode(Opcode: RISCVISD::TAIL, DL, VTList: NodeTys, Ops); |
19393 | if (CLI.CFIType) |
19394 | Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
19395 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
19396 | return Ret; |
19397 | } |
19398 | |
19399 | Chain = DAG.getNode(Opcode: RISCVISD::CALL, DL, VTList: NodeTys, Ops); |
19400 | if (CLI.CFIType) |
19401 | Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
19402 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
19403 | Glue = Chain.getValue(R: 1); |
19404 | |
19405 | // Mark the end of the call, which is glued to the call itself. |
19406 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
19407 | Glue = Chain.getValue(R: 1); |
19408 | |
19409 | // Assign locations to each value returned by this call. |
19410 | SmallVector<CCValAssign, 16> RVLocs; |
19411 | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
19412 | analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: RISCV::CC_RISCV); |
19413 | |
19414 | // Copy all of the result registers out of their specified physreg. |
19415 | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
19416 | auto &VA = RVLocs[i]; |
19417 | // Copy the value out |
19418 | SDValue RetValue = |
19419 | DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue); |
19420 | // Glue the RetValue to the end of the call sequence |
19421 | Chain = RetValue.getValue(R: 1); |
19422 | Glue = RetValue.getValue(R: 2); |
19423 | |
19424 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
19425 | assert(VA.needsCustom()); |
19426 | SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), |
19427 | MVT::i32, Glue); |
19428 | Chain = RetValue2.getValue(R: 1); |
19429 | Glue = RetValue2.getValue(R: 2); |
19430 | RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, |
19431 | RetValue2); |
19432 | } |
19433 | |
19434 | RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL, Subtarget); |
19435 | |
19436 | InVals.push_back(Elt: RetValue); |
19437 | } |
19438 | |
19439 | return Chain; |
19440 | } |
19441 | |
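// Return true if the values in Outs can all be returned in registers under
// this calling convention. When this hook returns false (e.g. for an
// aggregate larger than 2*XLEN returned by value), the generic call lowering
// instead demotes the return value to an implicit sret pointer argument.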
19442 | bool RISCVTargetLowering::CanLowerReturn( |
19443 | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
19444 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
19445 | SmallVector<CCValAssign, 16> RVLocs; |
19446 | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
19447 | |
19448 | RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)}; |
19449 | |
19450 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
19451 | MVT VT = Outs[i].VT; |
19452 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
19453 | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
19454 | if (RISCV::CC_RISCV(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: VT, LocVT: VT, LocInfo: CCValAssign::Full, |
19455 | ArgFlags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true, |
19456 | OrigTy: nullptr, TLI: *this, RVVDispatcher&: Dispatcher)) |
19457 | return false; |
19458 | } |
19459 | return true; |
19460 | } |
19461 | |
19462 | SDValue |
19463 | RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
19464 | bool IsVarArg, |
19465 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
19466 | const SmallVectorImpl<SDValue> &OutVals, |
19467 | const SDLoc &DL, SelectionDAG &DAG) const { |
19468 | MachineFunction &MF = DAG.getMachineFunction(); |
19469 | const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); |
19470 | |
19471 | // Stores the assignment of the return value to a location. |
19472 | SmallVector<CCValAssign, 16> RVLocs; |
19473 | |
19474 | // Info about the registers and stack slot. |
19475 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
19476 | *DAG.getContext()); |
19477 | |
19478 | analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
19479 | CLI: nullptr, Fn: RISCV::CC_RISCV); |
19480 | |
19481 | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
19482 | report_fatal_error(reason: "GHC functions return void only" ); |
19483 | |
19484 | SDValue Glue; |
19485 | SmallVector<SDValue, 4> RetOps(1, Chain); |
19486 | |
19487 | // Copy the result values into the output registers. |
19488 | for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { |
19489 | SDValue Val = OutVals[OutIdx]; |
19490 | CCValAssign &VA = RVLocs[i]; |
19491 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
19492 | |
19493 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
19494 | // Handle returning f64 on RV32D with a soft float ABI. |
19495 | assert(VA.isRegLoc() && "Expected return via registers" ); |
19496 | assert(VA.needsCustom()); |
19497 | SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, |
19498 | DAG.getVTList(MVT::i32, MVT::i32), Val); |
19499 | SDValue Lo = SplitF64.getValue(R: 0); |
19500 | SDValue Hi = SplitF64.getValue(R: 1); |
19501 | Register RegLo = VA.getLocReg(); |
19502 | Register RegHi = RVLocs[++i].getLocReg(); |
19503 | |
19504 | if (STI.isRegisterReservedByUser(i: RegLo) || |
19505 | STI.isRegisterReservedByUser(i: RegHi)) |
19506 | MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
19507 | MF.getFunction(), |
19508 | "Return value register required, but has been reserved." }); |
19509 | |
19510 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue); |
19511 | Glue = Chain.getValue(R: 1); |
19512 | RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); |
19513 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue); |
19514 | Glue = Chain.getValue(R: 1); |
19515 | RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); |
19516 | } else { |
19517 | // Handle a 'normal' return. |
19518 | Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); |
19519 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue); |
19520 | |
19521 | if (STI.isRegisterReservedByUser(i: VA.getLocReg())) |
19522 | MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
19523 | MF.getFunction(), |
19524 | "Return value register required, but has been reserved." }); |
19525 | |
19526 | // Guarantee that all emitted copies are stuck together. |
19527 | Glue = Chain.getValue(R: 1); |
19528 | RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT())); |
19529 | } |
19530 | } |
19531 | |
19532 | RetOps[0] = Chain; // Update chain. |
19533 | |
19534 | // Add the glue node if we have it. |
19535 | if (Glue.getNode()) { |
19536 | RetOps.push_back(Elt: Glue); |
19537 | } |
19538 | |
19539 | if (any_of(Range&: RVLocs, |
19540 | P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) |
19541 | MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); |
19542 | |
19543 | unsigned RetOpc = RISCVISD::RET_GLUE; |
19544 | // Interrupt service routines use different return instructions. |
19545 | const Function &Func = DAG.getMachineFunction().getFunction(); |
19546 | if (Func.hasFnAttribute(Kind: "interrupt" )) { |
19547 | if (!Func.getReturnType()->isVoidTy()) |
19548 | report_fatal_error( |
19549 | reason: "Functions with the interrupt attribute must have void return type!" ); |
19550 | |
19551 | MachineFunction &MF = DAG.getMachineFunction(); |
19552 | StringRef Kind = |
19553 | MF.getFunction().getFnAttribute(Kind: "interrupt" ).getValueAsString(); |
19554 | |
19555 | if (Kind == "supervisor" ) |
19556 | RetOpc = RISCVISD::SRET_GLUE; |
19557 | else |
19558 | RetOpc = RISCVISD::MRET_GLUE; |
19559 | } |
19560 | |
19561 | return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); |
19562 | } |
19563 | |
19564 | void RISCVTargetLowering::validateCCReservedRegs( |
19565 | const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, |
19566 | MachineFunction &MF) const { |
19567 | const Function &F = MF.getFunction(); |
19568 | const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); |
19569 | |
19570 | if (llvm::any_of(Range: Regs, P: [&STI](auto Reg) { |
19571 | return STI.isRegisterReservedByUser(i: Reg.first); |
19572 | })) |
19573 | F.getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
19574 | F, "Argument register required, but has been reserved." }); |
19575 | } |
19576 | |
19577 | // Check if the result of the node is only used as a return value, as |
19578 | // otherwise we can't perform a tail-call. |
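// This lets a libcall whose result flows straight into this function's own
// return be emitted as a tail call (see the soft-float TODO below for cases
// that are not yet handled).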
19579 | bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
19580 | if (N->getNumValues() != 1) |
19581 | return false; |
19582 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
19583 | return false; |
19584 | |
19585 | SDNode *Copy = *N->use_begin(); |
19586 | |
19587 | if (Copy->getOpcode() == ISD::BITCAST) { |
19588 | return isUsedByReturnOnly(N: Copy, Chain); |
19589 | } |
19590 | |
19591 | // TODO: Handle additional opcodes in order to support tail-calling libcalls |
19592 | // with soft float ABIs. |
19593 | if (Copy->getOpcode() != ISD::CopyToReg) { |
19594 | return false; |
19595 | } |
19596 | |
19597 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
19598 | // isn't safe to perform a tail call. |
19599 | if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) |
19600 | return false; |
19601 | |
19602 | // The copy must be used by a RISCVISD::RET_GLUE, and nothing else. |
19603 | bool HasRet = false; |
19604 | for (SDNode *Node : Copy->uses()) { |
19605 | if (Node->getOpcode() != RISCVISD::RET_GLUE) |
19606 | return false; |
19607 | HasRet = true; |
19608 | } |
19609 | if (!HasRet) |
19610 | return false; |
19611 | |
19612 | Chain = Copy->getOperand(Num: 0); |
19613 | return true; |
19614 | } |
19615 | |
19616 | bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
19617 | return CI->isTailCall(); |
19618 | } |
19619 | |
19620 | const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { |
19621 | #define NODE_NAME_CASE(NODE) \ |
19622 | case RISCVISD::NODE: \ |
19623 | return "RISCVISD::" #NODE; |
19624 | // clang-format off |
19625 | switch ((RISCVISD::NodeType)Opcode) { |
19626 | case RISCVISD::FIRST_NUMBER: |
19627 | break; |
19628 | NODE_NAME_CASE(RET_GLUE) |
19629 | NODE_NAME_CASE(SRET_GLUE) |
19630 | NODE_NAME_CASE(MRET_GLUE) |
19631 | NODE_NAME_CASE(CALL) |
19632 | NODE_NAME_CASE(SELECT_CC) |
19633 | NODE_NAME_CASE(BR_CC) |
19634 | NODE_NAME_CASE(BuildPairF64) |
19635 | NODE_NAME_CASE(SplitF64) |
19636 | NODE_NAME_CASE(TAIL) |
19637 | NODE_NAME_CASE(ADD_LO) |
19638 | NODE_NAME_CASE(HI) |
19639 | NODE_NAME_CASE(LLA) |
19640 | NODE_NAME_CASE(ADD_TPREL) |
19641 | NODE_NAME_CASE(MULHSU) |
19642 | NODE_NAME_CASE(SHL_ADD) |
19643 | NODE_NAME_CASE(SLLW) |
19644 | NODE_NAME_CASE(SRAW) |
19645 | NODE_NAME_CASE(SRLW) |
19646 | NODE_NAME_CASE(DIVW) |
19647 | NODE_NAME_CASE(DIVUW) |
19648 | NODE_NAME_CASE(REMUW) |
19649 | NODE_NAME_CASE(ROLW) |
19650 | NODE_NAME_CASE(RORW) |
19651 | NODE_NAME_CASE(CLZW) |
19652 | NODE_NAME_CASE(CTZW) |
19653 | NODE_NAME_CASE(ABSW) |
19654 | NODE_NAME_CASE(FMV_H_X) |
19655 | NODE_NAME_CASE(FMV_X_ANYEXTH) |
19656 | NODE_NAME_CASE(FMV_X_SIGNEXTH) |
19657 | NODE_NAME_CASE(FMV_W_X_RV64) |
19658 | NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) |
19659 | NODE_NAME_CASE(FCVT_X) |
19660 | NODE_NAME_CASE(FCVT_XU) |
19661 | NODE_NAME_CASE(FCVT_W_RV64) |
19662 | NODE_NAME_CASE(FCVT_WU_RV64) |
19663 | NODE_NAME_CASE(STRICT_FCVT_W_RV64) |
19664 | NODE_NAME_CASE(STRICT_FCVT_WU_RV64) |
19665 | NODE_NAME_CASE(FP_ROUND_BF16) |
19666 | NODE_NAME_CASE(FP_EXTEND_BF16) |
19667 | NODE_NAME_CASE(FROUND) |
19668 | NODE_NAME_CASE(FCLASS) |
19669 | NODE_NAME_CASE(FMAX) |
19670 | NODE_NAME_CASE(FMIN) |
19671 | NODE_NAME_CASE(READ_COUNTER_WIDE) |
19672 | NODE_NAME_CASE(BREV8) |
19673 | NODE_NAME_CASE(ORC_B) |
19674 | NODE_NAME_CASE(ZIP) |
19675 | NODE_NAME_CASE(UNZIP) |
19676 | NODE_NAME_CASE(CLMUL) |
19677 | NODE_NAME_CASE(CLMULH) |
19678 | NODE_NAME_CASE(CLMULR) |
19679 | NODE_NAME_CASE(MOPR) |
19680 | NODE_NAME_CASE(MOPRR) |
19681 | NODE_NAME_CASE(SHA256SIG0) |
19682 | NODE_NAME_CASE(SHA256SIG1) |
19683 | NODE_NAME_CASE(SHA256SUM0) |
19684 | NODE_NAME_CASE(SHA256SUM1) |
19685 | NODE_NAME_CASE(SM4KS) |
19686 | NODE_NAME_CASE(SM4ED) |
19687 | NODE_NAME_CASE(SM3P0) |
19688 | NODE_NAME_CASE(SM3P1) |
19689 | NODE_NAME_CASE(TH_LWD) |
19690 | NODE_NAME_CASE(TH_LWUD) |
19691 | NODE_NAME_CASE(TH_LDD) |
19692 | NODE_NAME_CASE(TH_SWD) |
19693 | NODE_NAME_CASE(TH_SDD) |
19694 | NODE_NAME_CASE(VMV_V_V_VL) |
19695 | NODE_NAME_CASE(VMV_V_X_VL) |
19696 | NODE_NAME_CASE(VFMV_V_F_VL) |
19697 | NODE_NAME_CASE(VMV_X_S) |
19698 | NODE_NAME_CASE(VMV_S_X_VL) |
19699 | NODE_NAME_CASE(VFMV_S_F_VL) |
19700 | NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) |
19701 | NODE_NAME_CASE(READ_VLENB) |
19702 | NODE_NAME_CASE(TRUNCATE_VECTOR_VL) |
19703 | NODE_NAME_CASE(VSLIDEUP_VL) |
19704 | NODE_NAME_CASE(VSLIDE1UP_VL) |
19705 | NODE_NAME_CASE(VSLIDEDOWN_VL) |
19706 | NODE_NAME_CASE(VSLIDE1DOWN_VL) |
19707 | NODE_NAME_CASE(VFSLIDE1UP_VL) |
19708 | NODE_NAME_CASE(VFSLIDE1DOWN_VL) |
19709 | NODE_NAME_CASE(VID_VL) |
19710 | NODE_NAME_CASE(VFNCVT_ROD_VL) |
19711 | NODE_NAME_CASE(VECREDUCE_ADD_VL) |
19712 | NODE_NAME_CASE(VECREDUCE_UMAX_VL) |
19713 | NODE_NAME_CASE(VECREDUCE_SMAX_VL) |
19714 | NODE_NAME_CASE(VECREDUCE_UMIN_VL) |
19715 | NODE_NAME_CASE(VECREDUCE_SMIN_VL) |
19716 | NODE_NAME_CASE(VECREDUCE_AND_VL) |
19717 | NODE_NAME_CASE(VECREDUCE_OR_VL) |
19718 | NODE_NAME_CASE(VECREDUCE_XOR_VL) |
19719 | NODE_NAME_CASE(VECREDUCE_FADD_VL) |
19720 | NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) |
19721 | NODE_NAME_CASE(VECREDUCE_FMIN_VL) |
19722 | NODE_NAME_CASE(VECREDUCE_FMAX_VL) |
19723 | NODE_NAME_CASE(ADD_VL) |
19724 | NODE_NAME_CASE(AND_VL) |
19725 | NODE_NAME_CASE(MUL_VL) |
19726 | NODE_NAME_CASE(OR_VL) |
19727 | NODE_NAME_CASE(SDIV_VL) |
19728 | NODE_NAME_CASE(SHL_VL) |
19729 | NODE_NAME_CASE(SREM_VL) |
19730 | NODE_NAME_CASE(SRA_VL) |
19731 | NODE_NAME_CASE(SRL_VL) |
19732 | NODE_NAME_CASE(ROTL_VL) |
19733 | NODE_NAME_CASE(ROTR_VL) |
19734 | NODE_NAME_CASE(SUB_VL) |
19735 | NODE_NAME_CASE(UDIV_VL) |
19736 | NODE_NAME_CASE(UREM_VL) |
19737 | NODE_NAME_CASE(XOR_VL) |
19738 | NODE_NAME_CASE(AVGFLOORU_VL) |
19739 | NODE_NAME_CASE(AVGCEILU_VL) |
19740 | NODE_NAME_CASE(SADDSAT_VL) |
19741 | NODE_NAME_CASE(UADDSAT_VL) |
19742 | NODE_NAME_CASE(SSUBSAT_VL) |
19743 | NODE_NAME_CASE(USUBSAT_VL) |
19744 | NODE_NAME_CASE(FADD_VL) |
19745 | NODE_NAME_CASE(FSUB_VL) |
19746 | NODE_NAME_CASE(FMUL_VL) |
19747 | NODE_NAME_CASE(FDIV_VL) |
19748 | NODE_NAME_CASE(FNEG_VL) |
19749 | NODE_NAME_CASE(FABS_VL) |
19750 | NODE_NAME_CASE(FSQRT_VL) |
19751 | NODE_NAME_CASE(FCLASS_VL) |
19752 | NODE_NAME_CASE(VFMADD_VL) |
19753 | NODE_NAME_CASE(VFNMADD_VL) |
19754 | NODE_NAME_CASE(VFMSUB_VL) |
19755 | NODE_NAME_CASE(VFNMSUB_VL) |
19756 | NODE_NAME_CASE(VFWMADD_VL) |
19757 | NODE_NAME_CASE(VFWNMADD_VL) |
19758 | NODE_NAME_CASE(VFWMSUB_VL) |
19759 | NODE_NAME_CASE(VFWNMSUB_VL) |
19760 | NODE_NAME_CASE(FCOPYSIGN_VL) |
19761 | NODE_NAME_CASE(SMIN_VL) |
19762 | NODE_NAME_CASE(SMAX_VL) |
19763 | NODE_NAME_CASE(UMIN_VL) |
19764 | NODE_NAME_CASE(UMAX_VL) |
19765 | NODE_NAME_CASE(BITREVERSE_VL) |
19766 | NODE_NAME_CASE(BSWAP_VL) |
19767 | NODE_NAME_CASE(CTLZ_VL) |
19768 | NODE_NAME_CASE(CTTZ_VL) |
19769 | NODE_NAME_CASE(CTPOP_VL) |
19770 | NODE_NAME_CASE(VFMIN_VL) |
19771 | NODE_NAME_CASE(VFMAX_VL) |
19772 | NODE_NAME_CASE(MULHS_VL) |
19773 | NODE_NAME_CASE(MULHU_VL) |
19774 | NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) |
19775 | NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL) |
19776 | NODE_NAME_CASE(VFCVT_RM_X_F_VL) |
19777 | NODE_NAME_CASE(VFCVT_RM_XU_F_VL) |
19778 | NODE_NAME_CASE(VFCVT_X_F_VL) |
19779 | NODE_NAME_CASE(VFCVT_XU_F_VL) |
19780 | NODE_NAME_CASE(VFROUND_NOEXCEPT_VL) |
19781 | NODE_NAME_CASE(SINT_TO_FP_VL) |
19782 | NODE_NAME_CASE(UINT_TO_FP_VL) |
19783 | NODE_NAME_CASE(VFCVT_RM_F_XU_VL) |
19784 | NODE_NAME_CASE(VFCVT_RM_F_X_VL) |
19785 | NODE_NAME_CASE(FP_EXTEND_VL) |
19786 | NODE_NAME_CASE(FP_ROUND_VL) |
19787 | NODE_NAME_CASE(STRICT_FADD_VL) |
19788 | NODE_NAME_CASE(STRICT_FSUB_VL) |
19789 | NODE_NAME_CASE(STRICT_FMUL_VL) |
19790 | NODE_NAME_CASE(STRICT_FDIV_VL) |
19791 | NODE_NAME_CASE(STRICT_FSQRT_VL) |
19792 | NODE_NAME_CASE(STRICT_VFMADD_VL) |
19793 | NODE_NAME_CASE(STRICT_VFNMADD_VL) |
19794 | NODE_NAME_CASE(STRICT_VFMSUB_VL) |
19795 | NODE_NAME_CASE(STRICT_VFNMSUB_VL) |
19796 | NODE_NAME_CASE(STRICT_FP_ROUND_VL) |
19797 | NODE_NAME_CASE(STRICT_FP_EXTEND_VL) |
19798 | NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL) |
19799 | NODE_NAME_CASE(STRICT_SINT_TO_FP_VL) |
19800 | NODE_NAME_CASE(STRICT_UINT_TO_FP_VL) |
19801 | NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL) |
19802 | NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL) |
19803 | NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL) |
19804 | NODE_NAME_CASE(STRICT_FSETCC_VL) |
19805 | NODE_NAME_CASE(STRICT_FSETCCS_VL) |
19806 | NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL) |
19807 | NODE_NAME_CASE(VWMUL_VL) |
19808 | NODE_NAME_CASE(VWMULU_VL) |
19809 | NODE_NAME_CASE(VWMULSU_VL) |
19810 | NODE_NAME_CASE(VWADD_VL) |
19811 | NODE_NAME_CASE(VWADDU_VL) |
19812 | NODE_NAME_CASE(VWSUB_VL) |
19813 | NODE_NAME_CASE(VWSUBU_VL) |
19814 | NODE_NAME_CASE(VWADD_W_VL) |
19815 | NODE_NAME_CASE(VWADDU_W_VL) |
19816 | NODE_NAME_CASE(VWSUB_W_VL) |
19817 | NODE_NAME_CASE(VWSUBU_W_VL) |
19818 | NODE_NAME_CASE(VWSLL_VL) |
19819 | NODE_NAME_CASE(VFWMUL_VL) |
19820 | NODE_NAME_CASE(VFWADD_VL) |
19821 | NODE_NAME_CASE(VFWSUB_VL) |
19822 | NODE_NAME_CASE(VFWADD_W_VL) |
19823 | NODE_NAME_CASE(VFWSUB_W_VL) |
19824 | NODE_NAME_CASE(VWMACC_VL) |
19825 | NODE_NAME_CASE(VWMACCU_VL) |
19826 | NODE_NAME_CASE(VWMACCSU_VL) |
19827 | NODE_NAME_CASE(VNSRL_VL) |
19828 | NODE_NAME_CASE(SETCC_VL) |
19829 | NODE_NAME_CASE(VMERGE_VL) |
19830 | NODE_NAME_CASE(VMAND_VL) |
19831 | NODE_NAME_CASE(VMOR_VL) |
19832 | NODE_NAME_CASE(VMXOR_VL) |
19833 | NODE_NAME_CASE(VMCLR_VL) |
19834 | NODE_NAME_CASE(VMSET_VL) |
19835 | NODE_NAME_CASE(VRGATHER_VX_VL) |
19836 | NODE_NAME_CASE(VRGATHER_VV_VL) |
19837 | NODE_NAME_CASE(VRGATHEREI16_VV_VL) |
19838 | NODE_NAME_CASE(VSEXT_VL) |
19839 | NODE_NAME_CASE(VZEXT_VL) |
19840 | NODE_NAME_CASE(VCPOP_VL) |
19841 | NODE_NAME_CASE(VFIRST_VL) |
19842 | NODE_NAME_CASE(READ_CSR) |
19843 | NODE_NAME_CASE(WRITE_CSR) |
19844 | NODE_NAME_CASE(SWAP_CSR) |
19845 | NODE_NAME_CASE(CZERO_EQZ) |
19846 | NODE_NAME_CASE(CZERO_NEZ) |
19847 | NODE_NAME_CASE(SF_VC_XV_SE) |
19848 | NODE_NAME_CASE(SF_VC_IV_SE) |
19849 | NODE_NAME_CASE(SF_VC_VV_SE) |
19850 | NODE_NAME_CASE(SF_VC_FV_SE) |
19851 | NODE_NAME_CASE(SF_VC_XVV_SE) |
19852 | NODE_NAME_CASE(SF_VC_IVV_SE) |
19853 | NODE_NAME_CASE(SF_VC_VVV_SE) |
19854 | NODE_NAME_CASE(SF_VC_FVV_SE) |
19855 | NODE_NAME_CASE(SF_VC_XVW_SE) |
19856 | NODE_NAME_CASE(SF_VC_IVW_SE) |
19857 | NODE_NAME_CASE(SF_VC_VVW_SE) |
19858 | NODE_NAME_CASE(SF_VC_FVW_SE) |
19859 | NODE_NAME_CASE(SF_VC_V_X_SE) |
19860 | NODE_NAME_CASE(SF_VC_V_I_SE) |
19861 | NODE_NAME_CASE(SF_VC_V_XV_SE) |
19862 | NODE_NAME_CASE(SF_VC_V_IV_SE) |
19863 | NODE_NAME_CASE(SF_VC_V_VV_SE) |
19864 | NODE_NAME_CASE(SF_VC_V_FV_SE) |
19865 | NODE_NAME_CASE(SF_VC_V_XVV_SE) |
19866 | NODE_NAME_CASE(SF_VC_V_IVV_SE) |
19867 | NODE_NAME_CASE(SF_VC_V_VVV_SE) |
19868 | NODE_NAME_CASE(SF_VC_V_FVV_SE) |
19869 | NODE_NAME_CASE(SF_VC_V_XVW_SE) |
19870 | NODE_NAME_CASE(SF_VC_V_IVW_SE) |
19871 | NODE_NAME_CASE(SF_VC_V_VVW_SE) |
19872 | NODE_NAME_CASE(SF_VC_V_FVW_SE) |
19873 | } |
19874 | // clang-format on |
19875 | return nullptr; |
19876 | #undef NODE_NAME_CASE |
19877 | } |
19878 | |
19879 | /// getConstraintType - Given a constraint letter, return the type of |
19880 | /// constraint it is for this target. |
19881 | RISCVTargetLowering::ConstraintType |
19882 | RISCVTargetLowering::getConstraintType(StringRef Constraint) const { |
19883 | if (Constraint.size() == 1) { |
19884 | switch (Constraint[0]) { |
19885 | default: |
19886 | break; |
19887 | case 'f': |
19888 | return C_RegisterClass; |
19889 | case 'I': |
19890 | case 'J': |
19891 | case 'K': |
19892 | return C_Immediate; |
19893 | case 'A': |
19894 | return C_Memory; |
19895 | case 's': |
19896 | case 'S': // A symbolic address |
19897 | return C_Other; |
19898 | } |
19899 | } else { |
19900 | if (Constraint == "vr" || Constraint == "vm" ) |
19901 | return C_RegisterClass; |
19902 | } |
19903 | return TargetLowering::getConstraintType(Constraint); |
19904 | } |
19905 | |
19906 | std::pair<unsigned, const TargetRegisterClass *> |
19907 | RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
19908 | StringRef Constraint, |
19909 | MVT VT) const { |
19910 | // First, see if this is a constraint that directly corresponds to a RISC-V |
19911 | // register class. |
19912 | if (Constraint.size() == 1) { |
19913 | switch (Constraint[0]) { |
19914 | case 'r': |
19915 | // TODO: Support fixed vectors up to XLen for P extension? |
19916 | if (VT.isVector()) |
19917 | break; |
19918 | if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) |
19919 | return std::make_pair(0U, &RISCV::GPRF16RegClass); |
19920 | if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) |
19921 | return std::make_pair(0U, &RISCV::GPRF32RegClass); |
19922 | if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) |
19923 | return std::make_pair(0U, &RISCV::GPRPairRegClass); |
19924 | return std::make_pair(0U, &RISCV::GPRNoX0RegClass); |
19925 | case 'f': |
19926 | if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) |
19927 | return std::make_pair(0U, &RISCV::FPR16RegClass); |
19928 | if (Subtarget.hasStdExtF() && VT == MVT::f32) |
19929 | return std::make_pair(0U, &RISCV::FPR32RegClass); |
19930 | if (Subtarget.hasStdExtD() && VT == MVT::f64) |
19931 | return std::make_pair(0U, &RISCV::FPR64RegClass); |
19932 | break; |
19933 | default: |
19934 | break; |
19935 | } |
19936 | } else if (Constraint == "vr" ) { |
19937 | for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, |
19938 | &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { |
19939 | if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) |
19940 | return std::make_pair(0U, RC); |
19941 | } |
19942 | } else if (Constraint == "vm" ) { |
19943 | if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) |
19944 | return std::make_pair(0U, &RISCV::VMV0RegClass); |
19945 | } |
19946 | |
19947 | // Clang will correctly decode the usage of register name aliases into their |
19948 | // official names. However, other frontends like `rustc` do not. This allows |
19949 | // users of these frontends to use the ABI names for registers in LLVM-style |
19950 | // register constraints. |
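  // For example, both "{s0}" and its ABI alias "{fp}" select X8 below.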
19951 | unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) |
19952 | .Case("{zero}" , RISCV::X0) |
19953 | .Case("{ra}" , RISCV::X1) |
19954 | .Case("{sp}" , RISCV::X2) |
19955 | .Case("{gp}" , RISCV::X3) |
19956 | .Case("{tp}" , RISCV::X4) |
19957 | .Case("{t0}" , RISCV::X5) |
19958 | .Case("{t1}" , RISCV::X6) |
19959 | .Case("{t2}" , RISCV::X7) |
19960 | .Cases("{s0}" , "{fp}" , RISCV::X8) |
19961 | .Case("{s1}" , RISCV::X9) |
19962 | .Case("{a0}" , RISCV::X10) |
19963 | .Case("{a1}" , RISCV::X11) |
19964 | .Case("{a2}" , RISCV::X12) |
19965 | .Case("{a3}" , RISCV::X13) |
19966 | .Case("{a4}" , RISCV::X14) |
19967 | .Case("{a5}" , RISCV::X15) |
19968 | .Case("{a6}" , RISCV::X16) |
19969 | .Case("{a7}" , RISCV::X17) |
19970 | .Case("{s2}" , RISCV::X18) |
19971 | .Case("{s3}" , RISCV::X19) |
19972 | .Case("{s4}" , RISCV::X20) |
19973 | .Case("{s5}" , RISCV::X21) |
19974 | .Case("{s6}" , RISCV::X22) |
19975 | .Case("{s7}" , RISCV::X23) |
19976 | .Case("{s8}" , RISCV::X24) |
19977 | .Case("{s9}" , RISCV::X25) |
19978 | .Case("{s10}" , RISCV::X26) |
19979 | .Case("{s11}" , RISCV::X27) |
19980 | .Case("{t3}" , RISCV::X28) |
19981 | .Case("{t4}" , RISCV::X29) |
19982 | .Case("{t5}" , RISCV::X30) |
19983 | .Case("{t6}" , RISCV::X31) |
19984 | .Default(RISCV::NoRegister); |
19985 | if (XRegFromAlias != RISCV::NoRegister) |
19986 | return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); |
19987 | |
19988 | // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the |
19989 | // TableGen record rather than the AsmName to choose registers for InlineAsm |
19990 | // constraints, plus we want to match those names to the widest floating point |
19991 | // register type available, manually select floating point registers here. |
19992 | // |
19993 | // The second case is the ABI name of the register, so that frontends can also |
19994 | // use the ABI names in register constraint lists. |
19995 | if (Subtarget.hasStdExtF()) { |
19996 | unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) |
19997 | .Cases("{f0}" , "{ft0}" , RISCV::F0_F) |
19998 | .Cases("{f1}" , "{ft1}" , RISCV::F1_F) |
19999 | .Cases("{f2}" , "{ft2}" , RISCV::F2_F) |
20000 | .Cases("{f3}" , "{ft3}" , RISCV::F3_F) |
20001 | .Cases("{f4}" , "{ft4}" , RISCV::F4_F) |
20002 | .Cases("{f5}" , "{ft5}" , RISCV::F5_F) |
20003 | .Cases("{f6}" , "{ft6}" , RISCV::F6_F) |
20004 | .Cases("{f7}" , "{ft7}" , RISCV::F7_F) |
20005 | .Cases("{f8}" , "{fs0}" , RISCV::F8_F) |
20006 | .Cases("{f9}" , "{fs1}" , RISCV::F9_F) |
20007 | .Cases("{f10}" , "{fa0}" , RISCV::F10_F) |
20008 | .Cases("{f11}" , "{fa1}" , RISCV::F11_F) |
20009 | .Cases("{f12}" , "{fa2}" , RISCV::F12_F) |
20010 | .Cases("{f13}" , "{fa3}" , RISCV::F13_F) |
20011 | .Cases("{f14}" , "{fa4}" , RISCV::F14_F) |
20012 | .Cases("{f15}" , "{fa5}" , RISCV::F15_F) |
20013 | .Cases("{f16}" , "{fa6}" , RISCV::F16_F) |
20014 | .Cases("{f17}" , "{fa7}" , RISCV::F17_F) |
20015 | .Cases("{f18}" , "{fs2}" , RISCV::F18_F) |
20016 | .Cases("{f19}" , "{fs3}" , RISCV::F19_F) |
20017 | .Cases("{f20}" , "{fs4}" , RISCV::F20_F) |
20018 | .Cases("{f21}" , "{fs5}" , RISCV::F21_F) |
20019 | .Cases("{f22}" , "{fs6}" , RISCV::F22_F) |
20020 | .Cases("{f23}" , "{fs7}" , RISCV::F23_F) |
20021 | .Cases("{f24}" , "{fs8}" , RISCV::F24_F) |
20022 | .Cases("{f25}" , "{fs9}" , RISCV::F25_F) |
20023 | .Cases("{f26}" , "{fs10}" , RISCV::F26_F) |
20024 | .Cases("{f27}" , "{fs11}" , RISCV::F27_F) |
20025 | .Cases("{f28}" , "{ft8}" , RISCV::F28_F) |
20026 | .Cases("{f29}" , "{ft9}" , RISCV::F29_F) |
20027 | .Cases("{f30}" , "{ft10}" , RISCV::F30_F) |
20028 | .Cases("{f31}" , "{ft11}" , RISCV::F31_F) |
20029 | .Default(RISCV::NoRegister); |
20030 | if (FReg != RISCV::NoRegister) { |
20031 | assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg" ); |
20032 | if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) { |
20033 | unsigned RegNo = FReg - RISCV::F0_F; |
20034 | unsigned DReg = RISCV::F0_D + RegNo; |
20035 | return std::make_pair(DReg, &RISCV::FPR64RegClass); |
20036 | } |
20037 | if (VT == MVT::f32 || VT == MVT::Other) |
20038 | return std::make_pair(FReg, &RISCV::FPR32RegClass); |
20039 | if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) { |
20040 | unsigned RegNo = FReg - RISCV::F0_F; |
20041 | unsigned HReg = RISCV::F0_H + RegNo; |
20042 | return std::make_pair(HReg, &RISCV::FPR16RegClass); |
20043 | } |
20044 | } |
20045 | } |
20046 | |
20047 | if (Subtarget.hasVInstructions()) { |
20048 | Register VReg = StringSwitch<Register>(Constraint.lower()) |
20049 | .Case("{v0}" , RISCV::V0) |
20050 | .Case("{v1}" , RISCV::V1) |
20051 | .Case("{v2}" , RISCV::V2) |
20052 | .Case("{v3}" , RISCV::V3) |
20053 | .Case("{v4}" , RISCV::V4) |
20054 | .Case("{v5}" , RISCV::V5) |
20055 | .Case("{v6}" , RISCV::V6) |
20056 | .Case("{v7}" , RISCV::V7) |
20057 | .Case("{v8}" , RISCV::V8) |
20058 | .Case("{v9}" , RISCV::V9) |
20059 | .Case("{v10}" , RISCV::V10) |
20060 | .Case("{v11}" , RISCV::V11) |
20061 | .Case("{v12}" , RISCV::V12) |
20062 | .Case("{v13}" , RISCV::V13) |
20063 | .Case("{v14}" , RISCV::V14) |
20064 | .Case("{v15}" , RISCV::V15) |
20065 | .Case("{v16}" , RISCV::V16) |
20066 | .Case("{v17}" , RISCV::V17) |
20067 | .Case("{v18}" , RISCV::V18) |
20068 | .Case("{v19}" , RISCV::V19) |
20069 | .Case("{v20}" , RISCV::V20) |
20070 | .Case("{v21}" , RISCV::V21) |
20071 | .Case("{v22}" , RISCV::V22) |
20072 | .Case("{v23}" , RISCV::V23) |
20073 | .Case("{v24}" , RISCV::V24) |
20074 | .Case("{v25}" , RISCV::V25) |
20075 | .Case("{v26}" , RISCV::V26) |
20076 | .Case("{v27}" , RISCV::V27) |
20077 | .Case("{v28}" , RISCV::V28) |
20078 | .Case("{v29}" , RISCV::V29) |
20079 | .Case("{v30}" , RISCV::V30) |
20080 | .Case("{v31}" , RISCV::V31) |
20081 | .Default(RISCV::NoRegister); |
20082 | if (VReg != RISCV::NoRegister) { |
20083 | if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) |
20084 | return std::make_pair(VReg, &RISCV::VMRegClass); |
20085 | if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) |
20086 | return std::make_pair(VReg, &RISCV::VRRegClass); |
20087 | for (const auto *RC : |
20088 | {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { |
20089 | if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { |
20090 | VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); |
20091 | return std::make_pair(VReg, RC); |
20092 | } |
20093 | } |
20094 | } |
20095 | } |
20096 | |
20097 | std::pair<Register, const TargetRegisterClass *> Res = |
20098 | TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
20099 | |
20100 | // If we picked one of the Zfinx register classes, remap it to the GPR class. |
20101 | // FIXME: When Zfinx is supported in CodeGen this will need to take the |
20102 | // Subtarget into account. |
20103 | if (Res.second == &RISCV::GPRF16RegClass || |
20104 | Res.second == &RISCV::GPRF32RegClass || |
20105 | Res.second == &RISCV::GPRPairRegClass) |
20106 | return std::make_pair(Res.first, &RISCV::GPRRegClass); |
20107 | |
20108 | return Res; |
20109 | } |
20110 | |
20111 | InlineAsm::ConstraintCode |
20112 | RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
20113 | // Currently only support length 1 constraints. |
20114 | if (ConstraintCode.size() == 1) { |
20115 | switch (ConstraintCode[0]) { |
20116 | case 'A': |
20117 | return InlineAsm::ConstraintCode::A; |
20118 | default: |
20119 | break; |
20120 | } |
20121 | } |
20122 | |
20123 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
20124 | } |
20125 | |
20126 | void RISCVTargetLowering::LowerAsmOperandForConstraint( |
20127 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
20128 | SelectionDAG &DAG) const { |
20129 | // Currently only support length 1 constraints. |
20130 | if (Constraint.size() == 1) { |
20131 | switch (Constraint[0]) { |
20132 | case 'I': |
20133 | // Validate & create a 12-bit signed immediate operand. |
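      // e.g. "I"(2047) is accepted here, while "I"(4096) is not, since it
      // does not fit in a signed 12-bit immediate.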
20134 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
20135 | uint64_t CVal = C->getSExtValue(); |
20136 | if (isInt<12>(x: CVal)) |
20137 | Ops.push_back( |
20138 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT())); |
20139 | } |
20140 | return; |
20141 | case 'J': |
20142 | // Validate & create an integer zero operand. |
20143 | if (isNullConstant(V: Op)) |
20144 | Ops.push_back( |
20145 | x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getXLenVT())); |
20146 | return; |
20147 | case 'K': |
20148 | // Validate & create a 5-bit unsigned immediate operand. |
20149 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
20150 | uint64_t CVal = C->getZExtValue(); |
20151 | if (isUInt<5>(x: CVal)) |
20152 | Ops.push_back( |
20153 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT())); |
20154 | } |
20155 | return; |
20156 | case 'S': |
20157 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint: "s" , Ops, DAG); |
20158 | return; |
20159 | default: |
20160 | break; |
20161 | } |
20162 | } |
20163 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
20164 | } |
20165 | |
20166 | Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, |
20167 | Instruction *Inst, |
20168 | AtomicOrdering Ord) const { |
20169 | if (Subtarget.hasStdExtZtso()) { |
20170 | if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
20171 | return Builder.CreateFence(Ordering: Ord); |
20172 | return nullptr; |
20173 | } |
20174 | |
20175 | if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
20176 | return Builder.CreateFence(Ordering: Ord); |
20177 | if (isa<StoreInst>(Val: Inst) && isReleaseOrStronger(AO: Ord)) |
20178 | return Builder.CreateFence(Ordering: AtomicOrdering::Release); |
20179 | return nullptr; |
20180 | } |
20181 | |
20182 | Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, |
20183 | Instruction *Inst, |
20184 | AtomicOrdering Ord) const { |
20185 | if (Subtarget.hasStdExtZtso()) { |
20186 | if (isa<StoreInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
20187 | return Builder.CreateFence(Ordering: Ord); |
20188 | return nullptr; |
20189 | } |
20190 | |
20191 | if (isa<LoadInst>(Val: Inst) && isAcquireOrStronger(AO: Ord)) |
20192 | return Builder.CreateFence(Ordering: AtomicOrdering::Acquire); |
20193 | if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Val: Inst) && |
20194 | Ord == AtomicOrdering::SequentiallyConsistent) |
20195 | return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent); |
20196 | return nullptr; |
20197 | } |
20198 | |
20199 | TargetLowering::AtomicExpansionKind |
20200 | RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as
  // floating-point operations can't be used in an lr/sc sequence without
  // breaking the forward-progress guarantee. atomicrmw {uinc,udec}wrap are
  // expanded the same way.
20204 | if (AI->isFloatingPointOperation() || |
20205 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
20206 | AI->getOperation() == AtomicRMWInst::UDecWrap) |
20207 | return AtomicExpansionKind::CmpXChg; |
20208 | |
20209 | // Don't expand forced atomics, we want to have __sync libcalls instead. |
20210 | if (Subtarget.hasForcedAtomics()) |
20211 | return AtomicExpansionKind::None; |
20212 | |
20213 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
20214 | if (AI->getOperation() == AtomicRMWInst::Nand) { |
20215 | if (Subtarget.hasStdExtZacas() && |
20216 | (Size >= 32 || Subtarget.hasStdExtZabha())) |
20217 | return AtomicExpansionKind::CmpXChg; |
20218 | if (Size < 32) |
20219 | return AtomicExpansionKind::MaskedIntrinsic; |
20220 | } |
20221 | |
20222 | if (Size < 32 && !Subtarget.hasStdExtZabha()) |
20223 | return AtomicExpansionKind::MaskedIntrinsic; |
20224 | |
20225 | return AtomicExpansionKind::None; |
20226 | } |
20227 | |
20228 | static Intrinsic::ID |
20229 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { |
20230 | if (XLen == 32) { |
20231 | switch (BinOp) { |
20232 | default: |
20233 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
20234 | case AtomicRMWInst::Xchg: |
20235 | return Intrinsic::riscv_masked_atomicrmw_xchg_i32; |
20236 | case AtomicRMWInst::Add: |
20237 | return Intrinsic::riscv_masked_atomicrmw_add_i32; |
20238 | case AtomicRMWInst::Sub: |
20239 | return Intrinsic::riscv_masked_atomicrmw_sub_i32; |
20240 | case AtomicRMWInst::Nand: |
20241 | return Intrinsic::riscv_masked_atomicrmw_nand_i32; |
20242 | case AtomicRMWInst::Max: |
20243 | return Intrinsic::riscv_masked_atomicrmw_max_i32; |
20244 | case AtomicRMWInst::Min: |
20245 | return Intrinsic::riscv_masked_atomicrmw_min_i32; |
20246 | case AtomicRMWInst::UMax: |
20247 | return Intrinsic::riscv_masked_atomicrmw_umax_i32; |
20248 | case AtomicRMWInst::UMin: |
20249 | return Intrinsic::riscv_masked_atomicrmw_umin_i32; |
20250 | } |
20251 | } |
20252 | |
20253 | if (XLen == 64) { |
20254 | switch (BinOp) { |
20255 | default: |
20256 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
20257 | case AtomicRMWInst::Xchg: |
20258 | return Intrinsic::riscv_masked_atomicrmw_xchg_i64; |
20259 | case AtomicRMWInst::Add: |
20260 | return Intrinsic::riscv_masked_atomicrmw_add_i64; |
20261 | case AtomicRMWInst::Sub: |
20262 | return Intrinsic::riscv_masked_atomicrmw_sub_i64; |
20263 | case AtomicRMWInst::Nand: |
20264 | return Intrinsic::riscv_masked_atomicrmw_nand_i64; |
20265 | case AtomicRMWInst::Max: |
20266 | return Intrinsic::riscv_masked_atomicrmw_max_i64; |
20267 | case AtomicRMWInst::Min: |
20268 | return Intrinsic::riscv_masked_atomicrmw_min_i64; |
20269 | case AtomicRMWInst::UMax: |
20270 | return Intrinsic::riscv_masked_atomicrmw_umax_i64; |
20271 | case AtomicRMWInst::UMin: |
20272 | return Intrinsic::riscv_masked_atomicrmw_umin_i64; |
20273 | } |
20274 | } |
20275 | |
20276 | llvm_unreachable("Unexpected XLen\n" ); |
20277 | } |
20278 | |
20279 | Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( |
20280 | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
20281 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
20282 | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
20283 | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
20284 | // mask, as this produces better code than the LR/SC loop emitted by |
20285 | // int_riscv_masked_atomicrmw_xchg. |
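  // e.g. an atomicrmw xchg of i8 0 becomes an atomicrmw and with the inverted
  // mask, and an xchg of i8 -1 becomes an atomicrmw or with the mask itself.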
20286 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
20287 | isa<ConstantInt>(Val: AI->getValOperand())) { |
20288 | ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand()); |
20289 | if (CVal->isZero()) |
20290 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr, |
20291 | Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ), |
20292 | Align: AI->getAlign(), Ordering: Ord); |
20293 | if (CVal->isMinusOne()) |
20294 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask, |
20295 | Align: AI->getAlign(), Ordering: Ord); |
20296 | } |
20297 | |
20298 | unsigned XLen = Subtarget.getXLen(); |
20299 | Value *Ordering = |
20300 | Builder.getIntN(N: XLen, C: static_cast<uint64_t>(AI->getOrdering())); |
20301 | Type *Tys[] = {AlignedAddr->getType()}; |
20302 | Function *LrwOpScwLoop = Intrinsic::getDeclaration( |
20303 | M: AI->getModule(), |
20304 | id: getIntrinsicForMaskedAtomicRMWBinOp(XLen, BinOp: AI->getOperation()), Tys); |
20305 | |
20306 | if (XLen == 64) { |
20307 | Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty()); |
20308 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
20309 | ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty()); |
20310 | } |
20311 | |
20312 | Value *Result; |
20313 | |
20314 | // Must pass the shift amount needed to sign extend the loaded value prior |
20315 | // to performing a signed comparison for min/max. ShiftAmt is the number of |
20316 | // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which |
20317 | // is the number of bits to left+right shift the value in order to |
20318 | // sign-extend. |
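  // e.g. with XLen=32, an i8 field at ShiftAmt=16 gets SextShamt =
  // 32 - 8 - 16 = 8: shifting left by 8 and then arithmetic-right by 8
  // sign-extends bits [23:16] without moving the field.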
20319 | if (AI->getOperation() == AtomicRMWInst::Min || |
20320 | AI->getOperation() == AtomicRMWInst::Max) { |
20321 | const DataLayout &DL = AI->getModule()->getDataLayout(); |
20322 | unsigned ValWidth = |
20323 | DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType()); |
20324 | Value *SextShamt = |
20325 | Builder.CreateSub(LHS: Builder.getIntN(N: XLen, C: XLen - ValWidth), RHS: ShiftAmt); |
20326 | Result = Builder.CreateCall(Callee: LrwOpScwLoop, |
20327 | Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
20328 | } else { |
20329 | Result = |
20330 | Builder.CreateCall(Callee: LrwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering}); |
20331 | } |
20332 | |
20333 | if (XLen == 64) |
20334 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
20335 | return Result; |
20336 | } |
20337 | |
20338 | TargetLowering::AtomicExpansionKind |
20339 | RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( |
20340 | AtomicCmpXchgInst *CI) const { |
20341 | // Don't expand forced atomics, we want to have __sync libcalls instead. |
20342 | if (Subtarget.hasForcedAtomics()) |
20343 | return AtomicExpansionKind::None; |
20344 | |
20345 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
20346 | if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) && |
20347 | (Size == 8 || Size == 16)) |
20348 | return AtomicExpansionKind::MaskedIntrinsic; |
20349 | return AtomicExpansionKind::None; |
20350 | } |
20351 | |
20352 | Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
20353 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
20354 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
20355 | unsigned XLen = Subtarget.getXLen(); |
20356 | Value *Ordering = Builder.getIntN(N: XLen, C: static_cast<uint64_t>(Ord)); |
20357 | Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; |
20358 | if (XLen == 64) { |
20359 | CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty()); |
20360 | NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty()); |
20361 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
20362 | CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; |
20363 | } |
20364 | Type *Tys[] = {AlignedAddr->getType()}; |
20365 | Function *MaskedCmpXchg = |
20366 | Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys); |
20367 | Value *Result = Builder.CreateCall( |
20368 | Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); |
20369 | if (XLen == 64) |
20370 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
20371 | return Result; |
20372 | } |
20373 | |
20374 | bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, |
20375 | EVT DataVT) const { |
20376 | // We have indexed loads for all supported EEW types. Indices are always |
20377 | // zero extended. |
20378 | return Extend.getOpcode() == ISD::ZERO_EXTEND && |
20379 | isTypeLegal(Extend.getValueType()) && |
20380 | isTypeLegal(Extend.getOperand(0).getValueType()) && |
20381 | Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1; |
20382 | } |
20383 | |
20384 | bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, |
20385 | EVT VT) const { |
20386 | if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) |
20387 | return false; |
20388 | |
20389 | switch (FPVT.getSimpleVT().SimpleTy) { |
20390 | case MVT::f16: |
20391 | return Subtarget.hasStdExtZfhmin(); |
20392 | case MVT::f32: |
20393 | return Subtarget.hasStdExtF(); |
20394 | case MVT::f64: |
20395 | return Subtarget.hasStdExtD(); |
20396 | default: |
20397 | return false; |
20398 | } |
20399 | } |
20400 | |
20401 | unsigned RISCVTargetLowering::getJumpTableEncoding() const { |
  // If we are using the small code model, we can reduce the size of each jump
  // table entry to 4 bytes.
20404 | if (Subtarget.is64Bit() && !isPositionIndependent() && |
20405 | getTargetMachine().getCodeModel() == CodeModel::Small) { |
20406 | return MachineJumpTableInfo::EK_Custom32; |
20407 | } |
20408 | return TargetLowering::getJumpTableEncoding(); |
20409 | } |
20410 | |
20411 | const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( |
20412 | const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, |
20413 | unsigned uid, MCContext &Ctx) const { |
20414 | assert(Subtarget.is64Bit() && !isPositionIndependent() && |
20415 | getTargetMachine().getCodeModel() == CodeModel::Small); |
20416 | return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx); |
20417 | } |
20418 | |
20419 | bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { |
20420 | // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power |
20421 | // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be |
20422 | // a power of two as well. |
20423 | // FIXME: This doesn't work for zve32, but that's already broken |
20424 | // elsewhere for the same reason. |
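  // e.g. VLEN=128 gives vscale = 128/64 = 2, and VLEN=256 gives 4.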
20425 | assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported" ); |
20426 | static_assert(RISCV::RVVBitsPerBlock == 64, |
20427 | "RVVBitsPerBlock changed, audit needed" ); |
20428 | return true; |
20429 | } |
20430 | |
20431 | bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, |
20432 | SDValue &Offset, |
20433 | ISD::MemIndexedMode &AM, |
20434 | SelectionDAG &DAG) const { |
20435 | // Target does not support indexed loads. |
20436 | if (!Subtarget.hasVendorXTHeadMemIdx()) |
20437 | return false; |
20438 | |
20439 | if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) |
20440 | return false; |
20441 | |
20442 | Base = Op->getOperand(Num: 0); |
20443 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: 1))) { |
20444 | int64_t RHSC = RHS->getSExtValue(); |
20445 | if (Op->getOpcode() == ISD::SUB) |
20446 | RHSC = -(uint64_t)RHSC; |
20447 | |
20448 | // The constants that can be encoded in the THeadMemIdx instructions |
20449 | // are of the form (sign_extend(imm5) << imm2). |
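    // e.g. 96 (12 << 3) is encodable, while 33 is not.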
20450 | bool isLegalIndexedOffset = false; |
20451 | for (unsigned i = 0; i < 4; i++) |
20452 | if (isInt<5>(x: RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { |
20453 | isLegalIndexedOffset = true; |
20454 | break; |
20455 | } |
20456 | |
20457 | if (!isLegalIndexedOffset) |
20458 | return false; |
20459 | |
20460 | Offset = Op->getOperand(Num: 1); |
20461 | return true; |
20462 | } |
20463 | |
20464 | return false; |
20465 | } |
20466 | |
20467 | bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, |
20468 | SDValue &Offset, |
20469 | ISD::MemIndexedMode &AM, |
20470 | SelectionDAG &DAG) const { |
20471 | EVT VT; |
20472 | SDValue Ptr; |
20473 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) { |
20474 | VT = LD->getMemoryVT(); |
20475 | Ptr = LD->getBasePtr(); |
20476 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) { |
20477 | VT = ST->getMemoryVT(); |
20478 | Ptr = ST->getBasePtr(); |
20479 | } else |
20480 | return false; |
20481 | |
20482 | if (!getIndexedAddressParts(Op: Ptr.getNode(), Base, Offset, AM, DAG)) |
20483 | return false; |
20484 | |
20485 | AM = ISD::PRE_INC; |
20486 | return true; |
20487 | } |
20488 | |
20489 | bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, |
20490 | SDValue &Base, |
20491 | SDValue &Offset, |
20492 | ISD::MemIndexedMode &AM, |
20493 | SelectionDAG &DAG) const { |
20494 | EVT VT; |
20495 | SDValue Ptr; |
20496 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) { |
20497 | VT = LD->getMemoryVT(); |
20498 | Ptr = LD->getBasePtr(); |
20499 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) { |
20500 | VT = ST->getMemoryVT(); |
20501 | Ptr = ST->getBasePtr(); |
20502 | } else |
20503 | return false; |
20504 | |
20505 | if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG)) |
20506 | return false; |
20507 | // Post-indexing updates the base, so it's not a valid transform |
20508 | // if that's not the same as the load's pointer. |
20509 | if (Ptr != Base) |
20510 | return false; |
20511 | |
20512 | AM = ISD::POST_INC; |
20513 | return true; |
20514 | } |
20515 | |
20516 | bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
20517 | EVT VT) const { |
20518 | EVT SVT = VT.getScalarType(); |
20519 | |
20520 | if (!SVT.isSimple()) |
20521 | return false; |
20522 | |
20523 | switch (SVT.getSimpleVT().SimpleTy) { |
20524 | case MVT::f16: |
20525 | return VT.isVector() ? Subtarget.hasVInstructionsF16() |
20526 | : Subtarget.hasStdExtZfhOrZhinx(); |
20527 | case MVT::f32: |
20528 | return Subtarget.hasStdExtFOrZfinx(); |
20529 | case MVT::f64: |
20530 | return Subtarget.hasStdExtDOrZdinx(); |
20531 | default: |
20532 | break; |
20533 | } |
20534 | |
20535 | return false; |
20536 | } |
20537 | |
20538 | ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { |
20539 | // Zacas will use amocas.w which does not require extension. |
20540 | return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; |
20541 | } |
20542 | |
20543 | Register RISCVTargetLowering::getExceptionPointerRegister( |
20544 | const Constant *PersonalityFn) const { |
20545 | return RISCV::X10; |
20546 | } |
20547 | |
20548 | Register RISCVTargetLowering::getExceptionSelectorRegister( |
20549 | const Constant *PersonalityFn) const { |
20550 | return RISCV::X11; |
20551 | } |
20552 | |
20553 | bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
  // Return false to suppress unnecessary extensions if a libcall argument or
  // return value is a float narrower than XLEN on a soft-FP ABI.
20556 | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
20557 | Type.getSizeInBits() < Subtarget.getXLen())) |
20558 | return false; |
20559 | |
20560 | return true; |
20561 | } |
20562 | |
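// On RV64, the calling convention expects 32-bit integer arguments and
// results to be sign-extended to 64 bits, so i32 libcall values must be
// sign extended even when nominally unsigned.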
20563 | bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
20564 | if (Subtarget.is64Bit() && Type == MVT::i32) |
20565 | return true; |
20566 | |
20567 | return IsSigned; |
20568 | } |
20569 | |
20570 | bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, |
20571 | SDValue C) const { |
20572 | // Check integral scalar types. |
20573 | const bool HasExtMOrZmmul = |
20574 | Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul(); |
20575 | if (!VT.isScalarInteger()) |
20576 | return false; |
20577 | |
  // Omit this optimization if the subtarget has the M extension (or Zmmul)
  // and the data size exceeds XLen.
20580 | if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen()) |
20581 | return false; |
20582 | |
20583 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) { |
20584 | // Break the MUL to a SLLI and an ADD/SUB. |
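    // e.g. x*9 -> (x << 3) + x, and x*7 -> (x << 3) - x.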
20585 | const APInt &Imm = ConstNode->getAPIntValue(); |
20586 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
20587 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
20588 | return true; |
20589 | |
20590 | // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. |
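    // e.g. x*4100 -> SH2ADD(x, (x << 12)), since 4100 = (1 << 12) + 4.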
20591 | if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(N: 12) && |
20592 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
20593 | (Imm - 8).isPowerOf2())) |
20594 | return true; |
20595 | |
20596 | // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs |
20597 | // a pair of LUI/ADDI. |
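    // e.g. x*6144 (3 << 11) -> ((x << 1) + x) << 11.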
20598 | if (!Imm.isSignedIntN(N: 12) && Imm.countr_zero() < 12 && |
20599 | ConstNode->hasOneUse()) { |
20600 | APInt ImmS = Imm.ashr(ShiftAmt: Imm.countr_zero()); |
20601 | if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || |
20602 | (1 - ImmS).isPowerOf2()) |
20603 | return true; |
20604 | } |
20605 | } |
20606 | |
20607 | return false; |
20608 | } |
20609 | |
20610 | bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, |
20611 | SDValue ConstNode) const { |
20612 | // Let the DAGCombiner decide for vectors. |
20613 | EVT VT = AddNode.getValueType(); |
20614 | if (VT.isVector()) |
20615 | return true; |
20616 | |
20617 | // Let the DAGCombiner decide for larger types. |
20618 | if (VT.getScalarSizeInBits() > Subtarget.getXLen()) |
20619 | return true; |
20620 | |
20621 | // It is worse if c1 is simm12 while c1*c2 is not. |
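  // e.g. folding (x + 2000) * 3 into (x * 3) + 6000 would replace the simm12
  // constant 2000 with 6000, which no longer fits in simm12.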
20622 | ConstantSDNode *C1Node = cast<ConstantSDNode>(Val: AddNode.getOperand(i: 1)); |
20623 | ConstantSDNode *C2Node = cast<ConstantSDNode>(Val&: ConstNode); |
20624 | const APInt &C1 = C1Node->getAPIntValue(); |
20625 | const APInt &C2 = C2Node->getAPIntValue(); |
20626 | if (C1.isSignedIntN(N: 12) && !(C1 * C2).isSignedIntN(N: 12)) |
20627 | return false; |
20628 | |
20629 | // Default to true and let the DAGCombiner decide. |
20630 | return true; |
20631 | } |
20632 | |
20633 | bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( |
20634 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
20635 | unsigned *Fast) const { |
20636 | if (!VT.isVector()) { |
20637 | if (Fast) |
20638 | *Fast = Subtarget.enableUnalignedScalarMem(); |
20639 | return Subtarget.enableUnalignedScalarMem(); |
20640 | } |
20641 | |
20642 | // All vector implementations must support element alignment |
20643 | EVT ElemVT = VT.getVectorElementType(); |
20644 | if (Alignment >= ElemVT.getStoreSize()) { |
20645 | if (Fast) |
20646 | *Fast = 1; |
20647 | return true; |
20648 | } |
20649 | |
  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed and considered fast.
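  // e.g. a misaligned <8 x i16> access can be emitted as a 16-byte e8 access.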
20654 | if (Fast) |
20655 | *Fast = Subtarget.enableUnalignedVectorMem(); |
20656 | return Subtarget.enableUnalignedVectorMem(); |
20657 | } |
20658 | |
20659 | |
20660 | EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, |
20661 | const AttributeList &FuncAttributes) const { |
20662 | if (!Subtarget.hasVInstructions()) |
20663 | return MVT::Other; |
20664 | |
20665 | if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) |
20666 | return MVT::Other; |
20667 | |
20668 | // We use LMUL1 memory operations here for a non-obvious reason. Our caller |
20669 | // has an expansion threshold, and we want the number of hardware memory |
20670 | // operations to correspond roughly to that threshold. LMUL>1 operations |
20671 | // are typically expanded linearly internally, and thus correspond to more |
20672 | // than one actual memory operation. Note that store merging and load |
20673 | // combining will typically form larger LMUL operations from the LMUL1 |
20674 | // operations emitted here, and that's okay because combining isn't |
20675 | // introducing new memory operations; it's just merging existing ones. |
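  // e.g. with VLEN=128 (a 16-byte LMUL1 register), an LMUL4 load is typically
  // expanded into four 16-byte memory operations internally, so it counts as
  // four operations against the threshold, not one.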
20676 | const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8; |
20677 | if (Op.size() < MinVLenInBytes) |
20678 | // TODO: Figure out short memops. For the moment, do the default thing |
20679 | // which ends up using scalar sequences. |
20680 | return MVT::Other; |
20681 | |
20682 | // Prefer i8 for non-zero memset as it allows us to avoid materializing |
20683 | // a large scalar constant and instead use vmv.v.x/i to do the |
20684 | // broadcast. For everything else, prefer ELenVT to minimize VL and thus |
20685 | // maximize the chance we can encode the size in the vsetvli. |
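  // e.g. a memset of the byte 0xAB can broadcast it directly with vmv.v.x at
  // SEW=8, whereas an e64 memset would first have to materialize
  // 0xABABABABABABABAB in a scalar register.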
20686 | MVT ELenVT = MVT::getIntegerVT(BitWidth: Subtarget.getELen()); |
20687 | MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT; |
20688 | |
20689 | // Do we have sufficient alignment for our preferred VT? If not, revert |
20690 | // to largest size allowed by our alignment criteria. |
20691 | if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) { |
20692 | Align RequiredAlign(PreferredVT.getStoreSize()); |
20693 | if (Op.isFixedDstAlign()) |
20694 | RequiredAlign = std::min(a: RequiredAlign, b: Op.getDstAlign()); |
20695 | if (Op.isMemcpy()) |
20696 | RequiredAlign = std::min(a: RequiredAlign, b: Op.getSrcAlign()); |
20697 | PreferredVT = MVT::getIntegerVT(BitWidth: RequiredAlign.value() * 8); |
20698 | } |
20699 | return MVT::getVectorVT(VT: PreferredVT, NumElements: MinVLenInBytes/PreferredVT.getStoreSize()); |
20700 | } |
20701 | |
20702 | bool RISCVTargetLowering::splitValueIntoRegisterParts( |
20703 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
20704 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
20705 | bool IsABIRegCopy = CC.has_value(); |
20706 | EVT ValueVT = Val.getValueType(); |
20707 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
20708 | PartVT == MVT::f32) { |
    // Cast the [b]f16 to i16, extend to i32, pad the upper 16 bits with ones
    // to make a float NaN, and cast to f32.
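    // e.g. the f16 value 1.0 (0x3C00) is passed as the f32 bit pattern
    // 0xFFFF3C00, a NaN-boxed encoding.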
20711 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); |
20712 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); |
20713 | Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, |
20714 | DAG.getConstant(0xFFFF0000, DL, MVT::i32)); |
20715 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); |
20716 | Parts[0] = Val; |
20717 | return true; |
20718 | } |
20719 | |
20720 | if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { |
20721 | LLVMContext &Context = *DAG.getContext(); |
20722 | EVT ValueEltVT = ValueVT.getVectorElementType(); |
20723 | EVT PartEltVT = PartVT.getVectorElementType(); |
20724 | unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); |
20725 | unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); |
20726 | if (PartVTBitSize % ValueVTBitSize == 0) { |
20727 | assert(PartVTBitSize >= ValueVTBitSize); |
      // If the element types are different, first widen the value within its
      // own element type, then bitcast to PartVT.
      // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
      // we insert the <vscale x 1 x i8> into <vscale x 8 x i8> via
      // insert_subvector, then bitcast the result to <vscale x 4 x i16>.
20734 | if (ValueEltVT != PartEltVT) { |
20735 | if (PartVTBitSize > ValueVTBitSize) { |
20736 | unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); |
          assert(Count != 0 && "The number of elements should not be zero.");
20738 | EVT SameEltTypeVT = |
20739 | EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true); |
20740 | Val = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SameEltTypeVT, |
20741 | N1: DAG.getUNDEF(VT: SameEltTypeVT), N2: Val, |
20742 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
20743 | } |
20744 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val); |
20745 | } else { |
20746 | Val = |
20747 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: PartVT, N1: DAG.getUNDEF(VT: PartVT), |
20748 | N2: Val, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
20749 | } |
20750 | Parts[0] = Val; |
20751 | return true; |
20752 | } |
20753 | } |
20754 | return false; |
20755 | } |
20756 | |
20757 | SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( |
20758 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
20759 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
20760 | bool IsABIRegCopy = CC.has_value(); |
20761 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
20762 | PartVT == MVT::f32) { |
20763 | SDValue Val = Parts[0]; |
20764 | |
20765 | // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. |
20766 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); |
20767 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val); |
20768 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val); |
20769 | return Val; |
20770 | } |
20771 | |
20772 | if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { |
20773 | LLVMContext &Context = *DAG.getContext(); |
20774 | SDValue Val = Parts[0]; |
20775 | EVT ValueEltVT = ValueVT.getVectorElementType(); |
20776 | EVT PartEltVT = PartVT.getVectorElementType(); |
20777 | unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); |
20778 | unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); |
20779 | if (PartVTBitSize % ValueVTBitSize == 0) { |
20780 | assert(PartVTBitSize >= ValueVTBitSize); |
20781 | EVT SameEltTypeVT = ValueVT; |
      // If the element types are different, first bitcast the parts to a
      // vector type with the same element type as ValueVT.
      // For example, to recover a <vscale x 1 x i8> value from
      // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8>, then extract the <vscale x 1 x i8> subvector.
20788 | if (ValueEltVT != PartEltVT) { |
20789 | unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); |
        assert(Count != 0 && "The number of elements should not be zero.");
20791 | SameEltTypeVT = |
20792 | EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true); |
20793 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: SameEltTypeVT, Operand: Val); |
20794 | } |
20795 | Val = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ValueVT, N1: Val, |
20796 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
20797 | return Val; |
20798 | } |
20799 | } |
20800 | return SDValue(); |
20801 | } |
20802 | |
20803 | bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { |
20804 | // When aggressively optimizing for code size, we prefer to use a div |
20805 | // instruction, as it is usually smaller than the alternative sequence. |
20806 | // TODO: Add vector division? |
20807 | bool OptSize = Attr.hasFnAttr(Attribute::MinSize); |
20808 | return OptSize && !VT.isVector(); |
20809 | } |
20810 | |
20811 | bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const { |
  // Scalarizing the splat of a zero_ext or sign_ext might prevent it from
  // matching a widening instruction in some situations.
20814 | unsigned Opc = N->getOpcode(); |
20815 | if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND) |
20816 | return false; |
20817 | return true; |
20818 | } |
20819 | |
20820 | static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) { |
20821 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
20822 | Function *ThreadPointerFunc = |
20823 | Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); |
20824 | return IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), |
20825 | Ptr: IRB.CreateCall(Callee: ThreadPointerFunc), Idx0: Offset); |
20826 | } |
20827 | |
20828 | Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { |
20829 | // Fuchsia provides a fixed TLS slot for the stack cookie. |
20830 | // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value. |
20831 | if (Subtarget.isTargetFuchsia()) |
20832 | return useTpOffset(IRB, Offset: -0x10); |
20833 | |
20834 | return TargetLowering::getIRStackGuard(IRB); |
20835 | } |
20836 | |
20837 | bool RISCVTargetLowering::isLegalInterleavedAccessType( |
20838 | VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, |
20839 | const DataLayout &DL) const { |
20840 | EVT VT = getValueType(DL, Ty: VTy); |
20841 | // Don't lower vlseg/vsseg for vector types that can't be split. |
20842 | if (!isTypeLegal(VT)) |
20843 | return false; |
20844 | |
20845 | if (!isLegalElementTypeForRVV(ScalarTy: VT.getScalarType()) || |
20846 | !allowsMemoryAccessForAlignment(Context&: VTy->getContext(), DL, VT, AddrSpace, |
20847 | Alignment)) |
20848 | return false; |
20849 | |
20850 | MVT ContainerVT = VT.getSimpleVT(); |
20851 | |
20852 | if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) { |
20853 | if (!Subtarget.useRVVForFixedLengthVectors()) |
20854 | return false; |
20855 | // Sometimes the interleaved access pass picks up splats as interleaves of |
20856 | // one element. Don't lower these. |
20857 | if (FVTy->getNumElements() < 2) |
20858 | return false; |
20859 | |
20860 | ContainerVT = getContainerForFixedLengthVector(VT: VT.getSimpleVT()); |
20861 | } |
20862 | |
20863 | // Need to make sure that EMUL * NFIELDS ≤ 8 |
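  // e.g. Factor 4 at LMUL2 gives EMUL * NFIELDS = 8 (accepted), while
  // Factor 3 at LMUL4 gives 12 (rejected).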
20864 | auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMUL: getLMUL(VT: ContainerVT)); |
20865 | if (Fractional) |
20866 | return true; |
20867 | return Factor * LMUL <= 8; |
20868 | } |
20869 | |
20870 | bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, |
20871 | Align Alignment) const { |
20872 | if (!Subtarget.hasVInstructions()) |
20873 | return false; |
20874 | |
20875 | // Only support fixed vectors if we know the minimum vector size. |
  if (DataType.isFixedLengthVector() &&
      !Subtarget.useRVVForFixedLengthVectors())
20877 | return false; |
20878 | |
20879 | EVT ScalarType = DataType.getScalarType(); |
20880 | if (!isLegalElementTypeForRVV(ScalarTy: ScalarType)) |
20881 | return false; |
20882 | |
20883 | if (!Subtarget.enableUnalignedVectorMem() && |
20884 | Alignment < ScalarType.getStoreSize()) |
20885 | return false; |
20886 | |
20887 | return true; |
20888 | } |
20889 | |
20890 | static const Intrinsic::ID FixedVlsegIntrIds[] = { |
20891 | Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load, |
20892 | Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load, |
20893 | Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load, |
20894 | Intrinsic::riscv_seg8_load}; |
20895 | |
20896 | /// Lower an interleaved load into a vlsegN intrinsic. |
20897 | /// |
20898 | /// E.g. Lower an interleaved load (Factor = 2): |
20899 | /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr |
20900 | /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements |
20901 | /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements |
20902 | /// |
20903 | /// Into: |
///    %ld2 = call { <4 x i32>, <4 x i32> } @llvm.riscv.seg2.load.v4i32.p0.i64(
///                                             %ptr, i64 4)
///    %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
///    %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
20908 | bool RISCVTargetLowering::lowerInterleavedLoad( |
20909 | LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, |
20910 | ArrayRef<unsigned> Indices, unsigned Factor) const { |
20911 | IRBuilder<> Builder(LI); |
20912 | |
20913 | auto *VTy = cast<FixedVectorType>(Val: Shuffles[0]->getType()); |
20914 | if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: LI->getAlign(), |
20915 | AddrSpace: LI->getPointerAddressSpace(), |
20916 | DL: LI->getModule()->getDataLayout())) |
20917 | return false; |
20918 | |
20919 | auto *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen()); |
20920 | |
20921 | Function *VlsegNFunc = |
20922 | Intrinsic::getDeclaration(M: LI->getModule(), id: FixedVlsegIntrIds[Factor - 2], |
20923 | Tys: {VTy, LI->getPointerOperandType(), XLenTy}); |
20924 | |
20925 | Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements()); |
20926 | |
20927 | CallInst *VlsegN = |
20928 | Builder.CreateCall(Callee: VlsegNFunc, Args: {LI->getPointerOperand(), VL}); |
20929 | |
20930 | for (unsigned i = 0; i < Shuffles.size(); i++) { |
20931 | Value *SubVec = Builder.CreateExtractValue(Agg: VlsegN, Idxs: Indices[i]); |
20932 | Shuffles[i]->replaceAllUsesWith(V: SubVec); |
20933 | } |
20934 | |
20935 | return true; |
20936 | } |
20937 | |
20938 | static const Intrinsic::ID FixedVssegIntrIds[] = { |
20939 | Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store, |
20940 | Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store, |
20941 | Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store, |
20942 | Intrinsic::riscv_seg8_store}; |
20943 | |
20944 | /// Lower an interleaved store into a vssegN intrinsic. |
20945 | /// |
20946 | /// E.g. Lower an interleaved store (Factor = 3): |
20947 | /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, |
20948 | /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> |
20949 | /// store <12 x i32> %i.vec, <12 x i32>* %ptr |
20950 | /// |
20951 | /// Into: |
20952 | /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> |
20953 | /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> |
20954 | /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> |
///    call void @llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                                   %ptr, i64 4)
20957 | /// |
20958 | /// Note that the new shufflevectors will be removed and we'll only generate one |
20959 | /// vsseg3 instruction in CodeGen. |
20960 | bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, |
20961 | ShuffleVectorInst *SVI, |
20962 | unsigned Factor) const { |
20963 | IRBuilder<> Builder(SI); |
20964 | auto *ShuffleVTy = cast<FixedVectorType>(Val: SVI->getType()); |
20965 | // Given SVI : <n*factor x ty>, then VTy : <n x ty> |
20966 | auto *VTy = FixedVectorType::get(ElementType: ShuffleVTy->getElementType(), |
20967 | NumElts: ShuffleVTy->getNumElements() / Factor); |
20968 | if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: SI->getAlign(), |
20969 | AddrSpace: SI->getPointerAddressSpace(), |
20970 | DL: SI->getModule()->getDataLayout())) |
20971 | return false; |
20972 | |
20973 | auto *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen()); |
20974 | |
20975 | Function *VssegNFunc = |
20976 | Intrinsic::getDeclaration(M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2], |
20977 | Tys: {VTy, SI->getPointerOperandType(), XLenTy}); |
20978 | |
20979 | auto Mask = SVI->getShuffleMask(); |
20980 | SmallVector<Value *, 10> Ops; |
20981 | |
20982 | for (unsigned i = 0; i < Factor; i++) { |
20983 | Value *Shuffle = Builder.CreateShuffleVector( |
20984 | V1: SVI->getOperand(i_nocapture: 0), V2: SVI->getOperand(i_nocapture: 1), |
20985 | Mask: createSequentialMask(Start: Mask[i], NumInts: VTy->getNumElements(), NumUndefs: 0)); |
20986 | Ops.push_back(Elt: Shuffle); |
20987 | } |
  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
20991 | Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements()); |
20992 | Ops.append(IL: {SI->getPointerOperand(), VL}); |
20993 | |
20994 | Builder.CreateCall(Callee: VssegNFunc, Args: Ops); |
20995 | |
20996 | return true; |
20997 | } |
20998 | |
20999 | bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
21000 | LoadInst *LI) const { |
21001 | assert(LI->isSimple()); |
21002 | IRBuilder<> Builder(LI); |
21003 | |
21004 | // Only deinterleave2 supported at present. |
21005 | if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2) |
21006 | return false; |
21007 | |
21008 | unsigned Factor = 2; |
21009 | |
  VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
  VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));

  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getModule()->getDataLayout()))
21016 | return false; |
21017 | |
21018 | Function *VlsegNFunc; |
21019 | Value *VL; |
  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21021 | SmallVector<Value *, 10> Ops; |
21022 | |
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VlsegNFunc = Intrinsic::getDeclaration(
        LI->getModule(), FixedVlsegIntrIds[Factor - 2],
        {ResVTy, LI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21028 | } else { |
21029 | static const Intrinsic::ID IntrIds[] = { |
21030 | Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, |
21031 | Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, |
21032 | Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, |
21033 | Intrinsic::riscv_vlseg8}; |
21034 | |
    VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
                                           {ResVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
    Ops.append(Factor, PoisonValue::get(ResVTy));
21039 | } |
21040 | |
  Ops.append({LI->getPointerOperand(), VL});

  Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
  DI->replaceAllUsesWith(Vlseg);
21045 | |
21046 | return true; |
21047 | } |
21048 | |
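/// Lower an interleave intrinsic whose result feeds a store into a vssegN
/// intrinsic.
///
/// E.g. (a sketch; the exact types depend on the input IR), lower:
///   %i = call <vscale x 8 x i32>
///            @llvm.experimental.vector.interleave2.nxv8i32(
///                <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
///   store <vscale x 8 x i32> %i, ptr %ptr
///
/// Into a single segmented store of the form:
///   call void @llvm.riscv.vsseg2(... %a, %b, ptr %ptr ...)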
21049 | bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
21050 | StoreInst *SI) const { |
21051 | assert(SI->isSimple()); |
21052 | IRBuilder<> Builder(SI); |
21053 | |
  // Only interleave2 is supported at present.
21055 | if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2) |
21056 | return false; |
21057 | |
21058 | unsigned Factor = 2; |
21059 | |
  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getModule()->getDataLayout()))
21066 | return false; |
21067 | |
21068 | Function *VssegNFunc; |
21069 | Value *VL; |
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21071 | |
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21077 | } else { |
21078 | static const Intrinsic::ID IntrIds[] = { |
21079 | Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, |
21080 | Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, |
21081 | Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, |
21082 | Intrinsic::riscv_vsseg8}; |
21083 | |
    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
21087 | } |
21088 | |
  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});
21091 | |
21092 | return true; |
21093 | } |
21094 | |
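// Emit a KCFI type check before an indirect call or tail call. The KCFI_CHECK
// pseudo is expanded late by the AsmPrinter into a compare-and-trap sequence
// that loads the type hash placed in front of the callee and compares it
// against the expected CFI type id; roughly (a sketch, with scratch register
// selection and exact offsets left to the expansion):
//   lw   t1, -4(target)           ; type hash emitted before the callee
//   <materialize the expected type id into t2>
//   beq  t1, t2, .Lpass           ; match: fall through to the call
//   ebreak                        ; mismatch: trap
// .Lpass: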
21095 | MachineInstr * |
21096 | RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, |
21097 | MachineBasicBlock::instr_iterator &MBBI, |
21098 | const TargetInstrInfo *TII) const { |
21099 | assert(MBBI->isCall() && MBBI->getCFIType() && |
21100 | "Invalid call instruction for a KCFI check" ); |
21101 | assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect}, |
21102 | MBBI->getOpcode())); |
21103 | |
  MachineOperand &Target = MBBI->getOperand(0);
21105 | Target.setIsRenamable(false); |
21106 | |
21107 | return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK)) |
21108 | .addReg(Target.getReg()) |
21109 | .addImm(MBBI->getCFIType()) |
21110 | .getInstr(); |
21111 | } |
21112 | |
21113 | #define GET_REGISTER_MATCHER |
21114 | #include "RISCVGenAsmMatcher.inc" |
21115 | |
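// Resolve a register name (e.g. from llvm.read_register/llvm.write_register)
// to a physical register; the register must be reserved. A sketch of the kind
// of IR this serves, assuming "gp" has been reserved:
//   %v = call i64 @llvm.read_register.i64(metadata !"gp")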
21116 | Register |
21117 | RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
21118 | const MachineFunction &MF) const { |
21119 | Register Reg = MatchRegisterAltName(RegName); |
21120 | if (Reg == RISCV::NoRegister) |
21121 | Reg = MatchRegisterName(RegName); |
21122 | if (Reg == RISCV::NoRegister) |
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
21129 | return Reg; |
21130 | } |
21131 | |
21132 | MachineMemOperand::Flags |
21133 | RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const { |
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21135 | |
21136 | if (NontemporalInfo == nullptr) |
21137 | return MachineMemOperand::MONone; |
21138 | |
  // The riscv-nontemporal-domain metadata encodes the following levels:
  //   1 (the default) -> __RISCV_NTLH_ALL
  //   2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  //   3 -> __RISCV_NTLH_ALL_PRIVATE
  //   4 -> __RISCV_NTLH_INNERMOST_SHARED
  //   5 -> __RISCV_NTLH_ALL
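  // E.g. (a sketch of the expected metadata shape), a store such as:
  //   store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
  //   !0 = !{i32 1}
  //   !1 = !{i32 3}
  // reaches here with NontemporalLevel = 3 (__RISCV_NTLH_ALL_PRIVATE).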
21144 | int NontemporalLevel = 5; |
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");
21156 | |
21157 | NontemporalLevel -= 2; |
21158 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone; |
21159 | if (NontemporalLevel & 0b1) |
21160 | Flags |= MONontemporalBit0; |
21161 | if (NontemporalLevel & 0b10) |
21162 | Flags |= MONontemporalBit1; |
21163 | |
21164 | return Flags; |
21165 | } |
21166 | |
21167 | MachineMemOperand::Flags |
21168 | RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const { |
21169 | |
21170 | MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags(); |
21171 | MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone; |
21172 | TargetFlags |= (NodeFlags & MONontemporalBit0); |
21173 | TargetFlags |= (NodeFlags & MONontemporalBit1); |
21174 | return TargetFlags; |
21175 | } |
21176 | |
21177 | bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable( |
21178 | const MemSDNode &NodeX, const MemSDNode &NodeY) const { |
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21180 | } |
21181 | |
21182 | bool RISCVTargetLowering::isCtpopFast(EVT VT) const { |
21183 | if (VT.isScalableVector()) |
21184 | return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); |
21185 | if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) |
21186 | return true; |
21187 | return Subtarget.hasStdExtZbb() && |
21188 | (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); |
21189 | } |
21190 | |
21191 | unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, |
21192 | ISD::CondCode Cond) const { |
21193 | return isCtpopFast(VT) ? 0 : 1; |
21194 | } |
21195 | |
21196 | bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { |
21197 | |
21198 | // GISel support is in progress or complete for these opcodes. |
21199 | unsigned Op = Inst.getOpcode(); |
21200 | if (Op == Instruction::Add || Op == Instruction::Sub || |
21201 | Op == Instruction::And || Op == Instruction::Or || |
21202 | Op == Instruction::Xor || Op == Instruction::InsertElement || |
21203 | Op == Instruction::ShuffleVector || Op == Instruction::Load) |
21204 | return false; |
21205 | |
21206 | if (Inst.getType()->isScalableTy()) |
21207 | return true; |
21208 | |
21209 | for (unsigned i = 0; i < Inst.getNumOperands(); ++i) |
    if (Inst.getOperand(i)->getType()->isScalableTy() &&
        !isa<ReturnInst>(&Inst))
21212 | return true; |
21213 | |
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21215 | if (AI->getAllocatedType()->isScalableTy()) |
21216 | return true; |
21217 | } |
21218 | |
21219 | return false; |
21220 | } |
21221 | |
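// For a divisor that is a small power of two, the CMov-based expansion below
// replaces the usual branchy round-toward-zero fixup. Roughly (a sketch for
// sdiv i32 %x, 8 on a subtarget with short forward branches):
//   addi a1, a0, 7            ; a0 + (2^k - 1)
//   <a1 = a0 if a0 >= 0, selected via a short forward branch>
//   sraiw a0, a1, 3           ; arithmetic shift of the selected value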
21222 | SDValue |
21223 | RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
21224 | SelectionDAG &DAG, |
21225 | SmallVectorImpl<SDNode *> &Created) const { |
21226 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
  if (isIntDivCheap(N->getValueType(0), Attr))
21228 | return SDValue(N, 0); // Lower SDIV as SDIV |
21229 | |
21230 | // Only perform this transform if short forward branch opt is supported. |
21231 | if (!Subtarget.hasShortForwardBranchOpt()) |
21232 | return SDValue(); |
  EVT VT = N->getValueType(0);
21234 | if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) |
21235 | return SDValue(); |
21236 | |
21237 | // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw. |
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
21239 | return SDValue(); |
21240 | return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created); |
21241 | } |
21242 | |
21243 | bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( |
21244 | EVT VT, const APInt &AndMask) const { |
21245 | if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) |
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21247 | return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); |
21248 | } |
21249 | |
21250 | unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const { |
21251 | return Subtarget.getMinimumJumpTableEntries(); |
21252 | } |
21253 | |
21254 | // Handle single arg such as return value. |
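// E.g. (a sketch), a return value of type
// {<vscale x 2 x i32>, <vscale x 2 x i32>} reaches here as two identical
// scalable-vector MVTs and is recorded as a single tuple RVVArgInfo with
// NF = 2.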
21255 | template <typename Arg> |
21256 | void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) { |
  // This lambda determines whether the list of argument types consists of
  // identical scalable vector types, i.e. whether it forms a homogeneous
  // scalable vector aggregate.
  auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
    auto It = ArgList.begin();

    // Bail out if the list is empty or the first type needs to be split.
    if (It == ArgList.end() || It->Flags.isSplit())
      return false;

    // First, record the type of the first element.
    MVT FirstArgRegType = It->VT;
    ++It;

    // Bail out if this argument contains only one element, or if its type is
    // not a scalable vector.
    if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
      return false;

    // Second, check that all remaining elements have the same type as the
    // first.
21277 | for (; It != ArgList.end(); ++It) |
21278 | if (It->Flags.isSplit() || It->VT != FirstArgRegType) |
21279 | return false; |
21280 | |
21281 | return true; |
21282 | }; |
21283 | |
21284 | if (isHomogeneousScalableVectorType(ArgList)) { |
21285 | // Handle as tuple type |
    RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21287 | } else { |
21288 | // Handle as normal vector type |
21289 | bool FirstVMaskAssigned = false; |
21290 | for (const auto &OutArg : ArgList) { |
21291 | MVT RegisterVT = OutArg.VT; |
21292 | |
21293 | // Skip non-RVV register type |
21294 | if (!RegisterVT.isVector()) |
21295 | continue; |
21296 | |
21297 | if (RegisterVT.isFixedLengthVector()) |
        RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21299 | |
21300 | if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) { |
        RVVArgInfos.push_back({1, RegisterVT, true});
21302 | FirstVMaskAssigned = true; |
21303 | continue; |
21304 | } |
21305 | |
      RVVArgInfos.push_back({1, RegisterVT, false});
21307 | } |
21308 | } |
21309 | } |
21310 | |
21311 | // Handle multiple args. |
21312 | template <> |
21313 | void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) { |
21314 | const DataLayout &DL = MF->getDataLayout(); |
21315 | const Function &F = MF->getFunction(); |
21316 | LLVMContext &Context = F.getContext(); |
21317 | |
21318 | bool FirstVMaskAssigned = false; |
21319 | for (Type *Ty : TypeList) { |
    StructType *STy = dyn_cast<StructType>(Ty);
    if (STy && STy->containsHomogeneousScalableVectorTypes()) {
      Type *ElemTy = STy->getTypeAtIndex(0U);
      EVT VT = TLI->getValueType(DL, ElemTy);
      MVT RegisterVT =
          TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
      unsigned NumRegs =
          TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);

      RVVArgInfos.push_back(
          {NumRegs * STy->getNumElements(), RegisterVT, false});
21331 | } else { |
21332 | SmallVector<EVT, 4> ValueVTs; |
      ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21334 | |
21335 | for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; |
21336 | ++Value) { |
21337 | EVT VT = ValueVTs[Value]; |
        MVT RegisterVT =
            TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
        unsigned NumRegs =
            TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21342 | |
21343 | // Skip non-RVV register type |
21344 | if (!RegisterVT.isVector()) |
21345 | continue; |
21346 | |
21347 | if (RegisterVT.isFixedLengthVector()) |
          RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21349 | |
21350 | if (!FirstVMaskAssigned && |
21351 | RegisterVT.getVectorElementType() == MVT::i1) { |
          RVVArgInfos.push_back({1, RegisterVT, true});
21353 | FirstVMaskAssigned = true; |
21354 | --NumRegs; |
21355 | } |
21356 | |
        RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21358 | } |
21359 | } |
21360 | } |
21361 | } |
21362 | |
21363 | void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul, |
21364 | unsigned StartReg) { |
  assert((StartReg % LMul) == 0 &&
         "Start register number should be a multiple of LMul");
21367 | const MCPhysReg *VRArrays; |
21368 | switch (LMul) { |
21369 | default: |
    report_fatal_error("Invalid lmul");
21371 | case 1: |
21372 | VRArrays = ArgVRs; |
21373 | break; |
21374 | case 2: |
21375 | VRArrays = ArgVRM2s; |
21376 | break; |
21377 | case 4: |
21378 | VRArrays = ArgVRM4s; |
21379 | break; |
21380 | case 8: |
21381 | VRArrays = ArgVRM8s; |
21382 | break; |
21383 | } |
21384 | |
21385 | for (unsigned i = 0; i < NF; ++i) |
21386 | if (StartReg) |
      AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
    else
      AllocatedPhysRegs.push_back(MCPhysReg());
21390 | } |
21391 | |
/// This function determines whether each RVV argument is passed by register.
/// If an argument can be assigned to a VR, give it the specific register;
/// otherwise, assign it 0, which is an invalid MCPhysReg.
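///
/// E.g. (a sketch, assuming v8 is the first argument VR): for two LMUL=2
/// arguments followed by one LMUL=4 argument, AssignedMap evolves
/// 0b11 -> 0b1111 -> 0b11111111, assigning v8m2, v10m2, and v12m4.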
21395 | void RVVArgDispatcher::compute() { |
21396 | uint32_t AssignedMap = 0; |
21397 | auto allocate = [&](const RVVArgInfo &ArgInfo) { |
21398 | // Allocate first vector mask argument to V0. |
21399 | if (ArgInfo.FirstVMask) { |
21400 | AllocatedPhysRegs.push_back(RISCV::V0); |
21401 | return; |
21402 | } |
21403 | |
    unsigned RegsNeeded = divideCeil(
        ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21406 | unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded; |
21407 | for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs; |
21408 | StartReg += RegsNeeded) { |
21409 | uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg; |
21410 | if ((AssignedMap & Map) == 0) { |
        allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21412 | AssignedMap |= Map; |
21413 | return; |
21414 | } |
21415 | } |
21416 | |
  allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21418 | }; |
21419 | |
21420 | for (unsigned i = 0; i < RVVArgInfos.size(); ++i) |
21421 | allocate(RVVArgInfos[i]); |
21422 | } |
21423 | |
21424 | MCPhysReg RVVArgDispatcher::getNextPhysReg() { |
  assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21426 | return AllocatedPhysRegs[CurIdx++]; |
21427 | } |
21428 | |
21429 | namespace llvm::RISCVVIntrinsicsTable { |
21430 | |
21431 | #define GET_RISCVVIntrinsicsTable_IMPL |
21432 | #include "RISCVGenSearchableTables.inc" |
21433 | |
21434 | } // namespace llvm::RISCVVIntrinsicsTable |
21435 | |