//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    // Set libcalls.
    setLibcallName(RTLIB::MUL_I128, nullptr);
    // The MULO libcall is not part of libgcc, only compiler-rt.
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  // The MULO libcall is not part of libgcc, only compiler-rt.
  setLibcallName(RTLIB::MULO_I128, nullptr);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // TODO: custom shuffle.
  return SDValue();
}

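// Return true if the node is an undef, a constant integer, or a constant FP
// value.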
static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

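// Return true if any operand of the BUILD_VECTOR node is a constant or undef
// value; lowerBUILD_VECTOR uses this to decide whether to expand through
// INSERT_VECTOR_ELT instead of the default store-based expansion.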
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}

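// Keep EXTRACT_VECTOR_ELT as-is when the element can be accessed directly
// (32/64-bit element types, or a constant index into the lower half of the
// vector); otherwise fall back to the default handling.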
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

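// Only INSERT_VECTOR_ELT with a constant index is handled directly; a variable
// index falls back to the default handling.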
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

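  // Walk up the frame chain: the caller's frame pointer is assumed to be
  // spilled just below the saved return address, i.e. at fp - 2 * GRLenInBytes
  // (see LoongArchFrameLowering for the actual frame layout).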
  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}

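// getTargetNode helpers: wrap the different symbol-bearing SDNodes (global
// addresses, block addresses, constant pools and jump tables) into their
// target-specific counterparts for use by getAddr() below.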
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

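// Materialize the address of a symbol according to the code model:
// PC-relative sequences for DSO-local symbols and GOT loads otherwise, with
// dedicated PseudoLA_*_LARGE forms for the large code model.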
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);

    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
    // becomes the desired 5-insn code sequence.
    return SDValue(
        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
        0);
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      return SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);

    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
                   0);
  }
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  auto CM = DAG.getTarget().getCodeModel();
  const GlobalValue *GV = N->getGlobal();

  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
      CM = *GCM;
  }

  return getAddr(N, DAG, CM, GV->isDSOLocal());
}

SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  SDValue Addr;
  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                   : LoongArch::PseudoLA_TLS_GD,
                             Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                   : LoongArch::PseudoLA_TLS_LD,
                             Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    Addr = getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            Large);
    break;
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
    break;
  }

  return Addr;
}

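// Verify that an intrinsic immediate operand fits in N bits (signed or
// unsigned); emit a diagnostic and return UNDEF when it does not, and return
// an empty SDValue otherwise so the caller keeps the default lowering.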
template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}

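// Custom lowering for chain-less intrinsics: mostly range checks on ImmArg
// operands, plus lowering of llvm.thread.pointer to the $tp register.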
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}

// Helper function that emits an error message for intrinsics with a chain and
// returns the merge values of an UNDEF and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = Op.getConstantOperandVal(4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {

  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = Op2->getAsZExtVal();
    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
1432 case Intrinsic::loongarch_movgr2fcsr: {
1433 if (!Subtarget.hasBasicF())
1434 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
1435 unsigned Imm = Op2->getAsZExtVal();
1436 return !isUInt<2>(Imm)
1437 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1438 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1439 DAG.getConstant(Imm, DL, GRLenVT),
1440 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1441 Op.getOperand(3)));
1442 }
1443 case Intrinsic::loongarch_syscall: {
1444 unsigned Imm = Op2->getAsZExtVal();
1445 return !isUInt<15>(Imm)
1446 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1447 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1448 DAG.getConstant(Imm, DL, GRLenVT));
1449 }
1450#define IOCSRWR_CASE(NAME, NODE) \
1451 case Intrinsic::loongarch_##NAME: { \
1452 SDValue Op3 = Op.getOperand(3); \
1453 return Subtarget.is64Bit() \
1454 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
1455 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1456 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
1457 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
1458 Op3); \
1459 }
1460 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1461 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1462 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1463#undef IOCSRWR_CASE
1464 case Intrinsic::loongarch_iocsrwr_d: {
1465 return !Subtarget.is64Bit()
1466 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1467 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1468 Op2,
1469 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1470 Op.getOperand(3)));
1471 }
1472#define ASRT_LE_GT_CASE(NAME) \
1473 case Intrinsic::loongarch_##NAME: { \
1474 return !Subtarget.is64Bit() \
1475 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
1476 : Op; \
1477 }
1478 ASRT_LE_GT_CASE(asrtle_d)
1479 ASRT_LE_GT_CASE(asrtgt_d)
1480#undef ASRT_LE_GT_CASE
1481 case Intrinsic::loongarch_ldpte_d: {
1482 unsigned Imm = Op.getConstantOperandVal(i: 3);
1483 return !Subtarget.is64Bit()
1484 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
1485 : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
1486 : Op;
1487 }
1488 case Intrinsic::loongarch_lsx_vst:
1489 case Intrinsic::loongarch_lasx_xvst:
1490 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue())
1491 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
1492 : SDValue();
1493 case Intrinsic::loongarch_lasx_xvstelm_b:
1494 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1495 !isUInt<5>(x: Op.getConstantOperandVal(i: 5)))
1496 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
1497 : SDValue();
1498 case Intrinsic::loongarch_lsx_vstelm_b:
1499 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1500 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
1501 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
1502 : SDValue();
1503 case Intrinsic::loongarch_lasx_xvstelm_h:
1504 return (!isShiftedInt<8, 1>(
1505 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1506 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
1507 ? emitIntrinsicErrorMessage(
1508 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
1509 : SDValue();
1510 case Intrinsic::loongarch_lsx_vstelm_h:
1511 return (!isShiftedInt<8, 1>(
1512 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1513 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
1514 ? emitIntrinsicErrorMessage(
1515 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
1516 : SDValue();
1517 case Intrinsic::loongarch_lasx_xvstelm_w:
1518 return (!isShiftedInt<8, 2>(
1519 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1520 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
1521 ? emitIntrinsicErrorMessage(
1522 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
1523 : SDValue();
1524 case Intrinsic::loongarch_lsx_vstelm_w:
1525 return (!isShiftedInt<8, 2>(
1526 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1527 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
1528 ? emitIntrinsicErrorMessage(
1529 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
1530 : SDValue();
1531 case Intrinsic::loongarch_lasx_xvstelm_d:
1532 return (!isShiftedInt<8, 3>(
1533 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1534 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
1535 ? emitIntrinsicErrorMessage(
1536 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
1537 : SDValue();
1538 case Intrinsic::loongarch_lsx_vstelm_d:
1539 return (!isShiftedInt<8, 3>(
1540 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
1541 !isUInt<1>(x: Op.getConstantOperandVal(i: 5)))
1542 ? emitIntrinsicErrorMessage(
1543 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
1544 : SDValue();
1545 }
1546}
1547
1548SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1549 SelectionDAG &DAG) const {
1550 SDLoc DL(Op);
1551 SDValue Lo = Op.getOperand(i: 0);
1552 SDValue Hi = Op.getOperand(i: 1);
1553 SDValue Shamt = Op.getOperand(i: 2);
1554 EVT VT = Lo.getValueType();
1555
1556 // if Shamt-GRLen < 0: // Shamt < GRLen
1557 // Lo = Lo << Shamt
1558 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1559 // else:
1560 // Lo = 0
1561 // Hi = Lo << (Shamt-GRLen)
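  // For example, on LA32 (GRLen=32) with Shamt=5 this gives
  //   Lo = Lo << 5
  //   Hi = (Hi << 5) | ((Lo >>u 1) >>u 26)   // i.e. (Hi << 5) | (Lo >>u 27)
  // and with Shamt=40 the else branch gives Lo = 0, Hi = Lo << 8. Splitting
  // the right shift into (Lo >>u 1) >>u (GRLen-1 ^ Shamt) keeps every shift
  // amount below GRLen, so Shamt=0 remains well-defined.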
1562
1563 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
1564 SDValue One = DAG.getConstant(Val: 1, DL, VT);
1565 SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
1566 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
1567 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
1568 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
1569
1570 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
1571 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
1572 SDValue ShiftRightLo =
1573 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
1574 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
1575 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
1576 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);
1577
1578 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
1579
1580 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
1581 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
1582
1583 SDValue Parts[2] = {Lo, Hi};
1584 return DAG.getMergeValues(Ops: Parts, dl: DL);
1585}
1586
1587SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1588 SelectionDAG &DAG,
1589 bool IsSRA) const {
1590 SDLoc DL(Op);
1591 SDValue Lo = Op.getOperand(i: 0);
1592 SDValue Hi = Op.getOperand(i: 1);
1593 SDValue Shamt = Op.getOperand(i: 2);
1594 EVT VT = Lo.getValueType();
1595
1596 // SRA expansion:
1597 // if Shamt-GRLen < 0: // Shamt < GRLen
1598 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1599 // Hi = Hi >>s Shamt
1600 // else:
1601 // Lo = Hi >>s (Shamt-GRLen);
1602 // Hi = Hi >>s (GRLen-1)
1603 //
1604 // SRL expansion:
1605 // if Shamt-GRLen < 0: // Shamt < GRLen
1606 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1607 // Hi = Hi >>u Shamt
1608 // else:
1609 // Lo = Hi >>u (Shamt-GRLen);
1610 // Hi = 0;
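  // For example, a 64-bit SRL on LA32 (GRLen=32) with Shamt=5 becomes
  //   Lo = (Lo >>u 5) | ((Hi << 1) << 26)   // i.e. (Lo >>u 5) | (Hi << 27)
  //   Hi = Hi >>u 5
  // and with Shamt=40 it becomes Lo = Hi >>u 8, Hi = 0; for SRA the else
  // branch instead fills Hi with sign bits via Hi >>s (GRLen-1).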
1611
1612 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1613
1614 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
1615 SDValue One = DAG.getConstant(Val: 1, DL, VT);
1616 SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
1617 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
1618 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
1619 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
1620
1621 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
1622 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
1623 SDValue ShiftLeftHi =
1624 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
1625 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
1626 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
1627 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
1628 SDValue HiFalse =
1629 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;
1630
1631 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
1632
1633 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
1634 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
1635
1636 SDValue Parts[2] = {Lo, Hi};
1637 return DAG.getMergeValues(Ops: Parts, dl: DL);
1638}
1639
1640// Returns the opcode of the target-specific SDNode that implements the 32-bit
1641// form of the given Opcode.
1642static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
1643 switch (Opcode) {
1644 default:
1645 llvm_unreachable("Unexpected opcode");
1646 case ISD::SHL:
1647 return LoongArchISD::SLL_W;
1648 case ISD::SRA:
1649 return LoongArchISD::SRA_W;
1650 case ISD::SRL:
1651 return LoongArchISD::SRL_W;
1652 case ISD::ROTR:
1653 return LoongArchISD::ROTR_W;
1654 case ISD::ROTL:
1655 return LoongArchISD::ROTL_W;
1656 case ISD::CTTZ:
1657 return LoongArchISD::CTZ_W;
1658 case ISD::CTLZ:
1659 return LoongArchISD::CLZ_W;
1660 }
1661}
1662
1663// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1664// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
1665// otherwise be promoted to i64, making it difficult to select the
1666// SLL_W/.../*W later on, because the fact that the operation was originally
1667// of type i8/i16/i32 is lost.
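// For example, on LA64 an (i32 (shl $a, $b)) node is rewritten as
//   (i32 (trunc (LoongArchISD::SLL_W (any_ext $a), (any_ext $b))))
// so that SLL.W can still be selected rather than the 64-bit SLL.D.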
1668static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1669 unsigned ExtOpc = ISD::ANY_EXTEND) {
1670 SDLoc DL(N);
1671 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode());
1672 SDValue NewOp0, NewRes;
1673
1674 switch (NumOp) {
1675 default:
1676 llvm_unreachable("Unexpected NumOp");
1677 case 1: {
1678 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1679 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1680 break;
1681 }
1682 case 2: {
1683 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1684 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1685 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1686 break;
1687 }
1688 // TODO: Handle more NumOp values.
1689 }
1690
1691 // ReplaceNodeResults requires we maintain the same type for the return
1692 // value.
1693 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
1694}
1695
1696// Helper function that emits an error message for intrinsics with or without
1697// a chain and replaces the results with an UNDEF plus, if present, the chain.
1698static void emitErrorAndReplaceIntrinsicResults(
1699 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
1700 StringRef ErrorMsg, bool WithChain = true) {
1701 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
1702 Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)));
1703 if (!WithChain)
1704 return;
1705 Results.push_back(Elt: N->getOperand(Num: 0));
1706}
1707
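// Replace an (x)vpickve2gr intrinsic node with a VPICK_SEXT_ELT/VPICK_ZEXT_ELT
// (ResOp) node, after checking that the element index (operand 2) fits in N
// unsigned bits; the GRLen-wide result is truncated back to the node's
// original, illegal result type.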
1708template <unsigned N>
1709static void
1710replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1711 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1712 unsigned ResOp) {
1713 const StringRef ErrorMsgOOR = "argument out of range";
1714 unsigned Imm = Node->getConstantOperandVal(Num: 2);
1715 if (!isUInt<N>(Imm)) {
1716 emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR,
1717 /*WithChain=*/false);
1718 return;
1719 }
1720 SDLoc DL(Node);
1721 SDValue Vec = Node->getOperand(Num: 1);
1722
1723 SDValue PickElt =
1724 DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec,
1725 N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()),
1726 N3: DAG.getValueType(Vec.getValueType().getVectorElementType()));
1727 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0),
1728 Operand: PickElt.getValue(R: 0)));
1729}
1730
1731static void replaceVecCondBranchResults(SDNode *N,
1732 SmallVectorImpl<SDValue> &Results,
1733 SelectionDAG &DAG,
1734 const LoongArchSubtarget &Subtarget,
1735 unsigned ResOp) {
1736 SDLoc DL(N);
1737 SDValue Vec = N->getOperand(Num: 1);
1738
1739 SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec);
1740 Results.push_back(
1741 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0)));
1742}
1743
1744static void
1745replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1746 SelectionDAG &DAG,
1747 const LoongArchSubtarget &Subtarget) {
1748 switch (N->getConstantOperandVal(Num: 0)) {
1749 default:
1750 llvm_unreachable("Unexpected Intrinsic.");
1751 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1752 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
1753 ResOp: LoongArchISD::VPICK_SEXT_ELT);
1754 break;
1755 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1756 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1757 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
1758 ResOp: LoongArchISD::VPICK_SEXT_ELT);
1759 break;
1760 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1761 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
1762 ResOp: LoongArchISD::VPICK_SEXT_ELT);
1763 break;
1764 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1765 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
1766 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
1767 break;
1768 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1769 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1770 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
1771 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
1772 break;
1773 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1774 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
1775 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
1776 break;
1777 case Intrinsic::loongarch_lsx_bz_b:
1778 case Intrinsic::loongarch_lsx_bz_h:
1779 case Intrinsic::loongarch_lsx_bz_w:
1780 case Intrinsic::loongarch_lsx_bz_d:
1781 case Intrinsic::loongarch_lasx_xbz_b:
1782 case Intrinsic::loongarch_lasx_xbz_h:
1783 case Intrinsic::loongarch_lasx_xbz_w:
1784 case Intrinsic::loongarch_lasx_xbz_d:
1785 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1786 ResOp: LoongArchISD::VALL_ZERO);
1787 break;
1788 case Intrinsic::loongarch_lsx_bz_v:
1789 case Intrinsic::loongarch_lasx_xbz_v:
1790 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1791 ResOp: LoongArchISD::VANY_ZERO);
1792 break;
1793 case Intrinsic::loongarch_lsx_bnz_b:
1794 case Intrinsic::loongarch_lsx_bnz_h:
1795 case Intrinsic::loongarch_lsx_bnz_w:
1796 case Intrinsic::loongarch_lsx_bnz_d:
1797 case Intrinsic::loongarch_lasx_xbnz_b:
1798 case Intrinsic::loongarch_lasx_xbnz_h:
1799 case Intrinsic::loongarch_lasx_xbnz_w:
1800 case Intrinsic::loongarch_lasx_xbnz_d:
1801 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1802 ResOp: LoongArchISD::VALL_NONZERO);
1803 break;
1804 case Intrinsic::loongarch_lsx_bnz_v:
1805 case Intrinsic::loongarch_lasx_xbnz_v:
1806 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1807 ResOp: LoongArchISD::VANY_NONZERO);
1808 break;
1809 }
1810}
1811
1812void LoongArchTargetLowering::ReplaceNodeResults(
1813 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1814 SDLoc DL(N);
1815 EVT VT = N->getValueType(ResNo: 0);
1816 switch (N->getOpcode()) {
1817 default:
1818 llvm_unreachable("Don't know how to legalize this operation");
1819 case ISD::SHL:
1820 case ISD::SRA:
1821 case ISD::SRL:
1822 case ISD::ROTR:
1823 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1824 "Unexpected custom legalisation");
1825 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
1826 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
1827 break;
1828 }
1829 break;
1830 case ISD::ROTL:
1831 ConstantSDNode *CN;
1832 if ((CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))) {
1833 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
1834 break;
1835 }
1836 break;
1837 case ISD::FP_TO_SINT: {
1838 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1839 "Unexpected custom legalisation");
1840 SDValue Src = N->getOperand(Num: 0);
1841 EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0));
1842 if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
1843 TargetLowering::TypeSoftenFloat) {
1844 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
1845 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
1846 return;
1847 }
1848 // If the FP type needs to be softened, emit a library call using the 'si'
1849 // version. If we left it to default legalization we'd end up with 'di'.
1850 RTLIB::Libcall LC;
1851 LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
1852 MakeLibCallOptions CallOptions;
1853 EVT OpVT = Src.getValueType();
1854 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true);
1855 SDValue Chain = SDValue();
1856 SDValue Result;
1857 std::tie(args&: Result, args&: Chain) =
1858 makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
1859 Results.push_back(Elt: Result);
1860 break;
1861 }
1862 case ISD::BITCAST: {
1863 SDValue Src = N->getOperand(Num: 0);
1864 EVT SrcVT = Src.getValueType();
1865 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1866 Subtarget.hasBasicF()) {
1867 SDValue Dst =
1868 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1869 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1870 }
1871 break;
1872 }
1873 case ISD::FP_TO_UINT: {
1874 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1875 "Unexpected custom legalisation");
1876 auto &TLI = DAG.getTargetLoweringInfo();
1877 SDValue Tmp1, Tmp2;
1878 TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
1879 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1880 break;
1881 }
1882 case ISD::BSWAP: {
1883 SDValue Src = N->getOperand(Num: 0);
1884 assert((VT == MVT::i16 || VT == MVT::i32) &&
1885 "Unexpected custom legalization");
1886 MVT GRLenVT = Subtarget.getGRLenVT();
1887 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
1888 SDValue Tmp;
1889 switch (VT.getSizeInBits()) {
1890 default:
1891 llvm_unreachable("Unexpected operand width");
1892 case 16:
1893 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
1894 break;
1895 case 32:
1896 // Only LA64 will get here due to the size mismatch between VT and
1897 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
1898 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
1899 break;
1900 }
1901 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
1902 break;
1903 }
1904 case ISD::BITREVERSE: {
1905 SDValue Src = N->getOperand(Num: 0);
1906 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1907 "Unexpected custom legalization");
1908 MVT GRLenVT = Subtarget.getGRLenVT();
1909 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
1910 SDValue Tmp;
1911 switch (VT.getSizeInBits()) {
1912 default:
1913 llvm_unreachable("Unexpected operand width");
1914 case 8:
1915 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
1916 break;
1917 case 32:
1918 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
1919 break;
1920 }
1921 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
1922 break;
1923 }
1924 case ISD::CTLZ:
1925 case ISD::CTTZ: {
1926 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1927 "Unexpected custom legalisation");
1928 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1));
1929 break;
1930 }
1931 case ISD::INTRINSIC_W_CHAIN: {
1932 SDValue Chain = N->getOperand(Num: 0);
1933 SDValue Op2 = N->getOperand(Num: 2);
1934 MVT GRLenVT = Subtarget.getGRLenVT();
1935 const StringRef ErrorMsgOOR = "argument out of range";
1936 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1937 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1938
1939 switch (N->getConstantOperandVal(Num: 1)) {
1940 default:
1941 llvm_unreachable("Unexpected Intrinsic.");
1942 case Intrinsic::loongarch_movfcsr2gr: {
1943 if (!Subtarget.hasBasicF()) {
1944 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
1945 return;
1946 }
1947 unsigned Imm = Op2->getAsZExtVal();
1948 if (!isUInt<2>(x: Imm)) {
1949 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
1950 return;
1951 }
1952 SDValue MOVFCSR2GRResults = DAG.getNode(
1953 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1954 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1955 Results.push_back(
1956 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0)));
1957 Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1));
1958 break;
1959 }
1960#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1961 case Intrinsic::loongarch_##NAME: { \
1962 SDValue NODE = DAG.getNode( \
1963 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1964 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1965 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1966 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1967 Results.push_back(NODE.getValue(1)); \
1968 break; \
1969 }
1970 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1971 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1972 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1973 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1974 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1975 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1976#undef CRC_CASE_EXT_BINARYOP
1977
1978#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
1979 case Intrinsic::loongarch_##NAME: { \
1980 SDValue NODE = DAG.getNode( \
1981 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1982 {Chain, Op2, \
1983 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1984 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1985 Results.push_back(NODE.getValue(1)); \
1986 break; \
1987 }
1988 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1989 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1990#undef CRC_CASE_EXT_UNARYOP
1991#define CSR_CASE(ID) \
1992 case Intrinsic::loongarch_##ID: { \
1993 if (!Subtarget.is64Bit()) \
1994 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
1995 break; \
1996 }
1997 CSR_CASE(csrrd_d);
1998 CSR_CASE(csrwr_d);
1999 CSR_CASE(csrxchg_d);
2000 CSR_CASE(iocsrrd_d);
2001#undef CSR_CASE
2002 case Intrinsic::loongarch_csrrd_w: {
2003 unsigned Imm = Op2->getAsZExtVal();
2004 if (!isUInt<14>(x: Imm)) {
2005 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
2006 return;
2007 }
2008 SDValue CSRRDResults =
2009 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2010 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2011 Results.push_back(
2012 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0)));
2013 Results.push_back(Elt: CSRRDResults.getValue(R: 1));
2014 break;
2015 }
2016 case Intrinsic::loongarch_csrwr_w: {
2017 unsigned Imm = N->getConstantOperandVal(Num: 3);
2018 if (!isUInt<14>(x: Imm)) {
2019 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
2020 return;
2021 }
2022 SDValue CSRWRResults =
2023 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2024 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2025 DAG.getConstant(Imm, DL, GRLenVT)});
2026 Results.push_back(
2027 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0)));
2028 Results.push_back(Elt: CSRWRResults.getValue(R: 1));
2029 break;
2030 }
2031 case Intrinsic::loongarch_csrxchg_w: {
2032 unsigned Imm = N->getConstantOperandVal(Num: 4);
2033 if (!isUInt<14>(x: Imm)) {
2034 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
2035 return;
2036 }
2037 SDValue CSRXCHGResults = DAG.getNode(
2038 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2039 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2040 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2041 DAG.getConstant(Imm, DL, GRLenVT)});
2042 Results.push_back(
2043 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0)));
2044 Results.push_back(Elt: CSRXCHGResults.getValue(R: 1));
2045 break;
2046 }
2047#define IOCSRRD_CASE(NAME, NODE) \
2048 case Intrinsic::loongarch_##NAME: { \
2049 SDValue IOCSRRDResults = \
2050 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2051 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2052 Results.push_back( \
2053 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
2054 Results.push_back(IOCSRRDResults.getValue(1)); \
2055 break; \
2056 }
2057 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2058 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2059 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2060#undef IOCSRRD_CASE
2061 case Intrinsic::loongarch_cpucfg: {
2062 SDValue CPUCFGResults =
2063 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2064 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2065 Results.push_back(
2066 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0)));
2067 Results.push_back(Elt: CPUCFGResults.getValue(R: 1));
2068 break;
2069 }
2070 case Intrinsic::loongarch_lddir_d: {
2071 if (!Subtarget.is64Bit()) {
2072 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
2073 return;
2074 }
2075 break;
2076 }
2077 }
2078 break;
2079 }
2080 case ISD::READ_REGISTER: {
2081 if (Subtarget.is64Bit())
2082 DAG.getContext()->emitError(
2083 ErrorStr: "On LA64, only 64-bit registers can be read.");
2084 else
2085 DAG.getContext()->emitError(
2086 ErrorStr: "On LA32, only 32-bit registers can be read.");
2087 Results.push_back(Elt: DAG.getUNDEF(VT));
2088 Results.push_back(Elt: N->getOperand(Num: 0));
2089 break;
2090 }
2091 case ISD::INTRINSIC_WO_CHAIN: {
2092 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2093 break;
2094 }
2095 }
2096}
2097
2098static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2099 TargetLowering::DAGCombinerInfo &DCI,
2100 const LoongArchSubtarget &Subtarget) {
2101 if (DCI.isBeforeLegalizeOps())
2102 return SDValue();
2103
2104 SDValue FirstOperand = N->getOperand(Num: 0);
2105 SDValue SecondOperand = N->getOperand(Num: 1);
2106 unsigned FirstOperandOpc = FirstOperand.getOpcode();
2107 EVT ValTy = N->getValueType(ResNo: 0);
2108 SDLoc DL(N);
2109 uint64_t lsb, msb;
2110 unsigned SMIdx, SMLen;
2111 ConstantSDNode *CN;
2112 SDValue NewOperand;
2113 MVT GRLenVT = Subtarget.getGRLenVT();
2114
2115 // Op's second operand must be a shifted mask.
2116 if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) ||
2117 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
2118 return SDValue();
2119
2120 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2121 // Pattern match BSTRPICK.
2122 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
2123 // => BSTRPICK $dst, $src, msb, lsb
2124 // where msb = lsb + len - 1
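  // For example, (and (srl $src, 4), 255) has lsb = 4 and len = 8, so it
  // becomes BSTRPICK $dst, $src, 11, 4 (i.e. extract bits [11:4] of $src).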
2125
2126 // The second operand of the shift must be an immediate.
2127 if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))))
2128 return SDValue();
2129
2130 lsb = CN->getZExtValue();
2131
2132 // Return if the shifted mask does not start at bit 0 or the sum of its
2133 // length and lsb exceeds the word's size.
2134 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2135 return SDValue();
2136
2137 NewOperand = FirstOperand.getOperand(i: 0);
2138 } else {
2139 // Pattern match BSTRPICK.
2140 // $dst = and $src, (2**len - 1), if len > 12
2141 // => BSTRPICK $dst, $src, msb, lsb
2142 // where lsb = 0 and msb = len - 1
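  // For example, (and $src, 0xffff) has len = 16 > 12, so it becomes
  // BSTRPICK $dst, $src, 15, 0; masks of 0xfff or less are left to ANDI.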
2143
2144 // If the mask is <= 0xfff, andi can be used instead.
2145 if (CN->getZExtValue() <= 0xfff)
2146 return SDValue();
2147
2148 // Return if the MSB of the mask exceeds the word size.
2149 if (SMIdx + SMLen > ValTy.getSizeInBits())
2150 return SDValue();
2151
2152 if (SMIdx > 0) {
2153 // Omit if the constant has more than 2 uses. This is a conservative
2154 // decision. Whether it is a win depends on the HW microarchitecture.
2155 // However, it should always be better for 1 and 2 uses.
2156 if (CN->use_size() > 2)
2157 return SDValue();
2158 // Return if the constant can be composed by a single LU12I.W.
2159 if ((CN->getZExtValue() & 0xfff) == 0)
2160 return SDValue();
2161 // Return if the constant can be composed by a single ADDI with
2162 // the zero register.
2163 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2164 return SDValue();
2165 }
2166
2167 lsb = SMIdx;
2168 NewOperand = FirstOperand;
2169 }
2170
2171 msb = lsb + SMLen - 1;
2172 SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
2173 N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
2174 N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
2175 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2176 return NR0;
2177 // Try to optimize to
2178 // bstrpick $Rd, $Rs, msb, lsb
2179 // slli $Rd, $Rd, lsb
2180 return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
2181 N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
2182}
2183
2184static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2185 TargetLowering::DAGCombinerInfo &DCI,
2186 const LoongArchSubtarget &Subtarget) {
2187 if (DCI.isBeforeLegalizeOps())
2188 return SDValue();
2189
2190 // $dst = srl (and $src, Mask), Shamt
2191 // =>
2192 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2193 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2194 //
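  // For example, (srl (and $src, 0xff0), 4) has MaskIdx = 4, MaskLen = 8 and
  // Shamt = 4, so it becomes BSTRPICK $dst, $src, 11, 4.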
2195
2196 SDValue FirstOperand = N->getOperand(Num: 0);
2197 ConstantSDNode *CN;
2198 EVT ValTy = N->getValueType(ResNo: 0);
2199 SDLoc DL(N);
2200 MVT GRLenVT = Subtarget.getGRLenVT();
2201 unsigned MaskIdx, MaskLen;
2202 uint64_t Shamt;
2203
2204 // The first operand must be an AND and the second operand of the AND must be
2205 // a shifted mask.
2206 if (FirstOperand.getOpcode() != ISD::AND ||
2207 !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) ||
2208 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
2209 return SDValue();
2210
2211 // The second operand (shift amount) must be an immediate.
2212 if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))))
2213 return SDValue();
2214
2215 Shamt = CN->getZExtValue();
2216 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2217 return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
2218 N1: FirstOperand->getOperand(Num: 0),
2219 N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
2220 N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
2221
2222 return SDValue();
2223}
2224
2225static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2226 TargetLowering::DAGCombinerInfo &DCI,
2227 const LoongArchSubtarget &Subtarget) {
2228 MVT GRLenVT = Subtarget.getGRLenVT();
2229 EVT ValTy = N->getValueType(ResNo: 0);
2230 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
2231 ConstantSDNode *CN0, *CN1;
2232 SDLoc DL(N);
2233 unsigned ValBits = ValTy.getSizeInBits();
2234 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2235 unsigned Shamt;
2236 bool SwapAndRetried = false;
2237
2238 if (DCI.isBeforeLegalizeOps())
2239 return SDValue();
2240
2241 if (ValBits != 32 && ValBits != 64)
2242 return SDValue();
2243
2244Retry:
2245 // 1st pattern to match BSTRINS:
2246 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
2247 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2248 // =>
2249 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
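  // For example, with size = 8 and lsb = 4 (mask1 = 0xff0, mask0 = ~0xff0):
  //   R = or (and X, ~0xff0), (and (shl Y, 4), 0xff0)
  //   =>
  //   R = BSTRINS X, Y, 11, 4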
2250 if (N0.getOpcode() == ISD::AND &&
2251 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
2252 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
2253 N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
2254 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
2255 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
2256 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2257 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
2258 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2259 (MaskIdx0 + MaskLen0 <= ValBits)) {
2260 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2261 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
2262 N2: N1.getOperand(i: 0).getOperand(i: 0),
2263 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
2264 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
2265 }
2266
2267 // 2nd pattern to match BSTRINS:
2268 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2269 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2270 // =>
2271 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2272 if (N0.getOpcode() == ISD::AND &&
2273 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
2274 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
2275 N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
2276 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
2277 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2278 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
2279 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
2280 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2281 (MaskIdx0 + MaskLen0 <= ValBits)) {
2282 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2283 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
2284 N2: N1.getOperand(i: 0).getOperand(i: 0),
2285 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
2286 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
2287 }
2288
2289 // 3rd pattern to match BSTRINS:
2290 // R = or (and X, mask0), (and Y, mask1)
2291 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2292 // =>
2293 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2294 // where msb = lsb + size - 1
2295 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2296 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
2297 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
2298 (MaskIdx0 + MaskLen0 <= 64) &&
2299 (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) &&
2300 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2301 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2302 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
2303 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1,
2304 N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
2305 N3: DAG.getConstant(Val: ValBits == 32
2306 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2307 : (MaskIdx0 + MaskLen0 - 1),
2308 DL, VT: GRLenVT),
2309 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
2310 }
2311
2312 // 4th pattern to match BSTRINS:
2313 // R = or (and X, mask), (shl Y, shamt)
2314 // where mask = (2**shamt - 1)
2315 // =>
2316 // R = BSTRINS X, Y, ValBits - 1, shamt
2317 // where ValBits = 32 or 64
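  // For example, with shamt = 8 on a 32-bit value:
  //   R = or (and X, 0xff), (shl Y, 8)  =>  R = BSTRINS X, Y, 31, 8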
2318 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2319 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
2320 isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
2321 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
2322 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2323 (MaskIdx0 + MaskLen0 <= ValBits)) {
2324 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2325 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
2326 N2: N1.getOperand(i: 0),
2327 N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT),
2328 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
2329 }
2330
2331 // 5th pattern to match BSTRINS:
2332 // R = or (and X, mask), const
2333 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2334 // =>
2335 // R = BSTRINS X, (const >> lsb), msb, lsb
2336 // where msb = lsb + size - 1
2337 if (N0.getOpcode() == ISD::AND &&
2338 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
2339 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
2340 (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
2341 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2342 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2343 return DAG.getNode(
2344 Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
2345 N2: DAG.getConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
2346 N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2347 : (MaskIdx0 + MaskLen0 - 1),
2348 DL, VT: GRLenVT),
2349 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
2350 }
2351
2352 // 6th pattern.
2353 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2354 // by the incoming bits are known to be zero.
2355 // =>
2356 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2357 //
2358 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2359 // pattern is more common than the 1st. So we put the 1st before the 6th in
2360 // order to match as many nodes as possible.
2361 ConstantSDNode *CNMask, *CNShamt;
2362 unsigned MaskIdx, MaskLen;
2363 if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
2364 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
2365 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2366 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
2367 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2368 Shamt = CNShamt->getZExtValue();
2369 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2370 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
2371 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2372 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
2373 N2: N1.getOperand(i: 0).getOperand(i: 0),
2374 N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT),
2375 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
2376 }
2377 }
2378
2379 // 7th pattern.
2380 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2381 // overwritten by the incoming bits are known to be zero.
2382 // =>
2383 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2384 //
2385 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2386 // before the 7th in order to match as many nodes as possible.
2387 if (N1.getOpcode() == ISD::AND &&
2388 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
2389 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2390 N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
2391 (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
2392 CNShamt->getZExtValue() == MaskIdx) {
2393 APInt ShMask(ValBits, CNMask->getZExtValue());
2394 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
2395 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2396 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
2397 N2: N1.getOperand(i: 0).getOperand(i: 0),
2398 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
2399 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
2400 }
2401 }
2402
2403 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2404 if (!SwapAndRetried) {
2405 std::swap(a&: N0, b&: N1);
2406 SwapAndRetried = true;
2407 goto Retry;
2408 }
2409
2410 SwapAndRetried = false;
2411Retry2:
2412 // 8th pattern.
2413 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2414 // the incoming bits are known to be zero.
2415 // =>
2416 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2417 //
2418 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2419 // we put it here in order to match as many nodes as possible or generate
2420 // fewer instructions.
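  // For example, with shifted_mask = 0xff00 (MaskIdx = 8, MaskLen = 8) and
  // bits [15:8] of b known to be zero:
  //   a = b | (c & 0xff00)  =>  a = BSTRINS b, (c >> 8), 15, 8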
2421 if (N1.getOpcode() == ISD::AND &&
2422 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
2423 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2424 APInt ShMask(ValBits, CNMask->getZExtValue());
2425 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
2426 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2427 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
2428 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0),
2429 N1: N1->getOperand(Num: 0),
2430 N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
2431 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
2432 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
2433 }
2434 }
2435 // Swap N0/N1 and retry.
2436 if (!SwapAndRetried) {
2437 std::swap(a&: N0, b&: N1);
2438 SwapAndRetried = true;
2439 goto Retry2;
2440 }
2441
2442 return SDValue();
2443}
2444
2445// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
2446static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2447 TargetLowering::DAGCombinerInfo &DCI,
2448 const LoongArchSubtarget &Subtarget) {
2449 if (DCI.isBeforeLegalizeOps())
2450 return SDValue();
2451
2452 SDValue Src = N->getOperand(Num: 0);
2453 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2454 return SDValue();
2455
2456 return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
2457 Operand: Src.getOperand(i: 0));
2458}
2459
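// Check that the immediate operand ImmOp of an intrinsic fits in N signed
// (when IsSigned) or unsigned bits and return it as a GRLenVT constant;
// otherwise emit an error and return UNDEF.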
2460template <unsigned N>
2461static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2462 SelectionDAG &DAG,
2463 const LoongArchSubtarget &Subtarget,
2464 bool IsSigned = false) {
2465 SDLoc DL(Node);
2466 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
2467 // Check the ImmArg.
2468 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2469 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2470 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
2471 ": argument out of range.");
2472 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT());
2473 }
2474 return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT());
2475}
2476
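// Splat the immediate operand ImmOp across the result vector type after
// checking that it fits in N signed or unsigned bits; emit an error and
// return UNDEF when it is out of range.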
2477template <unsigned N>
2478static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2479 SelectionDAG &DAG, bool IsSigned = false) {
2480 SDLoc DL(Node);
2481 EVT ResTy = Node->getValueType(ResNo: 0);
2482 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
2483
2484 // Check the ImmArg.
2485 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2486 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2487 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
2488 ": argument out of range.");
2489 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
2490 }
2491 return DAG.getConstant(
2492 Val: APInt(ResTy.getScalarType().getSizeInBits(),
2493 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2494 DL, VT: ResTy);
2495}
2496
2497static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2498 SDLoc DL(Node);
2499 EVT ResTy = Node->getValueType(ResNo: 0);
2500 SDValue Vec = Node->getOperand(Num: 2);
2501 SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy);
2502 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask);
2503}
2504
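// Lower a vector bit-clear operation: for each element, clear the bit of
// operand 1 selected by the corresponding element of operand 2 (taken modulo
// the element width).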
2505static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2506 SDLoc DL(Node);
2507 EVT ResTy = Node->getValueType(ResNo: 0);
2508 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
2509 SDValue Bit =
2510 DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG));
2511
2512 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1),
2513 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
2514}
2515
2516template <unsigned N>
2517static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2518 SDLoc DL(Node);
2519 EVT ResTy = Node->getValueType(ResNo: 0);
2520 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
2521 // Check the unsigned ImmArg.
2522 if (!isUInt<N>(CImm->getZExtValue())) {
2523 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
2524 ": argument out of range.");
2525 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
2526 }
2527
2528 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2529 SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
2530
2531 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask);
2532}
2533
2534template <unsigned N>
2535static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2536 SDLoc DL(Node);
2537 EVT ResTy = Node->getValueType(ResNo: 0);
2538 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
2539 // Check the unsigned ImmArg.
2540 if (!isUInt<N>(CImm->getZExtValue())) {
2541 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
2542 ": argument out of range.");
2543 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
2544 }
2545
2546 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2547 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
2548 return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
2549}
2550
2551template <unsigned N>
2552static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2553 SDLoc DL(Node);
2554 EVT ResTy = Node->getValueType(ResNo: 0);
2555 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
2556 // Check the unsigned ImmArg.
2557 if (!isUInt<N>(CImm->getZExtValue())) {
2558 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
2559 ": argument out of range.");
2560 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
2561 }
2562
2563 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2564 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
2565 return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
2566}
2567
2568static SDValue
2569performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2570 TargetLowering::DAGCombinerInfo &DCI,
2571 const LoongArchSubtarget &Subtarget) {
2572 SDLoc DL(N);
2573 switch (N->getConstantOperandVal(Num: 0)) {
2574 default:
2575 break;
2576 case Intrinsic::loongarch_lsx_vadd_b:
2577 case Intrinsic::loongarch_lsx_vadd_h:
2578 case Intrinsic::loongarch_lsx_vadd_w:
2579 case Intrinsic::loongarch_lsx_vadd_d:
2580 case Intrinsic::loongarch_lasx_xvadd_b:
2581 case Intrinsic::loongarch_lasx_xvadd_h:
2582 case Intrinsic::loongarch_lasx_xvadd_w:
2583 case Intrinsic::loongarch_lasx_xvadd_d:
2584 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2585 N2: N->getOperand(Num: 2));
2586 case Intrinsic::loongarch_lsx_vaddi_bu:
2587 case Intrinsic::loongarch_lsx_vaddi_hu:
2588 case Intrinsic::loongarch_lsx_vaddi_wu:
2589 case Intrinsic::loongarch_lsx_vaddi_du:
2590 case Intrinsic::loongarch_lasx_xvaddi_bu:
2591 case Intrinsic::loongarch_lasx_xvaddi_hu:
2592 case Intrinsic::loongarch_lasx_xvaddi_wu:
2593 case Intrinsic::loongarch_lasx_xvaddi_du:
2594 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2595 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
2596 case Intrinsic::loongarch_lsx_vsub_b:
2597 case Intrinsic::loongarch_lsx_vsub_h:
2598 case Intrinsic::loongarch_lsx_vsub_w:
2599 case Intrinsic::loongarch_lsx_vsub_d:
2600 case Intrinsic::loongarch_lasx_xvsub_b:
2601 case Intrinsic::loongarch_lasx_xvsub_h:
2602 case Intrinsic::loongarch_lasx_xvsub_w:
2603 case Intrinsic::loongarch_lasx_xvsub_d:
2604 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2605 N2: N->getOperand(Num: 2));
2606 case Intrinsic::loongarch_lsx_vsubi_bu:
2607 case Intrinsic::loongarch_lsx_vsubi_hu:
2608 case Intrinsic::loongarch_lsx_vsubi_wu:
2609 case Intrinsic::loongarch_lsx_vsubi_du:
2610 case Intrinsic::loongarch_lasx_xvsubi_bu:
2611 case Intrinsic::loongarch_lasx_xvsubi_hu:
2612 case Intrinsic::loongarch_lasx_xvsubi_wu:
2613 case Intrinsic::loongarch_lasx_xvsubi_du:
2614 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2615 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
2616 case Intrinsic::loongarch_lsx_vneg_b:
2617 case Intrinsic::loongarch_lsx_vneg_h:
2618 case Intrinsic::loongarch_lsx_vneg_w:
2619 case Intrinsic::loongarch_lsx_vneg_d:
2620 case Intrinsic::loongarch_lasx_xvneg_b:
2621 case Intrinsic::loongarch_lasx_xvneg_h:
2622 case Intrinsic::loongarch_lasx_xvneg_w:
2623 case Intrinsic::loongarch_lasx_xvneg_d:
2624 return DAG.getNode(
2625 Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0),
2626 N1: DAG.getConstant(
2627 Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0,
2628 /*isSigned=*/true),
2629 DL: SDLoc(N), VT: N->getValueType(ResNo: 0)),
2630 N2: N->getOperand(Num: 1));
2631 case Intrinsic::loongarch_lsx_vmax_b:
2632 case Intrinsic::loongarch_lsx_vmax_h:
2633 case Intrinsic::loongarch_lsx_vmax_w:
2634 case Intrinsic::loongarch_lsx_vmax_d:
2635 case Intrinsic::loongarch_lasx_xvmax_b:
2636 case Intrinsic::loongarch_lasx_xvmax_h:
2637 case Intrinsic::loongarch_lasx_xvmax_w:
2638 case Intrinsic::loongarch_lasx_xvmax_d:
2639 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2640 N2: N->getOperand(Num: 2));
2641 case Intrinsic::loongarch_lsx_vmax_bu:
2642 case Intrinsic::loongarch_lsx_vmax_hu:
2643 case Intrinsic::loongarch_lsx_vmax_wu:
2644 case Intrinsic::loongarch_lsx_vmax_du:
2645 case Intrinsic::loongarch_lasx_xvmax_bu:
2646 case Intrinsic::loongarch_lasx_xvmax_hu:
2647 case Intrinsic::loongarch_lasx_xvmax_wu:
2648 case Intrinsic::loongarch_lasx_xvmax_du:
2649 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2650 N2: N->getOperand(Num: 2));
2651 case Intrinsic::loongarch_lsx_vmaxi_b:
2652 case Intrinsic::loongarch_lsx_vmaxi_h:
2653 case Intrinsic::loongarch_lsx_vmaxi_w:
2654 case Intrinsic::loongarch_lsx_vmaxi_d:
2655 case Intrinsic::loongarch_lasx_xvmaxi_b:
2656 case Intrinsic::loongarch_lasx_xvmaxi_h:
2657 case Intrinsic::loongarch_lasx_xvmaxi_w:
2658 case Intrinsic::loongarch_lasx_xvmaxi_d:
2659 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2660 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
2661 case Intrinsic::loongarch_lsx_vmaxi_bu:
2662 case Intrinsic::loongarch_lsx_vmaxi_hu:
2663 case Intrinsic::loongarch_lsx_vmaxi_wu:
2664 case Intrinsic::loongarch_lsx_vmaxi_du:
2665 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2666 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2667 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2668 case Intrinsic::loongarch_lasx_xvmaxi_du:
2669 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2670 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
2671 case Intrinsic::loongarch_lsx_vmin_b:
2672 case Intrinsic::loongarch_lsx_vmin_h:
2673 case Intrinsic::loongarch_lsx_vmin_w:
2674 case Intrinsic::loongarch_lsx_vmin_d:
2675 case Intrinsic::loongarch_lasx_xvmin_b:
2676 case Intrinsic::loongarch_lasx_xvmin_h:
2677 case Intrinsic::loongarch_lasx_xvmin_w:
2678 case Intrinsic::loongarch_lasx_xvmin_d:
2679 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2680 N2: N->getOperand(Num: 2));
2681 case Intrinsic::loongarch_lsx_vmin_bu:
2682 case Intrinsic::loongarch_lsx_vmin_hu:
2683 case Intrinsic::loongarch_lsx_vmin_wu:
2684 case Intrinsic::loongarch_lsx_vmin_du:
2685 case Intrinsic::loongarch_lasx_xvmin_bu:
2686 case Intrinsic::loongarch_lasx_xvmin_hu:
2687 case Intrinsic::loongarch_lasx_xvmin_wu:
2688 case Intrinsic::loongarch_lasx_xvmin_du:
2689 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2690 N2: N->getOperand(Num: 2));
2691 case Intrinsic::loongarch_lsx_vmini_b:
2692 case Intrinsic::loongarch_lsx_vmini_h:
2693 case Intrinsic::loongarch_lsx_vmini_w:
2694 case Intrinsic::loongarch_lsx_vmini_d:
2695 case Intrinsic::loongarch_lasx_xvmini_b:
2696 case Intrinsic::loongarch_lasx_xvmini_h:
2697 case Intrinsic::loongarch_lasx_xvmini_w:
2698 case Intrinsic::loongarch_lasx_xvmini_d:
2699 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2700 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
2701 case Intrinsic::loongarch_lsx_vmini_bu:
2702 case Intrinsic::loongarch_lsx_vmini_hu:
2703 case Intrinsic::loongarch_lsx_vmini_wu:
2704 case Intrinsic::loongarch_lsx_vmini_du:
2705 case Intrinsic::loongarch_lasx_xvmini_bu:
2706 case Intrinsic::loongarch_lasx_xvmini_hu:
2707 case Intrinsic::loongarch_lasx_xvmini_wu:
2708 case Intrinsic::loongarch_lasx_xvmini_du:
2709 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2710 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
2711 case Intrinsic::loongarch_lsx_vmul_b:
2712 case Intrinsic::loongarch_lsx_vmul_h:
2713 case Intrinsic::loongarch_lsx_vmul_w:
2714 case Intrinsic::loongarch_lsx_vmul_d:
2715 case Intrinsic::loongarch_lasx_xvmul_b:
2716 case Intrinsic::loongarch_lasx_xvmul_h:
2717 case Intrinsic::loongarch_lasx_xvmul_w:
2718 case Intrinsic::loongarch_lasx_xvmul_d:
2719 return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2720 N2: N->getOperand(Num: 2));
2721 case Intrinsic::loongarch_lsx_vmadd_b:
2722 case Intrinsic::loongarch_lsx_vmadd_h:
2723 case Intrinsic::loongarch_lsx_vmadd_w:
2724 case Intrinsic::loongarch_lsx_vmadd_d:
2725 case Intrinsic::loongarch_lasx_xvmadd_b:
2726 case Intrinsic::loongarch_lasx_xvmadd_h:
2727 case Intrinsic::loongarch_lasx_xvmadd_w:
2728 case Intrinsic::loongarch_lasx_xvmadd_d: {
2729 EVT ResTy = N->getValueType(ResNo: 0);
2730 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
2731 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
2732 N2: N->getOperand(Num: 3)));
2733 }
2734 case Intrinsic::loongarch_lsx_vmsub_b:
2735 case Intrinsic::loongarch_lsx_vmsub_h:
2736 case Intrinsic::loongarch_lsx_vmsub_w:
2737 case Intrinsic::loongarch_lsx_vmsub_d:
2738 case Intrinsic::loongarch_lasx_xvmsub_b:
2739 case Intrinsic::loongarch_lasx_xvmsub_h:
2740 case Intrinsic::loongarch_lasx_xvmsub_w:
2741 case Intrinsic::loongarch_lasx_xvmsub_d: {
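// vmsub expands to a multiply-subtract: result = op1 - op2 * op3.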
2742 EVT ResTy = N->getValueType(ResNo: 0);
2743 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
2744 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
2745 N2: N->getOperand(Num: 3)));
2746 }
2747 case Intrinsic::loongarch_lsx_vdiv_b:
2748 case Intrinsic::loongarch_lsx_vdiv_h:
2749 case Intrinsic::loongarch_lsx_vdiv_w:
2750 case Intrinsic::loongarch_lsx_vdiv_d:
2751 case Intrinsic::loongarch_lasx_xvdiv_b:
2752 case Intrinsic::loongarch_lasx_xvdiv_h:
2753 case Intrinsic::loongarch_lasx_xvdiv_w:
2754 case Intrinsic::loongarch_lasx_xvdiv_d:
2755 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2756 N2: N->getOperand(Num: 2));
2757 case Intrinsic::loongarch_lsx_vdiv_bu:
2758 case Intrinsic::loongarch_lsx_vdiv_hu:
2759 case Intrinsic::loongarch_lsx_vdiv_wu:
2760 case Intrinsic::loongarch_lsx_vdiv_du:
2761 case Intrinsic::loongarch_lasx_xvdiv_bu:
2762 case Intrinsic::loongarch_lasx_xvdiv_hu:
2763 case Intrinsic::loongarch_lasx_xvdiv_wu:
2764 case Intrinsic::loongarch_lasx_xvdiv_du:
2765 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2766 N2: N->getOperand(Num: 2));
2767 case Intrinsic::loongarch_lsx_vmod_b:
2768 case Intrinsic::loongarch_lsx_vmod_h:
2769 case Intrinsic::loongarch_lsx_vmod_w:
2770 case Intrinsic::loongarch_lsx_vmod_d:
2771 case Intrinsic::loongarch_lasx_xvmod_b:
2772 case Intrinsic::loongarch_lasx_xvmod_h:
2773 case Intrinsic::loongarch_lasx_xvmod_w:
2774 case Intrinsic::loongarch_lasx_xvmod_d:
2775 return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2776 N2: N->getOperand(Num: 2));
2777 case Intrinsic::loongarch_lsx_vmod_bu:
2778 case Intrinsic::loongarch_lsx_vmod_hu:
2779 case Intrinsic::loongarch_lsx_vmod_wu:
2780 case Intrinsic::loongarch_lsx_vmod_du:
2781 case Intrinsic::loongarch_lasx_xvmod_bu:
2782 case Intrinsic::loongarch_lasx_xvmod_hu:
2783 case Intrinsic::loongarch_lasx_xvmod_wu:
2784 case Intrinsic::loongarch_lasx_xvmod_du:
2785 return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2786 N2: N->getOperand(Num: 2));
2787 case Intrinsic::loongarch_lsx_vand_v:
2788 case Intrinsic::loongarch_lasx_xvand_v:
2789 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2790 N2: N->getOperand(Num: 2));
2791 case Intrinsic::loongarch_lsx_vor_v:
2792 case Intrinsic::loongarch_lasx_xvor_v:
2793 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2794 N2: N->getOperand(Num: 2));
2795 case Intrinsic::loongarch_lsx_vxor_v:
2796 case Intrinsic::loongarch_lasx_xvxor_v:
2797 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2798 N2: N->getOperand(Num: 2));
2799 case Intrinsic::loongarch_lsx_vnor_v:
2800 case Intrinsic::loongarch_lasx_xvnor_v: {
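// vnor is expanded as NOT(OR): result = ~(op1 | op2).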
2801 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2802 N2: N->getOperand(Num: 2));
2803 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
2804 }
2805 case Intrinsic::loongarch_lsx_vandi_b:
2806 case Intrinsic::loongarch_lasx_xvandi_b:
2807 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2808 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
2809 case Intrinsic::loongarch_lsx_vori_b:
2810 case Intrinsic::loongarch_lasx_xvori_b:
2811 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2812 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
2813 case Intrinsic::loongarch_lsx_vxori_b:
2814 case Intrinsic::loongarch_lasx_xvxori_b:
2815 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2816 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
2817 case Intrinsic::loongarch_lsx_vsll_b:
2818 case Intrinsic::loongarch_lsx_vsll_h:
2819 case Intrinsic::loongarch_lsx_vsll_w:
2820 case Intrinsic::loongarch_lsx_vsll_d:
2821 case Intrinsic::loongarch_lasx_xvsll_b:
2822 case Intrinsic::loongarch_lasx_xvsll_h:
2823 case Intrinsic::loongarch_lasx_xvsll_w:
2824 case Intrinsic::loongarch_lasx_xvsll_d:
2825 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2826 N2: truncateVecElts(Node: N, DAG));
2827 case Intrinsic::loongarch_lsx_vslli_b:
2828 case Intrinsic::loongarch_lasx_xvslli_b:
2829 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2830 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
2831 case Intrinsic::loongarch_lsx_vslli_h:
2832 case Intrinsic::loongarch_lasx_xvslli_h:
2833 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2834 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
2835 case Intrinsic::loongarch_lsx_vslli_w:
2836 case Intrinsic::loongarch_lasx_xvslli_w:
2837 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2838 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
2839 case Intrinsic::loongarch_lsx_vslli_d:
2840 case Intrinsic::loongarch_lasx_xvslli_d:
2841 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2842 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
2843 case Intrinsic::loongarch_lsx_vsrl_b:
2844 case Intrinsic::loongarch_lsx_vsrl_h:
2845 case Intrinsic::loongarch_lsx_vsrl_w:
2846 case Intrinsic::loongarch_lsx_vsrl_d:
2847 case Intrinsic::loongarch_lasx_xvsrl_b:
2848 case Intrinsic::loongarch_lasx_xvsrl_h:
2849 case Intrinsic::loongarch_lasx_xvsrl_w:
2850 case Intrinsic::loongarch_lasx_xvsrl_d:
2851 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2852 N2: truncateVecElts(Node: N, DAG));
2853 case Intrinsic::loongarch_lsx_vsrli_b:
2854 case Intrinsic::loongarch_lasx_xvsrli_b:
2855 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2856 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
2857 case Intrinsic::loongarch_lsx_vsrli_h:
2858 case Intrinsic::loongarch_lasx_xvsrli_h:
2859 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2860 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
2861 case Intrinsic::loongarch_lsx_vsrli_w:
2862 case Intrinsic::loongarch_lasx_xvsrli_w:
2863 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2864 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
2865 case Intrinsic::loongarch_lsx_vsrli_d:
2866 case Intrinsic::loongarch_lasx_xvsrli_d:
2867 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2868 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
2869 case Intrinsic::loongarch_lsx_vsra_b:
2870 case Intrinsic::loongarch_lsx_vsra_h:
2871 case Intrinsic::loongarch_lsx_vsra_w:
2872 case Intrinsic::loongarch_lsx_vsra_d:
2873 case Intrinsic::loongarch_lasx_xvsra_b:
2874 case Intrinsic::loongarch_lasx_xvsra_h:
2875 case Intrinsic::loongarch_lasx_xvsra_w:
2876 case Intrinsic::loongarch_lasx_xvsra_d:
2877 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2878 N2: truncateVecElts(Node: N, DAG));
2879 case Intrinsic::loongarch_lsx_vsrai_b:
2880 case Intrinsic::loongarch_lasx_xvsrai_b:
2881 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2882 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
2883 case Intrinsic::loongarch_lsx_vsrai_h:
2884 case Intrinsic::loongarch_lasx_xvsrai_h:
2885 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2886 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
2887 case Intrinsic::loongarch_lsx_vsrai_w:
2888 case Intrinsic::loongarch_lasx_xvsrai_w:
2889 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2890 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
2891 case Intrinsic::loongarch_lsx_vsrai_d:
2892 case Intrinsic::loongarch_lasx_xvsrai_d:
2893 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2894 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
2895 case Intrinsic::loongarch_lsx_vclz_b:
2896 case Intrinsic::loongarch_lsx_vclz_h:
2897 case Intrinsic::loongarch_lsx_vclz_w:
2898 case Intrinsic::loongarch_lsx_vclz_d:
2899 case Intrinsic::loongarch_lasx_xvclz_b:
2900 case Intrinsic::loongarch_lasx_xvclz_h:
2901 case Intrinsic::loongarch_lasx_xvclz_w:
2902 case Intrinsic::loongarch_lasx_xvclz_d:
2903 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
2904 case Intrinsic::loongarch_lsx_vpcnt_b:
2905 case Intrinsic::loongarch_lsx_vpcnt_h:
2906 case Intrinsic::loongarch_lsx_vpcnt_w:
2907 case Intrinsic::loongarch_lsx_vpcnt_d:
2908 case Intrinsic::loongarch_lasx_xvpcnt_b:
2909 case Intrinsic::loongarch_lasx_xvpcnt_h:
2910 case Intrinsic::loongarch_lasx_xvpcnt_w:
2911 case Intrinsic::loongarch_lasx_xvpcnt_d:
2912 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
2913 case Intrinsic::loongarch_lsx_vbitclr_b:
2914 case Intrinsic::loongarch_lsx_vbitclr_h:
2915 case Intrinsic::loongarch_lsx_vbitclr_w:
2916 case Intrinsic::loongarch_lsx_vbitclr_d:
2917 case Intrinsic::loongarch_lasx_xvbitclr_b:
2918 case Intrinsic::loongarch_lasx_xvbitclr_h:
2919 case Intrinsic::loongarch_lasx_xvbitclr_w:
2920 case Intrinsic::loongarch_lasx_xvbitclr_d:
2921 return lowerVectorBitClear(Node: N, DAG);
2922 case Intrinsic::loongarch_lsx_vbitclri_b:
2923 case Intrinsic::loongarch_lasx_xvbitclri_b:
2924 return lowerVectorBitClearImm<3>(Node: N, DAG);
2925 case Intrinsic::loongarch_lsx_vbitclri_h:
2926 case Intrinsic::loongarch_lasx_xvbitclri_h:
2927 return lowerVectorBitClearImm<4>(Node: N, DAG);
2928 case Intrinsic::loongarch_lsx_vbitclri_w:
2929 case Intrinsic::loongarch_lasx_xvbitclri_w:
2930 return lowerVectorBitClearImm<5>(Node: N, DAG);
2931 case Intrinsic::loongarch_lsx_vbitclri_d:
2932 case Intrinsic::loongarch_lasx_xvbitclri_d:
2933 return lowerVectorBitClearImm<6>(Node: N, DAG);
2934 case Intrinsic::loongarch_lsx_vbitset_b:
2935 case Intrinsic::loongarch_lsx_vbitset_h:
2936 case Intrinsic::loongarch_lsx_vbitset_w:
2937 case Intrinsic::loongarch_lsx_vbitset_d:
2938 case Intrinsic::loongarch_lasx_xvbitset_b:
2939 case Intrinsic::loongarch_lasx_xvbitset_h:
2940 case Intrinsic::loongarch_lasx_xvbitset_w:
2941 case Intrinsic::loongarch_lasx_xvbitset_d: {
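// vbitset sets one bit per element: result = op1 | (1 << bit), where the bit
// index comes from op2 truncated to the element width.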
2942 EVT VecTy = N->getValueType(ResNo: 0);
2943 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
2944 return DAG.getNode(
2945 Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
2946 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
2947 }
2948 case Intrinsic::loongarch_lsx_vbitseti_b:
2949 case Intrinsic::loongarch_lasx_xvbitseti_b:
2950 return lowerVectorBitSetImm<3>(Node: N, DAG);
2951 case Intrinsic::loongarch_lsx_vbitseti_h:
2952 case Intrinsic::loongarch_lasx_xvbitseti_h:
2953 return lowerVectorBitSetImm<4>(Node: N, DAG);
2954 case Intrinsic::loongarch_lsx_vbitseti_w:
2955 case Intrinsic::loongarch_lasx_xvbitseti_w:
2956 return lowerVectorBitSetImm<5>(Node: N, DAG);
2957 case Intrinsic::loongarch_lsx_vbitseti_d:
2958 case Intrinsic::loongarch_lasx_xvbitseti_d:
2959 return lowerVectorBitSetImm<6>(Node: N, DAG);
2960 case Intrinsic::loongarch_lsx_vbitrev_b:
2961 case Intrinsic::loongarch_lsx_vbitrev_h:
2962 case Intrinsic::loongarch_lsx_vbitrev_w:
2963 case Intrinsic::loongarch_lsx_vbitrev_d:
2964 case Intrinsic::loongarch_lasx_xvbitrev_b:
2965 case Intrinsic::loongarch_lasx_xvbitrev_h:
2966 case Intrinsic::loongarch_lasx_xvbitrev_w:
2967 case Intrinsic::loongarch_lasx_xvbitrev_d: {
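// vbitrev flips one bit per element: result = op1 ^ (1 << bit), where the bit
// index comes from op2 truncated to the element width.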
2968 EVT VecTy = N->getValueType(ResNo: 0);
2969 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
2970 return DAG.getNode(
2971 Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
2972 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
2973 }
2974 case Intrinsic::loongarch_lsx_vbitrevi_b:
2975 case Intrinsic::loongarch_lasx_xvbitrevi_b:
2976 return lowerVectorBitRevImm<3>(Node: N, DAG);
2977 case Intrinsic::loongarch_lsx_vbitrevi_h:
2978 case Intrinsic::loongarch_lasx_xvbitrevi_h:
2979 return lowerVectorBitRevImm<4>(Node: N, DAG);
2980 case Intrinsic::loongarch_lsx_vbitrevi_w:
2981 case Intrinsic::loongarch_lasx_xvbitrevi_w:
2982 return lowerVectorBitRevImm<5>(Node: N, DAG);
2983 case Intrinsic::loongarch_lsx_vbitrevi_d:
2984 case Intrinsic::loongarch_lasx_xvbitrevi_d:
2985 return lowerVectorBitRevImm<6>(Node: N, DAG);
2986 case Intrinsic::loongarch_lsx_vfadd_s:
2987 case Intrinsic::loongarch_lsx_vfadd_d:
2988 case Intrinsic::loongarch_lasx_xvfadd_s:
2989 case Intrinsic::loongarch_lasx_xvfadd_d:
2990 return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2991 N2: N->getOperand(Num: 2));
2992 case Intrinsic::loongarch_lsx_vfsub_s:
2993 case Intrinsic::loongarch_lsx_vfsub_d:
2994 case Intrinsic::loongarch_lasx_xvfsub_s:
2995 case Intrinsic::loongarch_lasx_xvfsub_d:
2996 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
2997 N2: N->getOperand(Num: 2));
2998 case Intrinsic::loongarch_lsx_vfmul_s:
2999 case Intrinsic::loongarch_lsx_vfmul_d:
3000 case Intrinsic::loongarch_lasx_xvfmul_s:
3001 case Intrinsic::loongarch_lasx_xvfmul_d:
3002 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3003 N2: N->getOperand(Num: 2));
3004 case Intrinsic::loongarch_lsx_vfdiv_s:
3005 case Intrinsic::loongarch_lsx_vfdiv_d:
3006 case Intrinsic::loongarch_lasx_xvfdiv_s:
3007 case Intrinsic::loongarch_lasx_xvfdiv_d:
3008 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3009 N2: N->getOperand(Num: 2));
3010 case Intrinsic::loongarch_lsx_vfmadd_s:
3011 case Intrinsic::loongarch_lsx_vfmadd_d:
3012 case Intrinsic::loongarch_lasx_xvfmadd_s:
3013 case Intrinsic::loongarch_lasx_xvfmadd_d:
3014 return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3015 N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3));
3016 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3017 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
3018 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
3019 N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget));
3020 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3021 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3022 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
3023 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
3024 N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget));
3025 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3026 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3027 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
3028 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
3029 N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget));
3030 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3031 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
3032 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
3033 N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget));
3034 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3035 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3036 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3037 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3038 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3039 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3040 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3041 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
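// vreplgr2vr splats the scalar GPR operand into every element of the result.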
3042 EVT ResTy = N->getValueType(ResNo: 0);
3043 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(Num: 1));
3044 return DAG.getBuildVector(VT: ResTy, DL, Ops);
3045 }
3046 case Intrinsic::loongarch_lsx_vreplve_b:
3047 case Intrinsic::loongarch_lsx_vreplve_h:
3048 case Intrinsic::loongarch_lsx_vreplve_w:
3049 case Intrinsic::loongarch_lsx_vreplve_d:
3050 case Intrinsic::loongarch_lasx_xvreplve_b:
3051 case Intrinsic::loongarch_lasx_xvreplve_h:
3052 case Intrinsic::loongarch_lasx_xvreplve_w:
3053 case Intrinsic::loongarch_lasx_xvreplve_d:
3054 return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0),
3055 N1: N->getOperand(Num: 1),
3056 N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
3057 Operand: N->getOperand(Num: 2)));
3058 }
3059 return SDValue();
3060}
3061
3062SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3063 DAGCombinerInfo &DCI) const {
3064 SelectionDAG &DAG = DCI.DAG;
3065 switch (N->getOpcode()) {
3066 default:
3067 break;
3068 case ISD::AND:
3069 return performANDCombine(N, DAG, DCI, Subtarget);
3070 case ISD::OR:
3071 return performORCombine(N, DAG, DCI, Subtarget);
3072 case ISD::SRL:
3073 return performSRLCombine(N, DAG, DCI, Subtarget);
3074 case LoongArchISD::BITREV_W:
3075 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3076 case ISD::INTRINSIC_WO_CHAIN:
3077 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3078 }
3079 return SDValue();
3080}
3081
3082static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3083 MachineBasicBlock *MBB) {
3084 if (!ZeroDivCheck)
3085 return MBB;
3086
3087 // Build instructions:
3088 // MBB:
3089 // div(or mod) $dst, $dividend, $divisor
3090 // bnez $divisor, SinkMBB
3091 // BreakMBB:
3092 // break 7 // BRK_DIVZERO
3093 // SinkMBB:
3094 // fallthrough
3095 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3096 MachineFunction::iterator It = ++MBB->getIterator();
3097 MachineFunction *MF = MBB->getParent();
3098 auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
3099 auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
3100 MF->insert(MBBI: It, MBB: BreakMBB);
3101 MF->insert(MBBI: It, MBB: SinkMBB);
3102
3103 // Transfer the remainder of MBB and its successor edges to SinkMBB.
3104 SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
3105 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
3106
3107 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3108 DebugLoc DL = MI.getDebugLoc();
3109 MachineOperand &Divisor = MI.getOperand(i: 2);
3110 Register DivisorReg = Divisor.getReg();
3111
3112 // MBB:
3113 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3114 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3115 .addMBB(SinkMBB);
3116 MBB->addSuccessor(Succ: BreakMBB);
3117 MBB->addSuccessor(Succ: SinkMBB);
3118
3119 // BreakMBB:
3120 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3121 // definition of BRK_DIVZERO.
3122 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3123 BreakMBB->addSuccessor(Succ: SinkMBB);
3124
3125 // Clear Divisor's kill flag.
3126 Divisor.setIsKill(false);
3127
3128 return SinkMBB;
3129}
3130
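// Expand a vector "branch on [all|any] element [non]zero" pseudo (PseudoVBZ*,
// PseudoVBNZ*, PseudoXVBZ*, PseudoXVBNZ*) into a diamond that materialises a
// boolean GPR result:
//   BB:      $fcc = [x]vset* $vr     ; bcnez $fcc, TrueBB
//   FalseBB: $rd1 = addi.w $zero, 0  ; b SinkBB
//   TrueBB:  $rd2 = addi.w $zero, 1
//   SinkBB:  $dst = phi [$rd1, FalseBB], [$rd2, TrueBB]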
3131static MachineBasicBlock *
3132emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3133 const LoongArchSubtarget &Subtarget) {
3134 unsigned CondOpc;
3135 switch (MI.getOpcode()) {
3136 default:
3137 llvm_unreachable("Unexpected opcode");
3138 case LoongArch::PseudoVBZ:
3139 CondOpc = LoongArch::VSETEQZ_V;
3140 break;
3141 case LoongArch::PseudoVBZ_B:
3142 CondOpc = LoongArch::VSETANYEQZ_B;
3143 break;
3144 case LoongArch::PseudoVBZ_H:
3145 CondOpc = LoongArch::VSETANYEQZ_H;
3146 break;
3147 case LoongArch::PseudoVBZ_W:
3148 CondOpc = LoongArch::VSETANYEQZ_W;
3149 break;
3150 case LoongArch::PseudoVBZ_D:
3151 CondOpc = LoongArch::VSETANYEQZ_D;
3152 break;
3153 case LoongArch::PseudoVBNZ:
3154 CondOpc = LoongArch::VSETNEZ_V;
3155 break;
3156 case LoongArch::PseudoVBNZ_B:
3157 CondOpc = LoongArch::VSETALLNEZ_B;
3158 break;
3159 case LoongArch::PseudoVBNZ_H:
3160 CondOpc = LoongArch::VSETALLNEZ_H;
3161 break;
3162 case LoongArch::PseudoVBNZ_W:
3163 CondOpc = LoongArch::VSETALLNEZ_W;
3164 break;
3165 case LoongArch::PseudoVBNZ_D:
3166 CondOpc = LoongArch::VSETALLNEZ_D;
3167 break;
3168 case LoongArch::PseudoXVBZ:
3169 CondOpc = LoongArch::XVSETEQZ_V;
3170 break;
3171 case LoongArch::PseudoXVBZ_B:
3172 CondOpc = LoongArch::XVSETANYEQZ_B;
3173 break;
3174 case LoongArch::PseudoXVBZ_H:
3175 CondOpc = LoongArch::XVSETANYEQZ_H;
3176 break;
3177 case LoongArch::PseudoXVBZ_W:
3178 CondOpc = LoongArch::XVSETANYEQZ_W;
3179 break;
3180 case LoongArch::PseudoXVBZ_D:
3181 CondOpc = LoongArch::XVSETANYEQZ_D;
3182 break;
3183 case LoongArch::PseudoXVBNZ:
3184 CondOpc = LoongArch::XVSETNEZ_V;
3185 break;
3186 case LoongArch::PseudoXVBNZ_B:
3187 CondOpc = LoongArch::XVSETALLNEZ_B;
3188 break;
3189 case LoongArch::PseudoXVBNZ_H:
3190 CondOpc = LoongArch::XVSETALLNEZ_H;
3191 break;
3192 case LoongArch::PseudoXVBNZ_W:
3193 CondOpc = LoongArch::XVSETALLNEZ_W;
3194 break;
3195 case LoongArch::PseudoXVBNZ_D:
3196 CondOpc = LoongArch::XVSETALLNEZ_D;
3197 break;
3198 }
3199
3200 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3201 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3202 DebugLoc DL = MI.getDebugLoc();
3203 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3204 MachineFunction::iterator It = ++BB->getIterator();
3205
3206 MachineFunction *F = BB->getParent();
3207 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3208 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3209 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3210
3211 F->insert(MBBI: It, MBB: FalseBB);
3212 F->insert(MBBI: It, MBB: TrueBB);
3213 F->insert(MBBI: It, MBB: SinkBB);
3214
3215 // Transfer the remainder of BB and its successor edges to SinkBB.
3216 SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
3217 SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
3218
3219 // Insert the real (condition-set) instruction into BB.
3220 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3221 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg());
3222
3223 // Insert branch.
3224 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3225 BB->addSuccessor(Succ: FalseBB);
3226 BB->addSuccessor(Succ: TrueBB);
3227
3228 // FalseBB.
3229 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3230 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3231 .addReg(LoongArch::R0)
3232 .addImm(0);
3233 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3234 FalseBB->addSuccessor(Succ: SinkBB);
3235
3236 // TrueBB.
3237 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3238 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3239 .addReg(LoongArch::R0)
3240 .addImm(1);
3241 TrueBB->addSuccessor(Succ: SinkBB);
3242
3243 // SinkBB: merge the results.
3244 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3245 MI.getOperand(0).getReg())
3246 .addReg(RD1)
3247 .addMBB(FalseBB)
3248 .addReg(RD2)
3249 .addMBB(TrueBB);
3250
3251 // The pseudo instruction is gone now.
3252 MI.eraseFromParent();
3253 return SinkBB;
3254}
3255
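// Expand PseudoXVINSGR2VR_{B,H}: insert a GPR element into a 256-bit LASX
// vector at index Idx. There is no byte/halfword xvinsgr2vr instruction, so
// the 128-bit half containing the element is moved into an LSX register
// (via xvpermi.q when Idx addresses the high half), updated with
// vinsgr2vr.{b,h}, and then placed back into the 256-bit result.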
3256static MachineBasicBlock *
3257emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3258 const LoongArchSubtarget &Subtarget) {
3259 unsigned InsOp;
3260 unsigned HalfSize;
3261 switch (MI.getOpcode()) {
3262 default:
3263 llvm_unreachable("Unexpected opcode");
3264 case LoongArch::PseudoXVINSGR2VR_B:
3265 HalfSize = 16;
3266 InsOp = LoongArch::VINSGR2VR_B;
3267 break;
3268 case LoongArch::PseudoXVINSGR2VR_H:
3269 HalfSize = 8;
3270 InsOp = LoongArch::VINSGR2VR_H;
3271 break;
3272 }
3273 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3274 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3275 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3276 DebugLoc DL = MI.getDebugLoc();
3277 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3278 // XDst = vector_insert XSrc, Elt, Idx
3279 Register XDst = MI.getOperand(i: 0).getReg();
3280 Register XSrc = MI.getOperand(i: 1).getReg();
3281 Register Elt = MI.getOperand(i: 2).getReg();
3282 unsigned Idx = MI.getOperand(i: 3).getImm();
3283
3284 Register ScratchReg1 = XSrc;
3285 if (Idx >= HalfSize) {
3286 ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
3287 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3288 .addReg(XSrc)
3289 .addReg(XSrc)
3290 .addImm(1);
3291 }
3292
3293 Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
3294 Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);
3295 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3296 .addReg(ScratchReg1, 0, LoongArch::sub_128);
3297 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2)
3298 .addReg(RegNo: ScratchSubReg1)
3299 .addReg(RegNo: Elt)
3300 .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx);
3301
3302 Register ScratchReg2 = XDst;
3303 if (Idx >= HalfSize)
3304 ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
3305
3306 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3307 .addImm(0)
3308 .addReg(ScratchSubReg2)
3309 .addImm(LoongArch::sub_128);
3310
3311 if (Idx >= HalfSize)
3312 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3313 .addReg(XSrc)
3314 .addReg(ScratchReg2)
3315 .addImm(2);
3316
3317 MI.eraseFromParent();
3318 return BB;
3319}
3320
3321MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3322 MachineInstr &MI, MachineBasicBlock *BB) const {
3323 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3324 DebugLoc DL = MI.getDebugLoc();
3325
3326 switch (MI.getOpcode()) {
3327 default:
3328 llvm_unreachable("Unexpected instr type to insert");
3329 case LoongArch::DIV_W:
3330 case LoongArch::DIV_WU:
3331 case LoongArch::MOD_W:
3332 case LoongArch::MOD_WU:
3333 case LoongArch::DIV_D:
3334 case LoongArch::DIV_DU:
3335 case LoongArch::MOD_D:
3336 case LoongArch::MOD_DU:
3337 return insertDivByZeroTrap(MI, MBB: BB);
3339 case LoongArch::WRFCSR: {
3340 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3341 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3342 .addReg(MI.getOperand(1).getReg());
3343 MI.eraseFromParent();
3344 return BB;
3345 }
3346 case LoongArch::RDFCSR: {
3347 MachineInstr *ReadFCSR =
3348 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3349 MI.getOperand(0).getReg())
3350 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3351 ReadFCSR->getOperand(i: 1).setIsUndef();
3352 MI.eraseFromParent();
3353 return BB;
3354 }
3355 case LoongArch::PseudoVBZ:
3356 case LoongArch::PseudoVBZ_B:
3357 case LoongArch::PseudoVBZ_H:
3358 case LoongArch::PseudoVBZ_W:
3359 case LoongArch::PseudoVBZ_D:
3360 case LoongArch::PseudoVBNZ:
3361 case LoongArch::PseudoVBNZ_B:
3362 case LoongArch::PseudoVBNZ_H:
3363 case LoongArch::PseudoVBNZ_W:
3364 case LoongArch::PseudoVBNZ_D:
3365 case LoongArch::PseudoXVBZ:
3366 case LoongArch::PseudoXVBZ_B:
3367 case LoongArch::PseudoXVBZ_H:
3368 case LoongArch::PseudoXVBZ_W:
3369 case LoongArch::PseudoXVBZ_D:
3370 case LoongArch::PseudoXVBNZ:
3371 case LoongArch::PseudoXVBNZ_B:
3372 case LoongArch::PseudoXVBNZ_H:
3373 case LoongArch::PseudoXVBNZ_W:
3374 case LoongArch::PseudoXVBNZ_D:
3375 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3376 case LoongArch::PseudoXVINSGR2VR_B:
3377 case LoongArch::PseudoXVINSGR2VR_H:
3378 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3379 }
3380}
3381
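// Misaligned accesses are only reported as allowed (and "fast") when the
// subtarget has hardware unaligned-access support (the UAL feature).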
3382bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3383 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3384 unsigned *Fast) const {
3385 if (!Subtarget.hasUAL())
3386 return false;
3387
3388 // TODO: Set a reasonable speed number.
3389 if (Fast)
3390 *Fast = 1;
3391 return true;
3392}
3393
3394const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3395 switch ((LoongArchISD::NodeType)Opcode) {
3396 case LoongArchISD::FIRST_NUMBER:
3397 break;
3398
3399#define NODE_NAME_CASE(node) \
3400 case LoongArchISD::node: \
3401 return "LoongArchISD::" #node;
3402
3403 // TODO: Add more target-dependent nodes later.
3404 NODE_NAME_CASE(CALL)
3405 NODE_NAME_CASE(CALL_MEDIUM)
3406 NODE_NAME_CASE(CALL_LARGE)
3407 NODE_NAME_CASE(RET)
3408 NODE_NAME_CASE(TAIL)
3409 NODE_NAME_CASE(TAIL_MEDIUM)
3410 NODE_NAME_CASE(TAIL_LARGE)
3411 NODE_NAME_CASE(SLL_W)
3412 NODE_NAME_CASE(SRA_W)
3413 NODE_NAME_CASE(SRL_W)
3414 NODE_NAME_CASE(BSTRINS)
3415 NODE_NAME_CASE(BSTRPICK)
3416 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3417 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3418 NODE_NAME_CASE(FTINT)
3419 NODE_NAME_CASE(REVB_2H)
3420 NODE_NAME_CASE(REVB_2W)
3421 NODE_NAME_CASE(BITREV_4B)
3422 NODE_NAME_CASE(BITREV_W)
3423 NODE_NAME_CASE(ROTR_W)
3424 NODE_NAME_CASE(ROTL_W)
3425 NODE_NAME_CASE(CLZ_W)
3426 NODE_NAME_CASE(CTZ_W)
3427 NODE_NAME_CASE(DBAR)
3428 NODE_NAME_CASE(IBAR)
3429 NODE_NAME_CASE(BREAK)
3430 NODE_NAME_CASE(SYSCALL)
3431 NODE_NAME_CASE(CRC_W_B_W)
3432 NODE_NAME_CASE(CRC_W_H_W)
3433 NODE_NAME_CASE(CRC_W_W_W)
3434 NODE_NAME_CASE(CRC_W_D_W)
3435 NODE_NAME_CASE(CRCC_W_B_W)
3436 NODE_NAME_CASE(CRCC_W_H_W)
3437 NODE_NAME_CASE(CRCC_W_W_W)
3438 NODE_NAME_CASE(CRCC_W_D_W)
3439 NODE_NAME_CASE(CSRRD)
3440 NODE_NAME_CASE(CSRWR)
3441 NODE_NAME_CASE(CSRXCHG)
3442 NODE_NAME_CASE(IOCSRRD_B)
3443 NODE_NAME_CASE(IOCSRRD_H)
3444 NODE_NAME_CASE(IOCSRRD_W)
3445 NODE_NAME_CASE(IOCSRRD_D)
3446 NODE_NAME_CASE(IOCSRWR_B)
3447 NODE_NAME_CASE(IOCSRWR_H)
3448 NODE_NAME_CASE(IOCSRWR_W)
3449 NODE_NAME_CASE(IOCSRWR_D)
3450 NODE_NAME_CASE(CPUCFG)
3451 NODE_NAME_CASE(MOVGR2FCSR)
3452 NODE_NAME_CASE(MOVFCSR2GR)
3453 NODE_NAME_CASE(CACOP_D)
3454 NODE_NAME_CASE(CACOP_W)
3455 NODE_NAME_CASE(VPICK_SEXT_ELT)
3456 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3457 NODE_NAME_CASE(VREPLVE)
3458 NODE_NAME_CASE(VALL_ZERO)
3459 NODE_NAME_CASE(VANY_ZERO)
3460 NODE_NAME_CASE(VALL_NONZERO)
3461 NODE_NAME_CASE(VANY_NONZERO)
3462 }
3463#undef NODE_NAME_CASE
3464 return nullptr;
3465}
3466
3467//===----------------------------------------------------------------------===//
3468// Calling Convention Implementation
3469//===----------------------------------------------------------------------===//
3470
3471 // Eight general-purpose registers a0-a7 are used for passing integer
3472 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
3473 // to pass fixed-point arguments, and also floating-point arguments when no FPR
3474 // is available or when the soft-float ABI is in use.
3475const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3476 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3477 LoongArch::R10, LoongArch::R11};
3478 // Eight floating-point registers fa0-fa7 are used for passing floating-point
3479 // arguments, with fa0-fa1 also used to return values.
3480const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3481 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3482 LoongArch::F6, LoongArch::F7};
3483// FPR32 and FPR64 alias each other.
3484const MCPhysReg ArgFPR64s[] = {
3485 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3486 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3487
3488const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3489 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3490 LoongArch::VR6, LoongArch::VR7};
3491
3492const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3493 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3494 LoongArch::XR6, LoongArch::XR7};
3495
3496// Pass a 2*GRLen argument that has been split into two GRLen values through
3497// registers or the stack as necessary.
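// For example (on LP64D, purely as an illustration), an i128 argument that
// was legalised into two i64 halves may end up with its low half in the last
// free GPR and its high half spilled to the stack.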
3498static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3499 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3500 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3501 ISD::ArgFlagsTy ArgFlags2) {
3502 unsigned GRLenInBytes = GRLen / 8;
3503 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3504 // At least one half can be passed via register.
3505 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg,
3506 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
3507 } else {
3508 // Both halves must be passed on the stack, with proper alignment.
3509 Align StackAlign =
3510 std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
3511 State.addLoc(
3512 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
3513 Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
3514 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
3515 State.addLoc(V: CCValAssign::getMem(
3516 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
3517 LocVT: LocVT2, HTP: CCValAssign::Full));
3518 return false;
3519 }
3520 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3521 // The second half can also be passed via register.
3522 State.addLoc(
3523 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
3524 } else {
3525 // The second half is passed via the stack, without additional alignment.
3526 State.addLoc(V: CCValAssign::getMem(
3527 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
3528 LocVT: LocVT2, HTP: CCValAssign::Full));
3529 }
3530 return false;
3531}
3532
3533// Implements the LoongArch calling convention. Returns true upon failure.
3534static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3535 unsigned ValNo, MVT ValVT,
3536 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3537 CCState &State, bool IsFixed, bool IsRet,
3538 Type *OrigTy) {
3539 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3540 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3541 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3542 MVT LocVT = ValVT;
3543
3544 // Any return value split into more than two values can't be returned
3545 // directly.
3546 if (IsRet && ValNo > 1)
3547 return true;
3548
3549 // Use a GPR for floating-point values when passing a variadic argument or
3550 // when no FPR is available.
3550 bool UseGPRForFloat = true;
3551
3552 switch (ABI) {
3553 default:
3554 llvm_unreachable("Unexpected ABI");
3555 case LoongArchABI::ABI_ILP32S:
3556 case LoongArchABI::ABI_ILP32F:
3557 case LoongArchABI::ABI_LP64F:
3558 report_fatal_error(reason: "Unimplemented ABI");
3559 break;
3560 case LoongArchABI::ABI_ILP32D:
3561 case LoongArchABI::ABI_LP64D:
3562 UseGPRForFloat = !IsFixed;
3563 break;
3564 case LoongArchABI::ABI_LP64S:
3565 break;
3566 }
3567
3568 // FPR32 and FPR64 alias each other.
3569 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3570 UseGPRForFloat = true;
3571
3572 if (UseGPRForFloat && ValVT == MVT::f32) {
3573 LocVT = GRLenVT;
3574 LocInfo = CCValAssign::BCvt;
3575 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3576 LocVT = MVT::i64;
3577 LocInfo = CCValAssign::BCvt;
3578 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3579 // TODO: Handle passing f64 on LA32 with D feature.
3580 report_fatal_error(reason: "Passing f64 with GPR on LA32 is undefined");
3581 }
3582
3583 // If this is a variadic argument, the LoongArch calling convention requires
3584 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3585 // byte alignment. An aligned register should be used regardless of whether
3586 // the original argument was split during legalisation or not. The argument
3587 // will not be passed by registers if the original type is larger than
3588 // 2*GRLen, so the register alignment rule does not apply.
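// For example, a variadic argument of 2*GRLen size and alignment (e.g. a
// 128-bit integer on LA64 split into two i64 halves) starts in an
// even-indexed GPR, possibly skipping one register to get there.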
3589 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3590 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3591 DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
3592 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3593 // Skip 'odd' register if necessary.
3594 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3595 State.AllocateReg(ArgGPRs);
3596 }
3597
3598 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3599 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3600 State.getPendingArgFlags();
3601
3602 assert(PendingLocs.size() == PendingArgFlags.size() &&
3603 "PendingLocs and PendingArgFlags out of sync");
3604
3605 // Split arguments might be passed indirectly, so keep track of the pending
3606 // values.
3607 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3608 LocVT = GRLenVT;
3609 LocInfo = CCValAssign::Indirect;
3610 PendingLocs.push_back(
3611 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
3612 PendingArgFlags.push_back(Elt: ArgFlags);
3613 if (!ArgFlags.isSplitEnd()) {
3614 return false;
3615 }
3616 }
3617
3618 // If the split argument only had two elements, it should be passed directly
3619 // in registers or on the stack.
3620 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3621 PendingLocs.size() <= 2) {
3622 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3623 // Apply the normal calling convention rules to the first half of the
3624 // split argument.
3625 CCValAssign VA = PendingLocs[0];
3626 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3627 PendingLocs.clear();
3628 PendingArgFlags.clear();
3629 return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
3630 ArgFlags2: ArgFlags);
3631 }
3632
3633 // Allocate to a register if possible, or else a stack slot.
3634 Register Reg;
3635 unsigned StoreSizeBytes = GRLen / 8;
3636 Align StackAlign = Align(GRLen / 8);
3637
3638 if (ValVT == MVT::f32 && !UseGPRForFloat)
3639 Reg = State.AllocateReg(ArgFPR32s);
3640 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3641 Reg = State.AllocateReg(ArgFPR64s);
3642 else if (ValVT.is128BitVector())
3643 Reg = State.AllocateReg(ArgVRs);
3644 else if (ValVT.is256BitVector())
3645 Reg = State.AllocateReg(ArgXRs);
3646 else
3647 Reg = State.AllocateReg(ArgGPRs);
3648
3649 unsigned StackOffset =
3650 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
3651
3652 // If we reach this point and PendingLocs is non-empty, we must be at the
3653 // end of a split argument that must be passed indirectly.
3654 if (!PendingLocs.empty()) {
3655 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3656 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3657 for (auto &It : PendingLocs) {
3658 if (Reg)
3659 It.convertToReg(RegNo: Reg);
3660 else
3661 It.convertToMem(Offset: StackOffset);
3662 State.addLoc(V: It);
3663 }
3664 PendingLocs.clear();
3665 PendingArgFlags.clear();
3666 return false;
3667 }
3668 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3669 "Expected a GRLenVT at this stage");
3670
3671 if (Reg) {
3672 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
3673 return false;
3674 }
3675
3676 // When a floating-point value is passed on the stack, no bit-cast is needed.
3677 if (ValVT.isFloatingPoint()) {
3678 LocVT = ValVT;
3679 LocInfo = CCValAssign::Full;
3680 }
3681
3682 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
3683 return false;
3684}
3685
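// Assign locations to each value in Ins (formal arguments, or incoming return
// values when IsRet is true); any value the assignment function rejects is
// treated as an internal error.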
3686void LoongArchTargetLowering::analyzeInputArgs(
3687 MachineFunction &MF, CCState &CCInfo,
3688 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3689 LoongArchCCAssignFn Fn) const {
3690 FunctionType *FType = MF.getFunction().getFunctionType();
3691 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3692 MVT ArgVT = Ins[i].VT;
3693 Type *ArgTy = nullptr;
3694 if (IsRet)
3695 ArgTy = FType->getReturnType();
3696 else if (Ins[i].isOrigArg())
3697 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
3698 LoongArchABI::ABI ABI =
3699 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3700 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3701 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3702 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3703 << '\n');
3704 llvm_unreachable("");
3705 }
3706 }
3707}
3708
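// Assign locations to each value in Outs (outgoing call arguments, or values
// being returned when IsRet is true); any value the assignment function
// rejects is treated as an internal error.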
3709void LoongArchTargetLowering::analyzeOutputArgs(
3710 MachineFunction &MF, CCState &CCInfo,
3711 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3712 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3713 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3714 MVT ArgVT = Outs[i].VT;
3715 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3716 LoongArchABI::ABI ABI =
3717 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3718 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3719 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3720 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3721 << "\n");
3722 llvm_unreachable("");
3723 }
3724 }
3725}
3726
3727 // Convert Val from its location type (LocVT) to its value type (ValVT).
3728 // Should not be called for CCValAssign::Indirect values.
3729static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3730 const CCValAssign &VA, const SDLoc &DL) {
3731 switch (VA.getLocInfo()) {
3732 default:
3733 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3734 case CCValAssign::Full:
3735 case CCValAssign::Indirect:
3736 break;
3737 case CCValAssign::BCvt:
3738 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3739 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3740 else
3741 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
3742 break;
3743 }
3744 return Val;
3745}
3746
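// Materialise a value passed in a physical register: create a live-in virtual
// register for it, copy the value out of it, and convert the result from the
// location type back to the value type.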
3747static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3748 const CCValAssign &VA, const SDLoc &DL,
3749 const LoongArchTargetLowering &TLI) {
3750 MachineFunction &MF = DAG.getMachineFunction();
3751 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3752 EVT LocVT = VA.getLocVT();
3753 SDValue Val;
3754 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
3755 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
3756 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
3757 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
3758
3759 return convertLocVTToValVT(DAG, Val, VA, DL);
3760}
3761
3762// The caller is responsible for loading the full value if the argument is
3763// passed with CCValAssign::Indirect.
3764static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3765 const CCValAssign &VA, const SDLoc &DL) {
3766 MachineFunction &MF = DAG.getMachineFunction();
3767 MachineFrameInfo &MFI = MF.getFrameInfo();
3768 EVT ValVT = VA.getValVT();
3769 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
3770 /*IsImmutable=*/true);
3771 SDValue FIN = DAG.getFrameIndex(
3772 FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)));
3773
3774 ISD::LoadExtType ExtType;
3775 switch (VA.getLocInfo()) {
3776 default:
3777 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3778 case CCValAssign::Full:
3779 case CCValAssign::Indirect:
3780 case CCValAssign::BCvt:
3781 ExtType = ISD::NON_EXTLOAD;
3782 break;
3783 }
3784 return DAG.getExtLoad(
3785 ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN,
3786 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
3787}
3788
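// Convert Val from its ValVT to the LocVT chosen by the calling convention;
// the inverse of convertLocVTToValVT.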
3789static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3790 const CCValAssign &VA, const SDLoc &DL) {
3791 EVT LocVT = VA.getLocVT();
3792
3793 switch (VA.getLocInfo()) {
3794 default:
3795 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3796 case CCValAssign::Full:
3797 break;
3798 case CCValAssign::BCvt:
3799 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3800 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3801 else
3802 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
3803 break;
3804 }
3805 return Val;
3806}
3807
3808static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3809 CCValAssign::LocInfo LocInfo,
3810 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3811 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3812 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3813 // s0 s1 s2 s3 s4 s5 s6 s7 s8
3814 static const MCPhysReg GPRList[] = {
3815 LoongArch::R23, LoongArch::R24, LoongArch::R25,
3816 LoongArch::R26, LoongArch::R27, LoongArch::R28,
3817 LoongArch::R29, LoongArch::R30, LoongArch::R31};
3818 if (unsigned Reg = State.AllocateReg(GPRList)) {
3819 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
3820 return false;
3821 }
3822 }
3823
3824 if (LocVT == MVT::f32) {
3825 // Pass in STG registers: F1, F2, F3, F4
3826 // fs0,fs1,fs2,fs3
3827 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3828 LoongArch::F26, LoongArch::F27};
3829 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3830 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
3831 return false;
3832 }
3833 }
3834
3835 if (LocVT == MVT::f64) {
3836 // Pass in STG registers: D1, D2, D3, D4
3837 // fs4,fs5,fs6,fs7
3838 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3839 LoongArch::F30_64, LoongArch::F31_64};
3840 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3841 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
3842 return false;
3843 }
3844 }
3845
3846 report_fatal_error(reason: "No registers left in GHC calling convention");
3847 return true;
3848}
3849
3850// Transform physical registers into virtual registers.
3851SDValue LoongArchTargetLowering::LowerFormalArguments(
3852 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3853 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3854 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3855
3856 MachineFunction &MF = DAG.getMachineFunction();
3857
3858 switch (CallConv) {
3859 default:
3860 llvm_unreachable("Unsupported calling convention");
3861 case CallingConv::C:
3862 case CallingConv::Fast:
3863 break;
3864 case CallingConv::GHC:
3865 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3866 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3867 report_fatal_error(
3868 reason: "GHC calling convention requires the F and D extensions");
3869 }
3870
3871 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3872 MVT GRLenVT = Subtarget.getGRLenVT();
3873 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3874 // Used with varargs to accumulate store chains.
3875 std::vector<SDValue> OutChains;
3876
3877 // Assign locations to all of the incoming arguments.
3878 SmallVector<CCValAssign> ArgLocs;
3879 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3880
3881 if (CallConv == CallingConv::GHC)
3882 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
3883 else
3884 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);
3885
3886 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3887 CCValAssign &VA = ArgLocs[i];
3888 SDValue ArgValue;
3889 if (VA.isRegLoc())
3890 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, TLI: *this);
3891 else
3892 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3893 if (VA.getLocInfo() == CCValAssign::Indirect) {
3894 // If the original argument was split and passed by reference, we need to
3895 // load all parts of it here (using the same address).
3896 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
3897 PtrInfo: MachinePointerInfo()));
3898 unsigned ArgIndex = Ins[i].OrigArgIndex;
3899 unsigned ArgPartOffset = Ins[i].PartOffset;
3900 assert(ArgPartOffset == 0);
3901 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3902 CCValAssign &PartVA = ArgLocs[i + 1];
3903 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3904 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
3905 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
3906 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
3907 PtrInfo: MachinePointerInfo()));
3908 ++i;
3909 }
3910 continue;
3911 }
3912 InVals.push_back(Elt: ArgValue);
3913 }
3914
3915 if (IsVarArg) {
3916 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3917 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
3918 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3919 MachineFrameInfo &MFI = MF.getFrameInfo();
3920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3921 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3922
3923 // Offset of the first variable argument from stack pointer, and size of
3924 // the vararg save area. For now, the varargs save area is either zero or
3925 // large enough to hold a0-a7.
3926 int VaArgOffset, VarArgsSaveSize;
3927
3928 // If all registers are allocated, then all varargs must be passed on the
3929 // stack and we don't need to save any argregs.
3930 if (ArgRegs.size() == Idx) {
3931 VaArgOffset = CCInfo.getStackSize();
3932 VarArgsSaveSize = 0;
3933 } else {
3934 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3935 VaArgOffset = -VarArgsSaveSize;
3936 }
3937
3938 // Record the frame index of the first variable argument,
3939 // which is needed by VASTART.
3940 int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
3941 LoongArchFI->setVarArgsFrameIndex(FI);
3942
3943 // If saving an odd number of registers then create an extra stack slot to
3944 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3945 // offsets to even-numbered registers remain 2*GRLen-aligned.
3946 if (Idx % 2) {
3947 MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
3948 IsImmutable: true);
3949 VarArgsSaveSize += GRLenInBytes;
3950 }
3951
3952 // Copy the integer registers that may have been used for passing varargs
3953 // to the vararg save area.
3954 for (unsigned I = Idx; I < ArgRegs.size();
3955 ++I, VaArgOffset += GRLenInBytes) {
3956 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
3957 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
3958 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
3959 FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
3960 SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
3961 SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
3962 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
3963 cast<StoreSDNode>(Val: Store.getNode())
3964 ->getMemOperand()
3965 ->setValue((Value *)nullptr);
3966 OutChains.push_back(x: Store);
3967 }
3968 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3969 }
3970
3971 // All stores are grouped in one node to allow the matching between
3972 // the size of Ins and InVals. This only happens for vararg functions.
3973 if (!OutChains.empty()) {
3974 OutChains.push_back(x: Chain);
3975 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3976 }
3977
3978 return Chain;
3979}
3980
3981bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3982 return CI->isTailCall();
3983}
3984
3985 // Check that the return value is used only as a return value, as otherwise
3986 // we can't perform a tail call.
3987bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
3988 SDValue &Chain) const {
3989 if (N->getNumValues() != 1)
3990 return false;
3991 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
3992 return false;
3993
3994 SDNode *Copy = *N->use_begin();
3995 if (Copy->getOpcode() != ISD::CopyToReg)
3996 return false;
3997
3998 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3999 // isn't safe to perform a tail call.
4000 if (Copy->getGluedNode())
4001 return false;
4002
4003 // The copy must be used by a LoongArchISD::RET, and nothing else.
4004 bool HasRet = false;
4005 for (SDNode *Node : Copy->uses()) {
4006 if (Node->getOpcode() != LoongArchISD::RET)
4007 return false;
4008 HasRet = true;
4009 }
4010
4011 if (!HasRet)
4012 return false;
4013
4014 Chain = Copy->getOperand(Num: 0);
4015 return true;
4016}
4017
4018// Check whether the call is eligible for tail call optimization.
4019bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4020 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4021 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4022
4023 auto CalleeCC = CLI.CallConv;
4024 auto &Outs = CLI.Outs;
4025 auto &Caller = MF.getFunction();
4026 auto CallerCC = Caller.getCallingConv();
4027
4028 // Do not tail call opt if the stack is used to pass parameters.
4029 if (CCInfo.getStackSize() != 0)
4030 return false;
4031
4032 // Do not tail call opt if any parameters need to be passed indirectly.
4033 for (auto &VA : ArgLocs)
4034 if (VA.getLocInfo() == CCValAssign::Indirect)
4035 return false;
4036
4037 // Do not tail call opt if either caller or callee uses struct return
4038 // semantics.
4039 auto IsCallerStructRet = Caller.hasStructRetAttr();
4040 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4041 if (IsCallerStructRet || IsCalleeStructRet)
4042 return false;
4043
4044 // Do not tail call opt if either the callee or caller has a byval argument.
4045 for (auto &Arg : Outs)
4046 if (Arg.Flags.isByVal())
4047 return false;
4048
4049 // The callee has to preserve all registers the caller needs to preserve.
4050 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4051 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4052 if (CalleeCC != CallerCC) {
4053 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4054 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4055 return false;
4056 }
4057 return true;
4058}
4059
4060static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4061 return DAG.getDataLayout().getPrefTypeAlign(
4062 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
4063}
4064
4065// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4066// and output parameter nodes.
4067SDValue
4068LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4069 SmallVectorImpl<SDValue> &InVals) const {
4070 SelectionDAG &DAG = CLI.DAG;
4071 SDLoc &DL = CLI.DL;
4072 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4073 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4074 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4075 SDValue Chain = CLI.Chain;
4076 SDValue Callee = CLI.Callee;
4077 CallingConv::ID CallConv = CLI.CallConv;
4078 bool IsVarArg = CLI.IsVarArg;
4079 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4080 MVT GRLenVT = Subtarget.getGRLenVT();
4081 bool &IsTailCall = CLI.IsTailCall;
4082
4083 MachineFunction &MF = DAG.getMachineFunction();
4084
4085 // Analyze the operands of the call, assigning locations to each operand.
4086 SmallVector<CCValAssign> ArgLocs;
4087 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4088
4089 if (CallConv == CallingConv::GHC)
4090 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
4091 else
4092 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch);
4093
4094 // Check if it's really possible to do a tail call.
4095 if (IsTailCall)
4096 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
4097
4098 if (IsTailCall)
4099 ++NumTailCalls;
4100 else if (CLI.CB && CLI.CB->isMustTailCall())
4101 report_fatal_error(reason: "failed to perform tail call elimination on a call "
4102 "site marked musttail");
4103
4104 // Get a count of how many bytes are to be pushed on the stack.
4105 unsigned NumBytes = ArgCCInfo.getStackSize();
4106
4107 // Create local copies for byval args.
4108 SmallVector<SDValue> ByValArgs;
4109 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4110 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4111 if (!Flags.isByVal())
4112 continue;
4113
4114 SDValue Arg = OutVals[i];
4115 unsigned Size = Flags.getByValSize();
4116 Align Alignment = Flags.getNonZeroByValAlign();
4117
4118 int FI =
4119 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4120 SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
4121 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);
4122
4123 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment,
4124 /*IsVolatile=*/isVol: false,
4125 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4126 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
4127 ByValArgs.push_back(Elt: FIPtr);
4128 }
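// Illustrative example: for a byval argument of size 32 with alignment 8, a
// 32-byte, 8-byte-aligned object is created in this (caller-side) frame, the
// aggregate is memcpy'd into it, and the frame-index pointer collected in
// ByValArgs is what actually gets passed below instead of the original
// pointer.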
4129
4130 if (!IsTailCall)
4131 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
4132
4133 // Copy argument values to their designated locations.
4134 SmallVector<std::pair<Register, SDValue>> RegsToPass;
4135 SmallVector<SDValue> MemOpChains;
4136 SDValue StackPtr;
4137 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4138 CCValAssign &VA = ArgLocs[i];
4139 SDValue ArgValue = OutVals[i];
4140 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4141
4142 // Promote the value if needed.
4143 // For now, only handle fully promoted and indirect arguments.
4144 if (VA.getLocInfo() == CCValAssign::Indirect) {
4145 // Store the argument in a stack slot and pass its address.
4146 Align StackAlign =
4147 std::max(a: getPrefTypeAlign(VT: Outs[i].ArgVT, DAG),
4148 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
4149 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4150 // If the original argument was split and passed by reference, we need to
4151 // store the required parts of it here (and pass just one address).
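// (Illustrative, assuming an argument the ABI chooses to pass indirectly,
// e.g. a scalar wider than 2*GRLen: every following OutVals entry sharing
// this OrigArgIndex is written into one stack temporary at its PartOffset,
// and only the temporary's address is passed on.)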
4152 unsigned ArgIndex = Outs[i].OrigArgIndex;
4153 unsigned ArgPartOffset = Outs[i].PartOffset;
4154 assert(ArgPartOffset == 0);
4155 // Calculate the total size to store. We don't know exactly what we are
4156 // storing until we have walked the remaining parts, so collect them in the
4157 // loop below first.
4158 SmallVector<std::pair<SDValue, SDValue>> Parts;
4159 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4160 SDValue PartValue = OutVals[i + 1];
4161 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4162 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
4163 EVT PartVT = PartValue.getValueType();
4164
4165 StoredSize += PartVT.getStoreSize();
4166 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
4167 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
4168 ++i;
4169 }
4170 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
4171 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
4172 MemOpChains.push_back(
4173 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
4174 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
4175 for (const auto &Part : Parts) {
4176 SDValue PartValue = Part.first;
4177 SDValue PartOffset = Part.second;
4178 SDValue Address =
4179 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
4180 MemOpChains.push_back(
4181 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
4182 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
4183 }
4184 ArgValue = SpillSlot;
4185 } else {
4186 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
4187 }
4188
4189 // Use local copy if it is a byval arg.
4190 if (Flags.isByVal())
4191 ArgValue = ByValArgs[j++];
4192
4193 if (VA.isRegLoc()) {
4194 // Queue up the argument copies and emit them at the end.
4195 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
4196 } else {
4197 assert(VA.isMemLoc() && "Argument not register or memory");
4198 assert(!IsTailCall && "Tail call not allowed if stack is used "
4199 "for passing parameters");
4200
4201 // Work out the address of the stack slot.
4202 if (!StackPtr.getNode())
4203 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4204 SDValue Address =
4205 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
4206 N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));
4207
4208 // Emit the store.
4209 MemOpChains.push_back(
4210 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
4211 }
4212 }
4213
4214 // Join the stores, which are independent of one another.
4215 if (!MemOpChains.empty())
4216 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4217
4218 SDValue Glue;
4219
4220 // Build a sequence of copy-to-reg nodes, chained and glued together.
4221 for (auto &Reg : RegsToPass) {
4222 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
4223 Glue = Chain.getValue(R: 1);
4224 }
4225
4226 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4227 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4228 // split it, and so the direct call can be matched by PseudoCALL.
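// For example (illustrative), a dso_local callee keeps MO_CALL and is emitted
// as a plain direct `bl sym`, while a preemptible symbol gets MO_CALL_PLT and
// is printed with a `%plt(sym)` operand so the call can be routed through the
// PLT.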
4229 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
4230 const GlobalValue *GV = S->getGlobal();
4231 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
4232 ? LoongArchII::MO_CALL
4233 : LoongArchII::MO_CALL_PLT;
4234 Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags);
4235 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
4236 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
4237 ? LoongArchII::MO_CALL
4238 : LoongArchII::MO_CALL_PLT;
4239 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
4240 }
4241
4242 // The first call operand is the chain and the second is the target address.
4243 SmallVector<SDValue> Ops;
4244 Ops.push_back(Elt: Chain);
4245 Ops.push_back(Elt: Callee);
4246
4247 // Add argument registers to the end of the list so that they are
4248 // known live into the call.
4249 for (auto &Reg : RegsToPass)
4250 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
4251
4252 if (!IsTailCall) {
4253 // Add a register mask operand representing the call-preserved registers.
4254 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4255 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4256 assert(Mask && "Missing call preserved mask for calling convention");
4257 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
4258 }
4259
4260 // Glue the call to the argument copies, if any.
4261 if (Glue.getNode())
4262 Ops.push_back(Elt: Glue);
4263
4264 // Emit the call.
4265 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4266 unsigned Op;
4267 switch (DAG.getTarget().getCodeModel()) {
4268 default:
4269 report_fatal_error(reason: "Unsupported code model");
4270 case CodeModel::Small:
4271 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
4272 break;
4273 case CodeModel::Medium:
4274 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
4275 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
4276 break;
4277 case CodeModel::Large:
4278 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4279 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
4280 break;
4281 }
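// Note (illustrative): these are pseudo opcodes that get expanded later.
// Under the small code model CALL/TAIL can become a single bl/b, while the
// medium and large variants need an extra PC-relative address computation,
// hence the LA64-only asserts above.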
4282
4283 if (IsTailCall) {
4284 MF.getFrameInfo().setHasTailCall();
4285 SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
4286 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
4287 return Ret;
4288 }
4289
4290 Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
4291 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
4292 Glue = Chain.getValue(R: 1);
4293
4294 // Mark the end of the call, which is glued to the call itself.
4295 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
4296 Glue = Chain.getValue(R: 1);
4297
4298 // Assign locations to each value returned by this call.
4299 SmallVector<CCValAssign> RVLocs;
4300 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4301 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch);
4302
4303 // Copy all of the result registers out of their specified physreg.
4304 for (auto &VA : RVLocs) {
4305 // Copy the value out.
4306 SDValue RetValue =
4307 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
4308 // Glue the RetValue to the end of the call sequence.
4309 Chain = RetValue.getValue(R: 1);
4310 Glue = RetValue.getValue(R: 2);
4311
4312 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);
4313
4314 InVals.push_back(Elt: RetValue);
4315 }
4316
4317 return Chain;
4318}
4319
4320bool LoongArchTargetLowering::CanLowerReturn(
4321 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4322 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4323 SmallVector<CCValAssign> RVLocs;
4324 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4325
4326 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4327 LoongArchABI::ABI ABI =
4328 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4329 if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full,
4330 ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4331 OrigTy: nullptr))
4332 return false;
4333 }
4334 return true;
4335}
4336
4337SDValue LoongArchTargetLowering::LowerReturn(
4338 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4339 const SmallVectorImpl<ISD::OutputArg> &Outs,
4340 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4341 SelectionDAG &DAG) const {
4342 // Stores the assignment of the return value to a location.
4343 SmallVector<CCValAssign> RVLocs;
4344
4345 // Info about the registers and stack slot.
4346 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4347 *DAG.getContext());
4348
4349 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4350 CLI: nullptr, Fn: CC_LoongArch);
4351 if (CallConv == CallingConv::GHC && !RVLocs.empty())
4352 report_fatal_error(reason: "GHC functions return void only");
4353 SDValue Glue;
4354 SmallVector<SDValue, 4> RetOps(1, Chain);
4355
4356 // Copy the result values into the output registers.
4357 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4358 CCValAssign &VA = RVLocs[i];
4359 assert(VA.isRegLoc() && "Can only return in registers!");
4360
4361 // Handle a 'normal' return.
4362 SDValue Val = convertValVTToLocVT(DAG, Val: OutVals[i], VA, DL);
4363 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
4364
4365 // Guarantee that all emitted copies are stuck together.
4366 Glue = Chain.getValue(R: 1);
4367 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
4368 }
4369
4370 RetOps[0] = Chain; // Update chain.
4371
4372 // Add the glue node if we have it.
4373 if (Glue.getNode())
4374 RetOps.push_back(Elt: Glue);
4375
4376 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4377}
4378
4379bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4380 bool ForCodeSize) const {
4381 // TODO: Maybe need more checks here after vector extension is supported.
4382 if (VT == MVT::f32 && !Subtarget.hasBasicF())
4383 return false;
4384 if (VT == MVT::f64 && !Subtarget.hasBasicD())
4385 return false;
4386 return (Imm.isZero() || Imm.isExactlyValue(V: +1.0));
4387}
4388
4389bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
4390 return true;
4391}
4392
4393bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
4394 return true;
4395}
4396
4397bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4398 const Instruction *I) const {
4399 if (!Subtarget.is64Bit())
4400 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
4401
4402 if (isa<LoadInst>(Val: I))
4403 return true;
4404
4405 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
4406 // require fences because we can use amswap_db.[w/d].
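// Illustrative consequence: a seq_cst i32/i64 atomic store can then be
// selected as a single amswap_db.w/d with no surrounding dbar, while i8/i16
// atomic stores (and atomic loads, handled above) still request fences.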
4407 if (isa<StoreInst>(Val: I)) {
4408 unsigned Size = I->getOperand(i: 0)->getType()->getIntegerBitWidth();
4409 return (Size == 8 || Size == 16);
4410 }
4411
4412 return false;
4413}
4414
4415EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4416 LLVMContext &Context,
4417 EVT VT) const {
4418 if (!VT.isVector())
4419 return getPointerTy(DL);
4420 return VT.changeVectorElementTypeToInteger();
4421}
4422
4423bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
4424 // TODO: Support vectors.
4425 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y);
4426}
4427
4428bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4429 const CallInst &I,
4430 MachineFunction &MF,
4431 unsigned Intrinsic) const {
4432 switch (Intrinsic) {
4433 default:
4434 return false;
4435 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4436 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4437 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4438 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4439 Info.opc = ISD::INTRINSIC_W_CHAIN;
4440 Info.memVT = MVT::i32;
4441 Info.ptrVal = I.getArgOperand(i: 0);
4442 Info.offset = 0;
4443 Info.align = Align(4);
4444 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4445 MachineMemOperand::MOVolatile;
4446 return true;
4447 // TODO: Add more Intrinsics later.
4448 }
4449}
4450
4451TargetLowering::AtomicExpansionKind
4452LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4453 // TODO: Add more AtomicRMWInst that needs to be extended.
4454
4455 // Since floating-point operation requires a non-trivial set of data
4456 // operations, use CmpXChg to expand.
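// For example, `atomicrmw fadd` (or the UIncWrap/UDecWrap operations below)
// is expanded to a cmpxchg loop, while an i8/i16 `atomicrmw add` falls
// through to the masked-intrinsic path at the end of this function.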
4457 if (AI->isFloatingPointOperation() ||
4458 AI->getOperation() == AtomicRMWInst::UIncWrap ||
4459 AI->getOperation() == AtomicRMWInst::UDecWrap)
4460 return AtomicExpansionKind::CmpXChg;
4461
4462 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4463 if (Size == 8 || Size == 16)
4464 return AtomicExpansionKind::MaskedIntrinsic;
4465 return AtomicExpansionKind::None;
4466}
4467
4468static Intrinsic::ID
4469getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4470 AtomicRMWInst::BinOp BinOp) {
4471 if (GRLen == 64) {
4472 switch (BinOp) {
4473 default:
4474 llvm_unreachable("Unexpected AtomicRMW BinOp");
4475 case AtomicRMWInst::Xchg:
4476 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4477 case AtomicRMWInst::Add:
4478 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4479 case AtomicRMWInst::Sub:
4480 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4481 case AtomicRMWInst::Nand:
4482 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4483 case AtomicRMWInst::UMax:
4484 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4485 case AtomicRMWInst::UMin:
4486 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4487 case AtomicRMWInst::Max:
4488 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4489 case AtomicRMWInst::Min:
4490 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4491 // TODO: support other AtomicRMWInst.
4492 }
4493 }
4494
4495 if (GRLen == 32) {
4496 switch (BinOp) {
4497 default:
4498 llvm_unreachable("Unexpected AtomicRMW BinOp");
4499 case AtomicRMWInst::Xchg:
4500 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4501 case AtomicRMWInst::Add:
4502 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4503 case AtomicRMWInst::Sub:
4504 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4505 case AtomicRMWInst::Nand:
4506 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4507 // TODO: support other AtomicRMWInst.
4508 }
4509 }
4510
4511 llvm_unreachable("Unexpected GRLen\n");
4512}
4513
4514TargetLowering::AtomicExpansionKind
4515LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4516 AtomicCmpXchgInst *CI) const {
4517 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4518 if (Size == 8 || Size == 16)
4519 return AtomicExpansionKind::MaskedIntrinsic;
4520 return AtomicExpansionKind::None;
4521}
4522
4523Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4524 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4525 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4526 AtomicOrdering FailOrd = CI->getFailureOrdering();
4527 Value *FailureOrdering =
4528 Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));
4529
4530 // TODO: Support cmpxchg on LA32.
4531 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4532 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
4533 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
4534 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
4535 Type *Tys[] = {AlignedAddr->getType()};
4536 Function *MaskedCmpXchg =
4537 Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys);
4538 Value *Result = Builder.CreateCall(
4539 Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
4540 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
4541 return Result;
4542}
4543
4544Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4545 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4546 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4547 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4548 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4549 // mask, as this produces better code than the LL/SC loop emitted by
4550 // int_loongarch_masked_atomicrmw_xchg.
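// For example (illustrative), `atomicrmw xchg ptr %p, i8 0` becomes an
// `atomicrmw and` with the inverted mask (clearing just the addressed byte of
// the aligned word), and an xchg with -1 becomes an `atomicrmw or` with the
// mask (setting that byte).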
4551 if (AI->getOperation() == AtomicRMWInst::Xchg &&
4552 isa<ConstantInt>(Val: AI->getValOperand())) {
4553 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
4554 if (CVal->isZero())
4555 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
4556 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
4557 Align: AI->getAlign(), Ordering: Ord);
4558 if (CVal->isMinusOne())
4559 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
4560 Align: AI->getAlign(), Ordering: Ord);
4561 }
4562
4563 unsigned GRLen = Subtarget.getGRLen();
4564 Value *Ordering =
4565 Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
4566 Type *Tys[] = {AlignedAddr->getType()};
4567 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4568 M: AI->getModule(),
4569 id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys);
4570
4571 if (GRLen == 64) {
4572 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
4573 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
4574 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
4575 }
4576
4577 Value *Result;
4578
4579 // Must pass the shift amount needed to sign extend the loaded value prior
4580 // to performing a signed comparison for min/max. ShiftAmt is the number of
4581 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4582 // is the number of bits to left+right shift the value in order to
4583 // sign-extend.
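// Worked example (assuming GRLen == 64): for an i8 located at byte offset 1
// of the aligned word, ShiftAmt == 8 and ValWidth == 8, so SextShamt ==
// 64 - 8 - 8 == 48; shifting the loaded word left and then arithmetically
// right by 48 leaves the byte sign-extended for the signed comparison.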
4584 if (AI->getOperation() == AtomicRMWInst::Min ||
4585 AI->getOperation() == AtomicRMWInst::Max) {
4586 const DataLayout &DL = AI->getModule()->getDataLayout();
4587 unsigned ValWidth =
4588 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
4589 Value *SextShamt =
4590 Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
4591 Result = Builder.CreateCall(Callee: LlwOpScwLoop,
4592 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4593 } else {
4594 Result =
4595 Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
4596 }
4597
4598 if (GRLen == 64)
4599 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
4600 return Result;
4601}
4602
4603bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4604 const MachineFunction &MF, EVT VT) const {
4605 VT = VT.getScalarType();
4606
4607 if (!VT.isSimple())
4608 return false;
4609
4610 switch (VT.getSimpleVT().SimpleTy) {
4611 case MVT::f32:
4612 case MVT::f64:
4613 return true;
4614 default:
4615 break;
4616 }
4617
4618 return false;
4619}
4620
4621Register LoongArchTargetLowering::getExceptionPointerRegister(
4622 const Constant *PersonalityFn) const {
4623 return LoongArch::R4;
4624}
4625
4626Register LoongArchTargetLowering::getExceptionSelectorRegister(
4627 const Constant *PersonalityFn) const {
4628 return LoongArch::R5;
4629}
4630
4631//===----------------------------------------------------------------------===//
4632// LoongArch Inline Assembly Support
4633//===----------------------------------------------------------------------===//
4634
4635LoongArchTargetLowering::ConstraintType
4636LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4637 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4638 //
4639 // 'f': A floating-point register (if available).
4640 // 'k': A memory operand whose address is formed by a base register and
4641 // (optionally scaled) index register.
4642 // 'l': A signed 16-bit constant.
4643 // 'm': A memory operand whose address is formed by a base register and
4644 // offset that is suitable for use in instructions with the same
4645 // addressing mode as st.w and ld.w.
4646 // 'I': A signed 12-bit constant (for arithmetic instructions).
4647 // 'J': Integer zero.
4648 // 'K': An unsigned 12-bit constant (for logic instructions).
4649 // "ZB": An address that is held in a general-purpose register. The offset is
4650 // zero.
4651 // "ZC": A memory operand whose address is formed by a base register and
4652 // offset that is suitable for use in instructions with the same
4653 // addressing mode as ll.w and sc.w.
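// For example, GCC-style inline asm wrapping an ll.w/sc.w sequence would
// constrain its memory operand with "ZC" (C_Memory below), an arithmetic
// immediate with "I" and a logic immediate with "K" (both C_Immediate,
// validated later in LowerAsmOperandForConstraint).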
4654 if (Constraint.size() == 1) {
4655 switch (Constraint[0]) {
4656 default:
4657 break;
4658 case 'f':
4659 return C_RegisterClass;
4660 case 'l':
4661 case 'I':
4662 case 'J':
4663 case 'K':
4664 return C_Immediate;
4665 case 'k':
4666 return C_Memory;
4667 }
4668 }
4669
4670 if (Constraint == "ZC" || Constraint == "ZB")
4671 return C_Memory;
4672
4673 // 'm' is handled here.
4674 return TargetLowering::getConstraintType(Constraint);
4675}
4676
4677InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4678 StringRef ConstraintCode) const {
4679 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4680 .Case(S: "k", Value: InlineAsm::ConstraintCode::k)
4681 .Case(S: "ZB", Value: InlineAsm::ConstraintCode::ZB)
4682 .Case(S: "ZC", Value: InlineAsm::ConstraintCode::ZC)
4683 .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4684}
4685
4686std::pair<unsigned, const TargetRegisterClass *>
4687LoongArchTargetLowering::getRegForInlineAsmConstraint(
4688 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4689 // First, see if this is a constraint that directly corresponds to a LoongArch
4690 // register class.
4691 if (Constraint.size() == 1) {
4692 switch (Constraint[0]) {
4693 case 'r':
4694 // TODO: Support fixed vectors up to GRLen?
4695 if (VT.isVector())
4696 break;
4697 return std::make_pair(0U, &LoongArch::GPRRegClass);
4698 case 'f':
4699 if (Subtarget.hasBasicF() && VT == MVT::f32)
4700 return std::make_pair(0U, &LoongArch::FPR32RegClass);
4701 if (Subtarget.hasBasicD() && VT == MVT::f64)
4702 return std::make_pair(0U, &LoongArch::FPR64RegClass);
4703 if (Subtarget.hasExtLSX() &&
4704 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
4705 return std::make_pair(0U, &LoongArch::LSX128RegClass);
4706 if (Subtarget.hasExtLASX() &&
4707 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
4708 return std::make_pair(0U, &LoongArch::LASX256RegClass);
4709 break;
4710 default:
4711 break;
4712 }
4713 }
4714
4715 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
4716 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
4717 // constraints while the official register name is prefixed with a '$'. So we
4718 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
4719 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
4720 // case insensitive, so no need to convert the constraint to upper case here.
4721 //
4722 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
4723 // decode the usage of register name aliases into their official names. And
4724 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
4725 // official register names.
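// For example (illustrative), the constraint "{$f3}" is clipped to "{f3}"
// below; if the D extension is available and VT is f64, the F3 returned by
// the generic lookup is then promoted to F3_64 in FPR64RegClass.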
4726 if (Constraint.starts_with(Prefix: "{$r") || Constraint.starts_with(Prefix: "{$f") ||
4727 Constraint.starts_with(Prefix: "{$vr") || Constraint.starts_with(Prefix: "{$xr")) {
4728 bool IsFP = Constraint[2] == 'f';
4729 std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$');
4730 std::pair<unsigned, const TargetRegisterClass *> R;
4731 R = TargetLowering::getRegForInlineAsmConstraint(
4732 TRI, Constraint: join_items(Separator: "", Items&: Temp.first, Items&: Temp.second), VT);
4733 // Match those names to the widest floating point register type available.
4734 if (IsFP) {
4735 unsigned RegNo = R.first;
4736 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
4737 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
4738 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
4739 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
4740 }
4741 }
4742 }
4743 return R;
4744 }
4745
4746 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4747}
4748
4749void LoongArchTargetLowering::LowerAsmOperandForConstraint(
4750 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
4751 SelectionDAG &DAG) const {
4752 // Currently only support length 1 constraints.
4753 if (Constraint.size() == 1) {
4754 switch (Constraint[0]) {
4755 case 'l':
4756 // Validate & create a 16-bit signed immediate operand.
4757 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
4758 uint64_t CVal = C->getSExtValue();
4759 if (isInt<16>(x: CVal))
4760 Ops.push_back(
4761 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
4762 }
4763 return;
4764 case 'I':
4765 // Validate & create a 12-bit signed immediate operand.
4766 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
4767 uint64_t CVal = C->getSExtValue();
4768 if (isInt<12>(x: CVal))
4769 Ops.push_back(
4770 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
4771 }
4772 return;
4773 case 'J':
4774 // Validate & create an integer zero operand.
4775 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
4776 if (C->getZExtValue() == 0)
4777 Ops.push_back(
4778 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
4779 return;
4780 case 'K':
4781 // Validate & create a 12-bit unsigned immediate operand.
4782 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
4783 uint64_t CVal = C->getZExtValue();
4784 if (isUInt<12>(x: CVal))
4785 Ops.push_back(
4786 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
4787 }
4788 return;
4789 default:
4790 break;
4791 }
4792 }
4793 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4794}
4795
4796#define GET_REGISTER_MATCHER
4797#include "LoongArchGenAsmMatcher.inc"
4798
4799Register
4800LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4801 const MachineFunction &MF) const {
4802 std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$');
4803 std::string NewRegName = Name.second.str();
4804 Register Reg = MatchRegisterAltName(NewRegName);
4805 if (Reg == LoongArch::NoRegister)
4806 Reg = MatchRegisterName(NewRegName);
4807 if (Reg == LoongArch::NoRegister)
4808 report_fatal_error(
4809 reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4810 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4811 if (!ReservedRegs.test(Idx: Reg))
4812 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
4813 StringRef(RegName) + "\"."));
4814 return Reg;
4815}
4816
4817bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
4818 EVT VT, SDValue C) const {
4819 // TODO: Support vectors.
4820 if (!VT.isScalarInteger())
4821 return false;
4822
4823 // Omit the optimization if the data size exceeds GRLen.
4824 if (VT.getSizeInBits() > Subtarget.getGRLen())
4825 return false;
4826
4827 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
4828 const APInt &Imm = ConstNode->getAPIntValue();
4829 // Break MUL into (SLLI + ADD/SUB) or ALSL.
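// e.g. (illustrative) x * 17 == (x << 4) + x maps to a single alsl.[wd],
// and x * 15 == (x << 4) - x to a slli plus a sub.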
4830 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4831 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4832 return true;
4833 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
4834 if (ConstNode->hasOneUse() &&
4835 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
4836 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
4837 return true;
4838 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
4839 // in which the immediate has two set bits, or break (MUL x, imm)
4840 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
4841 // equals (1 << s0) - (1 << s1).
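// For example (illustrative), Imm == 4160 == (1 << 12) + (1 << 6) matches
// none of the checks above, has Shifts == 6 and ImmSmall == 64, and
// (Imm - ImmSmall) == 4096 is a power of 2, so the multiply is broken into
// two shifts and an add.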
4842 if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) {
4843 unsigned Shifts = Imm.countr_zero();
4844 // Reject immediates which can be composed via a single LUI.
4845 if (Shifts >= 12)
4846 return false;
4847 // Reject multiplications that can be optimized to
4848 // (SLLI (ALSL x, x, 1/2/3/4), s).
4849 APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
4850 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
4851 return false;
4852 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
4853 // since it needs one more instruction than the other three cases.
4854 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
4855 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
4856 (ImmSmall - Imm).isPowerOf2())
4857 return true;
4858 }
4859 }
4860
4861 return false;
4862}
4863
4864bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
4865 const AddrMode &AM,
4866 Type *Ty, unsigned AS,
4867 Instruction *I) const {
4868 // LoongArch has four basic addressing modes:
4869 // 1. reg
4870 // 2. reg + 12-bit signed offset
4871 // 3. reg + 14-bit signed offset left-shifted by 2
4872 // 4. reg1 + reg2
4873 // TODO: Add more checks once the vector extensions are supported.
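// Illustrative mapping (not exhaustive): mode 2 corresponds to the si12
// offsets of ld.w/st.w, mode 3 to the si14<<2 offsets of ldptr.w/stptr.w,
// and mode 4 to the register-indexed ldx/stx forms.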
4874
4875 // No global is ever allowed as a base.
4876 if (AM.BaseGV)
4877 return false;
4878
4879 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
4880 // with `UAL` feature.
4881 if (!isInt<12>(x: AM.BaseOffs) &&
4882 !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL()))
4883 return false;
4884
4885 switch (AM.Scale) {
4886 case 0:
4887 // "r+i" or just "i", depending on HasBaseReg.
4888 break;
4889 case 1:
4890 // "r+r+i" is not allowed.
4891 if (AM.HasBaseReg && AM.BaseOffs)
4892 return false;
4893 // Otherwise we have "r+r" or "r+i".
4894 break;
4895 case 2:
4896 // "2*r+r" or "2*r+i" is not allowed.
4897 if (AM.HasBaseReg || AM.BaseOffs)
4898 return false;
4899 // Allow "2*r" as "r+r".
4900 break;
4901 default:
4902 return false;
4903 }
4904
4905 return true;
4906}
4907
4908bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4909 return isInt<12>(x: Imm);
4910}
4911
4912bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
4913 return isInt<12>(x: Imm);
4914}
4915
4916bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
4917 // Zexts are free if they can be combined with a load.
4918 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
4919 // poorly with type legalization of compares preferring sext.
4920 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
4921 EVT MemVT = LD->getMemoryVT();
4922 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
4923 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
4924 LD->getExtensionType() == ISD::ZEXTLOAD))
4925 return true;
4926 }
4927
4928 return TargetLowering::isZExtFree(Val, VT2);
4929}
4930
4931bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
4932 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
4933}
4934
4935bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4936 // TODO: Support vectors.
4937 if (Y.getValueType().isVector())
4938 return false;
4939
4940 return !isa<ConstantSDNode>(Val: Y);
4941}
4942
4943ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
4944 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
4945 return ISD::SIGN_EXTEND;
4946}
4947
