1 | //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that LoongArch uses to lower LLVM code into |
10 | // a selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "LoongArchISelLowering.h" |
15 | #include "LoongArch.h" |
16 | #include "LoongArchMachineFunctionInfo.h" |
17 | #include "LoongArchRegisterInfo.h" |
18 | #include "LoongArchSubtarget.h" |
19 | #include "LoongArchTargetMachine.h" |
20 | #include "MCTargetDesc/LoongArchBaseInfo.h" |
21 | #include "MCTargetDesc/LoongArchMCTargetDesc.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/ADT/StringExtras.h" |
24 | #include "llvm/CodeGen/ISDOpcodes.h" |
25 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
26 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
27 | #include "llvm/IR/IRBuilder.h" |
28 | #include "llvm/IR/IntrinsicsLoongArch.h" |
29 | #include "llvm/Support/CodeGen.h" |
30 | #include "llvm/Support/Debug.h" |
31 | #include "llvm/Support/ErrorHandling.h" |
32 | #include "llvm/Support/KnownBits.h" |
33 | #include "llvm/Support/MathExtras.h" |
34 | |
35 | using namespace llvm; |
36 | |
37 | #define DEBUG_TYPE "loongarch-isel-lowering" |
38 | |
39 | STATISTIC(NumTailCalls, "Number of tail calls" ); |
40 | |
41 | static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division" , cl::Hidden, |
42 | cl::desc("Trap on integer division by zero." ), |
43 | cl::init(Val: false)); |
44 | |
45 | LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, |
46 | const LoongArchSubtarget &STI) |
47 | : TargetLowering(TM), Subtarget(STI) { |
48 | |
49 | MVT GRLenVT = Subtarget.getGRLenVT(); |
50 | |
51 | // Set up the register classes. |
52 | |
53 | addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass); |
54 | if (Subtarget.hasBasicF()) |
55 | addRegisterClass(MVT::VT: f32, RC: &LoongArch::FPR32RegClass); |
56 | if (Subtarget.hasBasicD()) |
57 | addRegisterClass(MVT::VT: f64, RC: &LoongArch::FPR64RegClass); |
58 | |
59 | static const MVT::SimpleValueType LSXVTs[] = { |
60 | MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; |
61 | static const MVT::SimpleValueType LASXVTs[] = { |
62 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; |
63 | |
64 | if (Subtarget.hasExtLSX()) |
65 | for (MVT VT : LSXVTs) |
66 | addRegisterClass(VT, &LoongArch::LSX128RegClass); |
67 | |
68 | if (Subtarget.hasExtLASX()) |
69 | for (MVT VT : LASXVTs) |
70 | addRegisterClass(VT, &LoongArch::LASX256RegClass); |
71 | |
72 | // Set operations for LA32 and LA64. |
73 | |
74 | setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, |
75 | MVT::i1, Promote); |
76 | |
77 | setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom); |
78 | setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom); |
79 | setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom); |
80 | setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom); |
81 | setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand); |
82 | setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand); |
83 | |
84 | setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, |
85 | ISD::JumpTable, ISD::GlobalTLSAddress}, |
86 | VT: GRLenVT, Action: Custom); |
87 | |
88 | setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom); |
89 | |
90 | setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand); |
91 | setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); |
92 | setOperationAction(ISD::VASTART, MVT::Other, Custom); |
93 | setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); |
94 | |
95 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
96 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
97 | |
98 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
99 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
100 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
101 | |
102 | // Expand bitreverse.i16 with native-width bitrev and shift for now, before |
103 | // we get to know which of sll and revb.2h is faster. |
104 | setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); |
105 | setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal); |
106 | |
107 | // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and |
108 | // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 |
109 | // and i32 could still be byte-swapped relatively cheaply. |
110 | setOperationAction(ISD::BSWAP, MVT::i16, Custom); |
111 | |
112 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
113 | setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand); |
114 | setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand); |
115 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
116 | setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand); |
117 | |
118 | setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom); |
119 | setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand); |
120 | |
121 | // Set operations for LA64 only. |
122 | |
123 | if (Subtarget.is64Bit()) { |
124 | setOperationAction(ISD::SHL, MVT::i32, Custom); |
125 | setOperationAction(ISD::SRA, MVT::i32, Custom); |
126 | setOperationAction(ISD::SRL, MVT::i32, Custom); |
127 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
128 | setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
129 | setOperationAction(ISD::ROTR, MVT::i32, Custom); |
130 | setOperationAction(ISD::ROTL, MVT::i32, Custom); |
131 | setOperationAction(ISD::CTTZ, MVT::i32, Custom); |
132 | setOperationAction(ISD::CTLZ, MVT::i32, Custom); |
133 | setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); |
134 | setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); |
135 | setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); |
136 | setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); |
137 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); |
138 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); |
139 | |
140 | setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); |
141 | setOperationAction(ISD::BSWAP, MVT::i32, Custom); |
142 | } |
143 | |
144 | // Set operations for LA32 only. |
145 | |
146 | if (!Subtarget.is64Bit()) { |
147 | setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); |
148 | setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); |
149 | setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); |
150 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); |
151 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
152 | |
153 | // Set libcalls. |
154 | setLibcallName(Call: RTLIB::MUL_I128, Name: nullptr); |
155 | // The MULO libcall is not part of libgcc, only compiler-rt. |
156 | setLibcallName(Call: RTLIB::MULO_I64, Name: nullptr); |
157 | } |
158 | |
159 | // The MULO libcall is not part of libgcc, only compiler-rt. |
160 | setLibcallName(Call: RTLIB::MULO_I128, Name: nullptr); |
161 | |
162 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); |
163 | |
164 | static const ISD::CondCode FPCCToExpand[] = { |
165 | ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, |
166 | ISD::SETGE, ISD::SETNE, ISD::SETGT}; |
167 | |
168 | // Set operations for 'F' feature. |
169 | |
170 | if (Subtarget.hasBasicF()) { |
171 | setCondCodeAction(FPCCToExpand, MVT::f32, Expand); |
172 | |
173 | setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); |
174 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); |
175 | setOperationAction(ISD::FMA, MVT::f32, Legal); |
176 | setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); |
177 | setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); |
178 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); |
179 | setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); |
180 | setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal); |
181 | setOperationAction(ISD::FSIN, MVT::f32, Expand); |
182 | setOperationAction(ISD::FCOS, MVT::f32, Expand); |
183 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
184 | setOperationAction(ISD::FPOW, MVT::f32, Expand); |
185 | setOperationAction(ISD::FREM, MVT::f32, Expand); |
186 | |
187 | if (Subtarget.is64Bit()) |
188 | setOperationAction(ISD::FRINT, MVT::f32, Legal); |
189 | |
190 | if (!Subtarget.hasBasicD()) { |
191 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
192 | if (Subtarget.is64Bit()) { |
193 | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
194 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
195 | } |
196 | } |
197 | } |
198 | |
199 | // Set operations for 'D' feature. |
200 | |
201 | if (Subtarget.hasBasicD()) { |
202 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
203 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
204 | setCondCodeAction(FPCCToExpand, MVT::f64, Expand); |
205 | |
206 | setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); |
207 | setOperationAction(ISD::BR_CC, MVT::f64, Expand); |
208 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); |
209 | setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); |
210 | setOperationAction(ISD::FMA, MVT::f64, Legal); |
211 | setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); |
212 | setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); |
213 | setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal); |
214 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
215 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
216 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
217 | setOperationAction(ISD::FPOW, MVT::f64, Expand); |
218 | setOperationAction(ISD::FREM, MVT::f64, Expand); |
219 | |
220 | if (Subtarget.is64Bit()) |
221 | setOperationAction(ISD::FRINT, MVT::f64, Legal); |
222 | } |
223 | |
224 | // Set operations for 'LSX' feature. |
225 | |
226 | if (Subtarget.hasExtLSX()) { |
227 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
228 | // Expand all truncating stores and extending loads. |
229 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
230 | setTruncStoreAction(VT, InnerVT, Expand); |
231 | setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
232 | setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
233 | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
234 | } |
235 | // By default everything must be expanded. Then we will selectively turn |
236 | // on ones that can be effectively codegen'd. |
237 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
238 | setOperationAction(Op, VT, Expand); |
239 | } |
240 | |
241 | for (MVT VT : LSXVTs) { |
242 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); |
243 | setOperationAction(ISD::BITCAST, VT, Legal); |
244 | setOperationAction(ISD::UNDEF, VT, Legal); |
245 | |
246 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
247 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); |
248 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
249 | |
250 | setOperationAction(ISD::SETCC, VT, Legal); |
251 | setOperationAction(ISD::VSELECT, VT, Legal); |
252 | } |
253 | for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { |
254 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
255 | setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); |
256 | setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
257 | Legal); |
258 | setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
259 | VT, Legal); |
260 | setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); |
261 | setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); |
262 | setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); |
263 | setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); |
264 | setCondCodeAction( |
265 | {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
266 | Expand); |
267 | } |
268 | for (MVT VT : {MVT::v4i32, MVT::v2i64}) { |
269 | setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); |
270 | setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); |
271 | } |
272 | for (MVT VT : {MVT::v4f32, MVT::v2f64}) { |
273 | setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); |
274 | setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); |
275 | setOperationAction(ISD::FMA, VT, Legal); |
276 | setOperationAction(ISD::FSQRT, VT, Legal); |
277 | setOperationAction(ISD::FNEG, VT, Legal); |
278 | setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
279 | ISD::SETUGE, ISD::SETUGT}, |
280 | VT, Expand); |
281 | } |
282 | } |
283 | |
284 | // Set operations for 'LASX' feature. |
285 | |
286 | if (Subtarget.hasExtLASX()) { |
287 | for (MVT VT : LASXVTs) { |
288 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); |
289 | setOperationAction(ISD::BITCAST, VT, Legal); |
290 | setOperationAction(ISD::UNDEF, VT, Legal); |
291 | |
292 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
293 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
294 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
295 | |
296 | setOperationAction(ISD::SETCC, VT, Legal); |
297 | setOperationAction(ISD::VSELECT, VT, Legal); |
298 | } |
299 | for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { |
300 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
301 | setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); |
302 | setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
303 | Legal); |
304 | setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
305 | VT, Legal); |
306 | setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); |
307 | setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); |
308 | setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); |
309 | setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); |
310 | setCondCodeAction( |
311 | {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
312 | Expand); |
313 | } |
314 | for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { |
315 | setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); |
316 | setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); |
317 | } |
318 | for (MVT VT : {MVT::v8f32, MVT::v4f64}) { |
319 | setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); |
320 | setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); |
321 | setOperationAction(ISD::FMA, VT, Legal); |
322 | setOperationAction(ISD::FSQRT, VT, Legal); |
323 | setOperationAction(ISD::FNEG, VT, Legal); |
324 | setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
325 | ISD::SETUGE, ISD::SETUGT}, |
326 | VT, Expand); |
327 | } |
328 | } |
329 | |
330 | // Set DAG combine for LA32 and LA64. |
331 | |
332 | setTargetDAGCombine(ISD::AND); |
333 | setTargetDAGCombine(ISD::OR); |
334 | setTargetDAGCombine(ISD::SRL); |
335 | |
336 | // Set DAG combine for 'LSX' feature. |
337 | |
338 | if (Subtarget.hasExtLSX()) |
339 | setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
340 | |
341 | // Compute derived properties from the register classes. |
342 | computeRegisterProperties(Subtarget.getRegisterInfo()); |
343 | |
344 | setStackPointerRegisterToSaveRestore(LoongArch::R3); |
345 | |
346 | setBooleanContents(ZeroOrOneBooleanContent); |
347 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
348 | |
349 | setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); |
350 | |
351 | setMinCmpXchgSizeInBits(32); |
352 | |
353 | // Function alignments. |
354 | setMinFunctionAlignment(Align(4)); |
355 | // Set preferred alignments. |
356 | setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); |
357 | setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); |
358 | setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); |
359 | } |
360 | |
361 | bool LoongArchTargetLowering::isOffsetFoldingLegal( |
362 | const GlobalAddressSDNode *GA) const { |
363 | // In order to maximise the opportunity for common subexpression elimination, |
364 | // keep a separate ADD node for the global address offset instead of folding |
365 | // it in the global address node. Later peephole optimisations may choose to |
366 | // fold it back in when profitable. |
367 | return false; |
368 | } |
369 | |
370 | SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, |
371 | SelectionDAG &DAG) const { |
372 | switch (Op.getOpcode()) { |
373 | case ISD::ATOMIC_FENCE: |
374 | return lowerATOMIC_FENCE(Op, DAG); |
375 | case ISD::EH_DWARF_CFA: |
376 | return lowerEH_DWARF_CFA(Op, DAG); |
377 | case ISD::GlobalAddress: |
378 | return lowerGlobalAddress(Op, DAG); |
379 | case ISD::GlobalTLSAddress: |
380 | return lowerGlobalTLSAddress(Op, DAG); |
381 | case ISD::INTRINSIC_WO_CHAIN: |
382 | return lowerINTRINSIC_WO_CHAIN(Op, DAG); |
383 | case ISD::INTRINSIC_W_CHAIN: |
384 | return lowerINTRINSIC_W_CHAIN(Op, DAG); |
385 | case ISD::INTRINSIC_VOID: |
386 | return lowerINTRINSIC_VOID(Op, DAG); |
387 | case ISD::BlockAddress: |
388 | return lowerBlockAddress(Op, DAG); |
389 | case ISD::JumpTable: |
390 | return lowerJumpTable(Op, DAG); |
391 | case ISD::SHL_PARTS: |
392 | return lowerShiftLeftParts(Op, DAG); |
393 | case ISD::SRA_PARTS: |
394 | return lowerShiftRightParts(Op, DAG, IsSRA: true); |
395 | case ISD::SRL_PARTS: |
396 | return lowerShiftRightParts(Op, DAG, IsSRA: false); |
397 | case ISD::ConstantPool: |
398 | return lowerConstantPool(Op, DAG); |
399 | case ISD::FP_TO_SINT: |
400 | return lowerFP_TO_SINT(Op, DAG); |
401 | case ISD::BITCAST: |
402 | return lowerBITCAST(Op, DAG); |
403 | case ISD::UINT_TO_FP: |
404 | return lowerUINT_TO_FP(Op, DAG); |
405 | case ISD::SINT_TO_FP: |
406 | return lowerSINT_TO_FP(Op, DAG); |
407 | case ISD::VASTART: |
408 | return lowerVASTART(Op, DAG); |
409 | case ISD::FRAMEADDR: |
410 | return lowerFRAMEADDR(Op, DAG); |
411 | case ISD::RETURNADDR: |
412 | return lowerRETURNADDR(Op, DAG); |
413 | case ISD::WRITE_REGISTER: |
414 | return lowerWRITE_REGISTER(Op, DAG); |
415 | case ISD::INSERT_VECTOR_ELT: |
416 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
417 | case ISD::EXTRACT_VECTOR_ELT: |
418 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
419 | case ISD::BUILD_VECTOR: |
420 | return lowerBUILD_VECTOR(Op, DAG); |
421 | case ISD::VECTOR_SHUFFLE: |
422 | return lowerVECTOR_SHUFFLE(Op, DAG); |
423 | } |
424 | return SDValue(); |
425 | } |
426 | |
427 | SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
428 | SelectionDAG &DAG) const { |
429 | // TODO: custom shuffle. |
430 | return SDValue(); |
431 | } |
432 | |
433 | static bool isConstantOrUndef(const SDValue Op) { |
434 | if (Op->isUndef()) |
435 | return true; |
436 | if (isa<ConstantSDNode>(Val: Op)) |
437 | return true; |
438 | if (isa<ConstantFPSDNode>(Val: Op)) |
439 | return true; |
440 | return false; |
441 | } |
442 | |
443 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
444 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
445 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
446 | return true; |
447 | return false; |
448 | } |
449 | |
450 | SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
451 | SelectionDAG &DAG) const { |
452 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
453 | EVT ResTy = Op->getValueType(ResNo: 0); |
454 | SDLoc DL(Op); |
455 | APInt SplatValue, SplatUndef; |
456 | unsigned SplatBitSize; |
457 | bool HasAnyUndefs; |
458 | bool Is128Vec = ResTy.is128BitVector(); |
459 | bool Is256Vec = ResTy.is256BitVector(); |
460 | |
461 | if ((!Subtarget.hasExtLSX() || !Is128Vec) && |
462 | (!Subtarget.hasExtLASX() || !Is256Vec)) |
463 | return SDValue(); |
464 | |
465 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
466 | /*MinSplatBits=*/8) && |
467 | SplatBitSize <= 64) { |
468 | // We can only cope with 8, 16, 32, or 64-bit elements. |
469 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
470 | SplatBitSize != 64) |
471 | return SDValue(); |
472 | |
473 | EVT ViaVecTy; |
474 | |
475 | switch (SplatBitSize) { |
476 | default: |
477 | return SDValue(); |
478 | case 8: |
479 | ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; |
480 | break; |
481 | case 16: |
482 | ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; |
483 | break; |
484 | case 32: |
485 | ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; |
486 | break; |
487 | case 64: |
488 | ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; |
489 | break; |
490 | } |
491 | |
492 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
493 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
494 | |
495 | // Bitcast to the type we originally wanted. |
496 | if (ViaVecTy != ResTy) |
497 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
498 | |
499 | return Result; |
500 | } |
501 | |
502 | if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false)) |
503 | return Op; |
504 | |
505 | if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
506 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
507 | // The resulting code is the same length as the expansion, but it doesn't |
508 | // use memory operations. |
509 | EVT ResTy = Node->getValueType(ResNo: 0); |
510 | |
511 | assert(ResTy.isVector()); |
512 | |
513 | unsigned NumElts = ResTy.getVectorNumElements(); |
514 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
515 | for (unsigned i = 0; i < NumElts; ++i) { |
516 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
517 | N2: Node->getOperand(Num: i), |
518 | N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT())); |
519 | } |
520 | return Vector; |
521 | } |
522 | |
523 | return SDValue(); |
524 | } |
525 | |
526 | SDValue |
527 | LoongArchTargetLowering::(SDValue Op, |
528 | SelectionDAG &DAG) const { |
529 | EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0); |
530 | SDValue Idx = Op->getOperand(Num: 1); |
531 | EVT EltTy = VecTy.getVectorElementType(); |
532 | unsigned NumElts = VecTy.getVectorNumElements(); |
533 | |
534 | if (isa<ConstantSDNode>(Idx) && |
535 | (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || |
536 | EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) |
537 | return Op; |
538 | |
539 | return SDValue(); |
540 | } |
541 | |
542 | SDValue |
543 | LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
544 | SelectionDAG &DAG) const { |
545 | if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2))) |
546 | return Op; |
547 | return SDValue(); |
548 | } |
549 | |
550 | SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
551 | SelectionDAG &DAG) const { |
552 | SDLoc DL(Op); |
553 | SyncScope::ID FenceSSID = |
554 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
555 | |
556 | // singlethread fences only synchronize with signal handlers on the same |
557 | // thread and thus only need to preserve instruction order, not actually |
558 | // enforce memory ordering. |
559 | if (FenceSSID == SyncScope::SingleThread) |
560 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
561 | return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); |
562 | |
563 | return Op; |
564 | } |
565 | |
566 | SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, |
567 | SelectionDAG &DAG) const { |
568 | |
569 | if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) { |
570 | DAG.getContext()->emitError( |
571 | ErrorStr: "On LA64, only 64-bit registers can be written." ); |
572 | return Op.getOperand(i: 0); |
573 | } |
574 | |
575 | if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) { |
576 | DAG.getContext()->emitError( |
577 | ErrorStr: "On LA32, only 32-bit registers can be written." ); |
578 | return Op.getOperand(i: 0); |
579 | } |
580 | |
581 | return Op; |
582 | } |
583 | |
584 | SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, |
585 | SelectionDAG &DAG) const { |
586 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) { |
587 | DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must " |
588 | "be a constant integer" ); |
589 | return SDValue(); |
590 | } |
591 | |
592 | MachineFunction &MF = DAG.getMachineFunction(); |
593 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
594 | Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); |
595 | EVT VT = Op.getValueType(); |
596 | SDLoc DL(Op); |
597 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
598 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
599 | int GRLenInBytes = Subtarget.getGRLen() / 8; |
600 | |
601 | while (Depth--) { |
602 | int Offset = -(GRLenInBytes * 2); |
603 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
604 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
605 | FrameAddr = |
606 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
607 | } |
608 | return FrameAddr; |
609 | } |
610 | |
611 | SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, |
612 | SelectionDAG &DAG) const { |
613 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
614 | return SDValue(); |
615 | |
616 | // Currently only support lowering return address for current frame. |
617 | if (Op.getConstantOperandVal(i: 0) != 0) { |
618 | DAG.getContext()->emitError( |
619 | ErrorStr: "return address can only be determined for the current frame" ); |
620 | return SDValue(); |
621 | } |
622 | |
623 | MachineFunction &MF = DAG.getMachineFunction(); |
624 | MF.getFrameInfo().setReturnAddressIsTaken(true); |
625 | MVT GRLenVT = Subtarget.getGRLenVT(); |
626 | |
627 | // Return the value of the return address register, marking it an implicit |
628 | // live-in. |
629 | Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(), |
630 | RC: getRegClassFor(VT: GRLenVT)); |
631 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT); |
632 | } |
633 | |
634 | SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
635 | SelectionDAG &DAG) const { |
636 | MachineFunction &MF = DAG.getMachineFunction(); |
637 | auto Size = Subtarget.getGRLen() / 8; |
638 | auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false); |
639 | return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
640 | } |
641 | |
642 | SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, |
643 | SelectionDAG &DAG) const { |
644 | MachineFunction &MF = DAG.getMachineFunction(); |
645 | auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); |
646 | |
647 | SDLoc DL(Op); |
648 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
649 | VT: getPointerTy(DL: MF.getDataLayout())); |
650 | |
651 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
652 | // memory location argument. |
653 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
654 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
655 | PtrInfo: MachinePointerInfo(SV)); |
656 | } |
657 | |
658 | SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, |
659 | SelectionDAG &DAG) const { |
660 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
661 | !Subtarget.hasBasicD() && "unexpected target features" ); |
662 | |
663 | SDLoc DL(Op); |
664 | SDValue Op0 = Op.getOperand(i: 0); |
665 | if (Op0->getOpcode() == ISD::AND) { |
666 | auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1)); |
667 | if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) |
668 | return Op; |
669 | } |
670 | |
671 | if (Op0->getOpcode() == LoongArchISD::BSTRPICK && |
672 | Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) && |
673 | Op0.getConstantOperandVal(i: 2) == UINT64_C(0)) |
674 | return Op; |
675 | |
676 | if (Op0.getOpcode() == ISD::AssertZext && |
677 | dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32)) |
678 | return Op; |
679 | |
680 | EVT OpVT = Op0.getValueType(); |
681 | EVT RetVT = Op.getValueType(); |
682 | RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); |
683 | MakeLibCallOptions CallOptions; |
684 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
685 | SDValue Chain = SDValue(); |
686 | SDValue Result; |
687 | std::tie(args&: Result, args&: Chain) = |
688 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
689 | return Result; |
690 | } |
691 | |
692 | SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, |
693 | SelectionDAG &DAG) const { |
694 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
695 | !Subtarget.hasBasicD() && "unexpected target features" ); |
696 | |
697 | SDLoc DL(Op); |
698 | SDValue Op0 = Op.getOperand(i: 0); |
699 | |
700 | if ((Op0.getOpcode() == ISD::AssertSext || |
701 | Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && |
702 | dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32)) |
703 | return Op; |
704 | |
705 | EVT OpVT = Op0.getValueType(); |
706 | EVT RetVT = Op.getValueType(); |
707 | RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); |
708 | MakeLibCallOptions CallOptions; |
709 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
710 | SDValue Chain = SDValue(); |
711 | SDValue Result; |
712 | std::tie(args&: Result, args&: Chain) = |
713 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
714 | return Result; |
715 | } |
716 | |
717 | SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, |
718 | SelectionDAG &DAG) const { |
719 | |
720 | SDLoc DL(Op); |
721 | SDValue Op0 = Op.getOperand(i: 0); |
722 | |
723 | if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && |
724 | Subtarget.is64Bit() && Subtarget.hasBasicF()) { |
725 | SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); |
726 | return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); |
727 | } |
728 | return Op; |
729 | } |
730 | |
731 | SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, |
732 | SelectionDAG &DAG) const { |
733 | |
734 | SDLoc DL(Op); |
735 | |
736 | if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && |
737 | !Subtarget.hasBasicD()) { |
738 | SDValue Dst = |
739 | DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0)); |
740 | return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); |
741 | } |
742 | |
743 | EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits()); |
744 | SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op.getOperand(i: 0)); |
745 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc); |
746 | } |
747 | |
748 | static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, |
749 | SelectionDAG &DAG, unsigned Flags) { |
750 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
751 | } |
752 | |
753 | static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, |
754 | SelectionDAG &DAG, unsigned Flags) { |
755 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
756 | TargetFlags: Flags); |
757 | } |
758 | |
759 | static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, |
760 | SelectionDAG &DAG, unsigned Flags) { |
761 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
762 | Offset: N->getOffset(), TargetFlags: Flags); |
763 | } |
764 | |
765 | static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, |
766 | SelectionDAG &DAG, unsigned Flags) { |
767 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
768 | } |
769 | |
// Materialize the address of node N (global address, block address, constant
// pool entry or jump table) for code model M. IsLocal selects PC-relative
// addressing instead of a GOT load when the symbol is known DSO-local.
// Returns an SDValue wrapping the pseudo machine node that later expands to
// the actual instruction sequence.
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DL: DAG.getDataLayout());
  // Target-specific symbol node carried as an operand of the pseudo.
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);

  switch (M) {
  default:
    report_fatal_error(reason: "Unsupported code model" );

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64" );

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);

    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
    // becomes the desired 5-insn code sequence.
    return SDValue(
        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
        0);
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      return SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);

    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
                   0);
  }
}
816 | |
817 | SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, |
818 | SelectionDAG &DAG) const { |
819 | return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG, |
820 | M: DAG.getTarget().getCodeModel()); |
821 | } |
822 | |
823 | SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, |
824 | SelectionDAG &DAG) const { |
825 | return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG, |
826 | M: DAG.getTarget().getCodeModel()); |
827 | } |
828 | |
829 | SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, |
830 | SelectionDAG &DAG) const { |
831 | return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG, |
832 | M: DAG.getTarget().getCodeModel()); |
833 | } |
834 | |
835 | SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, |
836 | SelectionDAG &DAG) const { |
837 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
838 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
839 | auto CM = DAG.getTarget().getCodeModel(); |
840 | const GlobalValue *GV = N->getGlobal(); |
841 | |
842 | if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) { |
843 | if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel()) |
844 | CM = *GCM; |
845 | } |
846 | |
847 | return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal()); |
848 | } |
849 | |
// Compute a thread-local address for the static TLS models (initial-exec and
// local-exec). Opc is the PseudoLA_TLS_* node that produces the thread-local
// offset; Large selects the *_LARGE pseudo variant that takes an extra
// operand. The offset is then added to the thread pointer ($tp / R2).
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DL: DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
  // Large pseudos take the dummy operand in addition to the symbol.
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
                       : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
870 | |
// Compute a thread-local address for the dynamic TLS models (general-dynamic
// and local-dynamic) by materializing the GOT entry for the symbol with the
// given PseudoLA_TLS_* pseudo (Large selects the *_LARGE variant) and then
// lowering a call to __tls_get_addr with that address as the sole argument.
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DL: DAG.getDataLayout());
  // __tls_get_addr takes/returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
                       : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(x: Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
                    Target: DAG.getExternalSymbol(Sym: "__tls_get_addr" , VT: Ty),
                    ArgsList: std::move(Args));

  // The call's first result is the thread-local address.
  return LowerCallTo(CLI).first;
}
905 | |
// Lower a TLS global address by dispatching on the TLS model chosen for the
// global: dynamic models call __tls_get_addr via getDynamicTLSAddr, static
// models add an offset to the thread pointer via getStaticTLSAddr. Under the
// large code model the *_LARGE pseudo variants are selected.
SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  // GHC calling convention reserves registers that TLS lowering needs.
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error(reason: "In GHC calling convention TLS is not supported" );

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64" );

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node" );

  SDValue Addr;
  switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                   : LoongArch::PseudoLA_TLS_GD,
                             Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                   : LoongArch::PseudoLA_TLS_LD,
                             Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    Addr = getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            Large);
    break;
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
    break;
  }

  return Addr;
}
956 | |
// Validate that operand `ImmOp` of intrinsic node `Op` is an immediate
// fitting in N bits (signed when IsSigned, unsigned otherwise). On failure,
// emit a diagnostic and return an UNDEF of the result type so lowering can
// continue; on success return a null SDValue, meaning "no error — proceed
// with default handling".
template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) +
                                ": argument out of range." );
    return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType());
  }
  return SDValue();
}
970 | |
// Custom-lower ISD::INTRINSIC_WO_CHAIN. Besides thread_pointer, the work
// here is range-checking the immediate arguments of LSX/LASX intrinsics:
// each case routes to checkIntrinsicImmArg<N>(Op, ImmOp, ...), which emits a
// diagnostic and returns UNDEF when operand `ImmOp` does not fit in an N-bit
// (un)signed immediate, and returns a null SDValue (use default lowering)
// otherwise.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Operand 0 is the intrinsic ID.
  switch (Op.getConstantOperandVal(i: 0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in $tp (R2).
    EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  // uimm1 at operand 2.
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG);
  // uimm2 at operand 2.
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG);
  // uimm2 at operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG);
  // uimm3 at operand 2.
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG);
  // uimm3 at operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG);
  // uimm4 at operand 2.
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG);
  // uimm4 at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG);
  // uimm5 at operand 2.
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG);
  // simm5 at operand 2.
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true);
  // uimm5 at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG);
  // uimm6 at operand 2.
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG);
  // uimm6 at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG);
  // uimm7 at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG);
  // uimm8 at operand 2.
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG);
  // uimm8 at operand 3.
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG);
  // simm10 at operand 1.
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
  // simm13 at operand 1.
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
  }
}
1249 | |
1250 | // Helper function that emits error message for intrinsics with chain and return |
1251 | // merge values of a UNDEF and the chain. |
1252 | static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, |
1253 | StringRef ErrorMsg, |
1254 | SelectionDAG &DAG) { |
1255 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
1256 | return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)}, |
1257 | dl: SDLoc(Op)); |
1258 | } |
1259 | |
// Custom-lower ISD::INTRINSIC_W_CHAIN. Performs feature checks (LA64, basic
// 'f') and immediate range checks for the chained LoongArch intrinsics; on
// failure emitIntrinsicWithChainErrorMessage reports the error and yields
// {UNDEF, chain}. Valid CSR/IOCSR/cpucfg intrinsics are rewritten to their
// LoongArchISD machine nodes here; the vector load intrinsics only have
// their offsets validated (a null return means "use default lowering").
SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(i: 0);
  const StringRef ErrorMsgOOR = "argument out of range" ;
  const StringRef ErrorMsgReqLA64 = "requires loongarch64" ;
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ;

  // Operand 1 is the intrinsic ID (operand 0 is the chain).
  switch (Op.getConstantOperandVal(i: 1)) {
  default:
    return Op;
  // CRC intrinsics reaching here are unsupported on this subtarget; they
  // require LA64.
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    // CSR number must be a uimm14.
    unsigned Imm = Op.getConstantOperandVal(i: 2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    // Operand 2 is the value to write; operand 3 the uimm14 CSR number.
    unsigned Imm = Op.getConstantOperandVal(i: 3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    // Operands 2/3 are value and mask; operand 4 the uimm14 CSR number.
    unsigned Imm = Op.getConstantOperandVal(i: 4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    // The 64-bit IOCSR read wants its address widened to i64.
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
// Map the b/h/w IOCSR reads straight to their LoongArchISD nodes.
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    // Level must be a uimm8; the node itself is matched by patterns later.
    unsigned Imm = Op.getConstantOperandVal(i: 3);
    return !isUInt<8>(x: Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
    // FCSR index must be a uimm2.
    unsigned Imm = Op.getConstantOperandVal(i: 2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  // Vector loads: only validate the offset operand; SDValue() defers to the
  // default lowering.
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG)
               : SDValue();
  }
}
1372 | |
1373 | // Helper function that emits error message for intrinsics with void return |
1374 | // value and return the chain. |
1375 | static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, |
1376 | SelectionDAG &DAG) { |
1377 | |
1378 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
1379 | return Op.getOperand(i: 0); |
1380 | } |
1381 | |
1382 | SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
1383 | SelectionDAG &DAG) const { |
1384 | SDLoc DL(Op); |
1385 | MVT GRLenVT = Subtarget.getGRLenVT(); |
1386 | SDValue Chain = Op.getOperand(i: 0); |
1387 | uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1); |
1388 | SDValue Op2 = Op.getOperand(i: 2); |
1389 | const StringRef ErrorMsgOOR = "argument out of range" ; |
1390 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
1391 | const StringRef ErrorMsgReqLA32 = "requires loongarch32" ; |
1392 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
1393 | |
1394 | switch (IntrinsicEnum) { |
1395 | default: |
1396 | // TODO: Add more Intrinsics. |
1397 | return SDValue(); |
1398 | case Intrinsic::loongarch_cacop_d: |
1399 | case Intrinsic::loongarch_cacop_w: { |
1400 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) |
1401 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
1402 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) |
1403 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG); |
1404 | // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) |
1405 | unsigned Imm1 = Op2->getAsZExtVal(); |
1406 | int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue(); |
1407 | if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2)) |
1408 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG); |
1409 | return Op; |
1410 | } |
1411 | case Intrinsic::loongarch_dbar: { |
1412 | unsigned Imm = Op2->getAsZExtVal(); |
1413 | return !isUInt<15>(Imm) |
1414 | ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) |
1415 | : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain, |
1416 | DAG.getConstant(Imm, DL, GRLenVT)); |
1417 | } |
1418 | case Intrinsic::loongarch_ibar: { |
1419 | unsigned Imm = Op2->getAsZExtVal(); |
1420 | return !isUInt<15>(Imm) |
1421 | ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) |
1422 | : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain, |
1423 | DAG.getConstant(Imm, DL, GRLenVT)); |
1424 | } |
1425 | case Intrinsic::loongarch_break: { |
1426 | unsigned Imm = Op2->getAsZExtVal(); |
1427 | return !isUInt<15>(Imm) |
1428 | ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) |
1429 | : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain, |
1430 | DAG.getConstant(Imm, DL, GRLenVT)); |
1431 | } |
1432 | case Intrinsic::loongarch_movgr2fcsr: { |
1433 | if (!Subtarget.hasBasicF()) |
1434 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
1435 | unsigned Imm = Op2->getAsZExtVal(); |
1436 | return !isUInt<2>(Imm) |
1437 | ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) |
1438 | : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain, |
1439 | DAG.getConstant(Imm, DL, GRLenVT), |
1440 | DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, |
1441 | Op.getOperand(3))); |
1442 | } |
1443 | case Intrinsic::loongarch_syscall: { |
1444 | unsigned Imm = Op2->getAsZExtVal(); |
1445 | return !isUInt<15>(Imm) |
1446 | ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) |
1447 | : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain, |
1448 | DAG.getConstant(Imm, DL, GRLenVT)); |
1449 | } |
1450 | #define IOCSRWR_CASE(NAME, NODE) \ |
1451 | case Intrinsic::loongarch_##NAME: { \ |
1452 | SDValue Op3 = Op.getOperand(3); \ |
1453 | return Subtarget.is64Bit() \ |
1454 | ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ |
1455 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
1456 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ |
1457 | : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ |
1458 | Op3); \ |
1459 | } |
1460 | IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); |
1461 | IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); |
1462 | IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); |
1463 | #undef IOCSRWR_CASE |
1464 | case Intrinsic::loongarch_iocsrwr_d: { |
1465 | return !Subtarget.is64Bit() |
1466 | ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) |
1467 | : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain, |
1468 | Op2, |
1469 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, |
1470 | Op.getOperand(3))); |
1471 | } |
1472 | #define ASRT_LE_GT_CASE(NAME) \ |
1473 | case Intrinsic::loongarch_##NAME: { \ |
1474 | return !Subtarget.is64Bit() \ |
1475 | ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ |
1476 | : Op; \ |
1477 | } |
1478 | ASRT_LE_GT_CASE(asrtle_d) |
1479 | ASRT_LE_GT_CASE(asrtgt_d) |
1480 | #undef ASRT_LE_GT_CASE |
1481 | case Intrinsic::loongarch_ldpte_d: { |
1482 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
1483 | return !Subtarget.is64Bit() |
1484 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
1485 | : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
1486 | : Op; |
1487 | } |
1488 | case Intrinsic::loongarch_lsx_vst: |
1489 | case Intrinsic::loongarch_lasx_xvst: |
1490 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) |
1491 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
1492 | : SDValue(); |
1493 | case Intrinsic::loongarch_lasx_xvstelm_b: |
1494 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1495 | !isUInt<5>(x: Op.getConstantOperandVal(i: 5))) |
1496 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
1497 | : SDValue(); |
1498 | case Intrinsic::loongarch_lsx_vstelm_b: |
1499 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1500 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
1501 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
1502 | : SDValue(); |
1503 | case Intrinsic::loongarch_lasx_xvstelm_h: |
1504 | return (!isShiftedInt<8, 1>( |
1505 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1506 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
1507 | ? emitIntrinsicErrorMessage( |
1508 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
1509 | : SDValue(); |
1510 | case Intrinsic::loongarch_lsx_vstelm_h: |
1511 | return (!isShiftedInt<8, 1>( |
1512 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1513 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
1514 | ? emitIntrinsicErrorMessage( |
1515 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
1516 | : SDValue(); |
1517 | case Intrinsic::loongarch_lasx_xvstelm_w: |
1518 | return (!isShiftedInt<8, 2>( |
1519 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1520 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
1521 | ? emitIntrinsicErrorMessage( |
1522 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
1523 | : SDValue(); |
1524 | case Intrinsic::loongarch_lsx_vstelm_w: |
1525 | return (!isShiftedInt<8, 2>( |
1526 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1527 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
1528 | ? emitIntrinsicErrorMessage( |
1529 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
1530 | : SDValue(); |
1531 | case Intrinsic::loongarch_lasx_xvstelm_d: |
1532 | return (!isShiftedInt<8, 3>( |
1533 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1534 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
1535 | ? emitIntrinsicErrorMessage( |
1536 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
1537 | : SDValue(); |
1538 | case Intrinsic::loongarch_lsx_vstelm_d: |
1539 | return (!isShiftedInt<8, 3>( |
1540 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
1541 | !isUInt<1>(x: Op.getConstantOperandVal(i: 5))) |
1542 | ? emitIntrinsicErrorMessage( |
1543 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
1544 | : SDValue(); |
1545 | } |
1546 | } |
1547 | |
// Lowers ISD::SHL_PARTS: a left shift of a 2*GRLen-wide value split across
// the (Lo, Hi) register pair, with a runtime shift amount. The result is
// computed branchlessly with two SELECTs keyed on whether Shamt < GRLen.
SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(i: 0);
  SDValue Hi = Op.getOperand(i: 1);
  SDValue Shamt = Op.getOperand(i: 2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)

  SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
  SDValue One = DAG.getConstant(Val: 1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
  // Shamt - GRLen doubles as the "wide shift" amount and (via its sign) the
  // selector between the two branches of the expansion above.
  SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
  // For 0 <= Shamt < GRLen, Shamt ^ (GRLen-1) == GRLen-1-Shamt; XOR avoids a
  // subtraction.
  SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);

  SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
  // (Lo >>u 1) >>u (GRLen-1-Shamt) rather than Lo >>u (GRLen-Shamt): the
  // latter would shift by GRLen (out of range) when Shamt == 0.
  SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
  SDValue ShiftRightLo =
      DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
  SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
  SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);

  Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
  Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Ops: Parts, dl: DL);
}
1586 | |
// Lowers ISD::SRA_PARTS (IsSRA) / ISD::SRL_PARTS: a right shift of a
// 2*GRLen-wide value split across the (Lo, Hi) register pair, with a runtime
// shift amount. Branchless via two SELECTs keyed on whether Shamt < GRLen.
SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(i: 0);
  SDValue Hi = Op.getOperand(i: 1);
  SDValue Shamt = Op.getOperand(i: 2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;

  // The two expansions differ only in whether the high part is shifted
  // arithmetically or logically.
  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
  SDValue One = DAG.getConstant(Val: 1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
  // Shamt - GRLen doubles as the "wide shift" amount and (via its sign) the
  // selector between the two branches of the expansions above.
  SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
  // For 0 <= Shamt < GRLen, Shamt ^ (GRLen-1) == GRLen-1-Shamt; XOR avoids a
  // subtraction.
  SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
  // (Hi << 1) << (GRLen-1-Shamt) rather than Hi << (GRLen-Shamt): the latter
  // would shift by GRLen (out of range) when Shamt == 0.
  SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
  SDValue ShiftLeftHi =
      DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
  SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
  // For SRA the high part saturates to the sign bits; for SRL it is zero.
  SDValue HiFalse =
      IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);

  Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
  Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Ops: Parts, dl: DL);
}
1639 | |
1640 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
1641 | // form of the given Opcode. |
1642 | static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { |
1643 | switch (Opcode) { |
1644 | default: |
1645 | llvm_unreachable("Unexpected opcode" ); |
1646 | case ISD::SHL: |
1647 | return LoongArchISD::SLL_W; |
1648 | case ISD::SRA: |
1649 | return LoongArchISD::SRA_W; |
1650 | case ISD::SRL: |
1651 | return LoongArchISD::SRL_W; |
1652 | case ISD::ROTR: |
1653 | return LoongArchISD::ROTR_W; |
1654 | case ISD::ROTL: |
1655 | return LoongArchISD::ROTL_W; |
1656 | case ISD::CTTZ: |
1657 | return LoongArchISD::CTZ_W; |
1658 | case ISD::CTLZ: |
1659 | return LoongArchISD::CLZ_W; |
1660 | } |
1661 | } |
1662 | |
1663 | // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG |
1664 | // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would |
1665 | // otherwise be promoted to i64, making it difficult to select the |
1666 | // SLL_W/.../*W later one because the fact the operation was originally of |
1667 | // type i8/i16/i32 is lost. |
1668 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, |
1669 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
1670 | SDLoc DL(N); |
1671 | LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode()); |
1672 | SDValue NewOp0, NewRes; |
1673 | |
1674 | switch (NumOp) { |
1675 | default: |
1676 | llvm_unreachable("Unexpected NumOp" ); |
1677 | case 1: { |
1678 | NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); |
1679 | NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0); |
1680 | break; |
1681 | } |
1682 | case 2: { |
1683 | NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); |
1684 | SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); |
1685 | NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); |
1686 | break; |
1687 | } |
1688 | // TODO:Handle more NumOp. |
1689 | } |
1690 | |
1691 | // ReplaceNodeResults requires we maintain the same type for the return |
1692 | // value. |
1693 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
1694 | } |
1695 | |
1696 | // Helper function that emits error message for intrinsics with/without chain |
1697 | // and return a UNDEF or and the chain as the results. |
1698 | static void emitErrorAndReplaceIntrinsicResults( |
1699 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, |
1700 | StringRef ErrorMsg, bool WithChain = true) { |
1701 | DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
1702 | Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0))); |
1703 | if (!WithChain) |
1704 | return; |
1705 | Results.push_back(Elt: N->getOperand(Num: 0)); |
1706 | } |
1707 | |
1708 | template <unsigned N> |
1709 | static void |
1710 | replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, |
1711 | SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, |
1712 | unsigned ResOp) { |
1713 | const StringRef ErrorMsgOOR = "argument out of range" ; |
1714 | unsigned Imm = Node->getConstantOperandVal(Num: 2); |
1715 | if (!isUInt<N>(Imm)) { |
1716 | emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR, |
1717 | /*WithChain=*/false); |
1718 | return; |
1719 | } |
1720 | SDLoc DL(Node); |
1721 | SDValue Vec = Node->getOperand(Num: 1); |
1722 | |
1723 | SDValue PickElt = |
1724 | DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec, |
1725 | N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()), |
1726 | N3: DAG.getValueType(Vec.getValueType().getVectorElementType())); |
1727 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0), |
1728 | Operand: PickElt.getValue(R: 0))); |
1729 | } |
1730 | |
1731 | static void replaceVecCondBranchResults(SDNode *N, |
1732 | SmallVectorImpl<SDValue> &Results, |
1733 | SelectionDAG &DAG, |
1734 | const LoongArchSubtarget &Subtarget, |
1735 | unsigned ResOp) { |
1736 | SDLoc DL(N); |
1737 | SDValue Vec = N->getOperand(Num: 1); |
1738 | |
1739 | SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec); |
1740 | Results.push_back( |
1741 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0))); |
1742 | } |
1743 | |
// Dispatches result legalization for chainless LSX/LASX intrinsics, keyed on
// the intrinsic ID (operand 0 of the INTRINSIC_WO_CHAIN node).
static void
replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                 SelectionDAG &DAG,
                                 const LoongArchSubtarget &Subtarget) {
  switch (N->getConstantOperandVal(Num: 0)) {
  default:
    llvm_unreachable("Unexpected Intrinsic." );
  // Element extractions. The template argument is the bit width of the lane
  // index, which shrinks as the element size grows (16/8/4 lanes -> 4/3/2
  // bits); signed and unsigned variants differ only in the extension node.
  case Intrinsic::loongarch_lsx_vpickve2gr_b:
    replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_h:
  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
    replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_w:
    replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
    replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
    replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
    replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  // Vector condition tests. Per-element bz maps to "all elements zero",
  // whole-register bz_v to "any element zero"; likewise for bnz.
  case Intrinsic::loongarch_lsx_bz_b:
  case Intrinsic::loongarch_lsx_bz_h:
  case Intrinsic::loongarch_lsx_bz_w:
  case Intrinsic::loongarch_lsx_bz_d:
  case Intrinsic::loongarch_lasx_xbz_b:
  case Intrinsic::loongarch_lasx_xbz_h:
  case Intrinsic::loongarch_lasx_xbz_w:
  case Intrinsic::loongarch_lasx_xbz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VALL_ZERO);
    break;
  case Intrinsic::loongarch_lsx_bz_v:
  case Intrinsic::loongarch_lasx_xbz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VANY_ZERO);
    break;
  case Intrinsic::loongarch_lsx_bnz_b:
  case Intrinsic::loongarch_lsx_bnz_h:
  case Intrinsic::loongarch_lsx_bnz_w:
  case Intrinsic::loongarch_lsx_bnz_d:
  case Intrinsic::loongarch_lasx_xbnz_b:
  case Intrinsic::loongarch_lasx_xbnz_h:
  case Intrinsic::loongarch_lasx_xbnz_w:
  case Intrinsic::loongarch_lasx_xbnz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VALL_NONZERO);
    break;
  case Intrinsic::loongarch_lsx_bnz_v:
  case Intrinsic::loongarch_lasx_xbnz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VANY_NONZERO);
    break;
  }
}
1811 | |
// Replaces the illegally-typed results of node N with legal values. On LA64
// most narrow (i8/i16/i32) results are computed on i64 via target-specific
// "W" nodes and truncated back to the original type, as ReplaceNodeResults
// requires the replacement to keep the same type.
void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation" );
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation" );
    // Only variable shift amounts are rewritten to the *_W nodes here;
    // constant amounts are left untouched (presumably handled during
    // selection -- note nothing is pushed to Results for them).
    if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
      Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
      break;
    }
    break;
  case ISD::ROTL:
    // ROTL is the inverse of the cases above: only constant rotate amounts
    // are rewritten to ROTL_W.
    ConstantSDNode *CN;
    if ((CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))) {
      Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
      break;
    }
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation" );
    SDValue Src = N->getOperand(Num: 0);
    EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0));
    if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      // Hardware FP is available: convert with FTINT (result stays in an FP
      // register of width FVT) and reinterpret the bits as the integer VT.
      SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
      Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    // Record the pre-softening types so the right libcall signature is used.
    CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(args&: Result, args&: Chain) =
        makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
    Results.push_back(Elt: Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(Num: 0);
    EVT SrcVT = Src.getValueType();
    // f32 -> i32 on LA64 with basic F: move through the 64-bit FPR-to-GPR
    // node and truncate. Other combinations push nothing (left to default).
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      SDValue Dst =
          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation" );
    // Expand via the generic helper (which produces an i64 result here) and
    // truncate back to i32.
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(Num: 0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization" );
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width" );
    case 16:
      Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    }
    Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(Num: 0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization" );
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width" );
    case 8:
      Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    }
    Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation" );
    Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Chained intrinsics. Operand 0 is the chain, operand 1 the intrinsic
    // ID, operand 2 the first real argument.
    SDValue Chain = N->getOperand(Num: 0);
    SDValue Op2 = N->getOperand(Num: 2);
    MVT GRLenVT = Subtarget.getGRLenVT();
    const StringRef ErrorMsgOOR = "argument out of range" ;
    const StringRef ErrorMsgReqLA64 = "requires loongarch64" ;
    const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ;

    switch (N->getConstantOperandVal(Num: 1)) {
    default:
      llvm_unreachable("Unexpected Intrinsic." );
    case Intrinsic::loongarch_movfcsr2gr: {
      // Requires the F feature and a 2-bit FCSR index immediate.
      if (!Subtarget.hasBasicF()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
        return;
      }
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<2>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
          LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
          {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0)));
      Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1));
      break;
    }
// CRC intrinsics whose both data arguments need widening to i64.
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
    CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

// CRC intrinsics whose first data argument is already i64; only the second
// needs widening.
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, Op2,                                                           \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
    CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
    CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
// 64-bit-only CSR/IOCSR intrinsics: on LA32 emit an error, otherwise push
// nothing (left for selection).
#define CSR_CASE(ID)                                                           \
  case Intrinsic::loongarch_##ID: {                                            \
    if (!Subtarget.is64Bit())                                                  \
      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
    break;                                                                     \
  }
    CSR_CASE(csrrd_d);
    CSR_CASE(csrwr_d);
    CSR_CASE(csrxchg_d);
    CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      // CSR register number is a 14-bit immediate.
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<14>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue CSRRDResults =
          DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0)));
      Results.push_back(Elt: CSRRDResults.getValue(R: 1));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      // Operand 2 is the value to write; operand 3 is the 14-bit CSR number.
      unsigned Imm = N->getConstantOperandVal(Num: 3);
      if (!isUInt<14>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue CSRWRResults =
          DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
                       DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0)));
      Results.push_back(Elt: CSRWRResults.getValue(R: 1));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      // Operands 2/3 are the value and mask; operand 4 is the 14-bit CSR
      // number.
      unsigned Imm = N->getConstantOperandVal(Num: 4);
      if (!isUInt<14>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue CSRXCHGResults = DAG.getNode(
          LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
          {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
           DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0)));
      Results.push_back(Elt: CSRXCHGResults.getValue(R: 1));
      break;
    }
// IOCSR reads: widen the register-number operand to i64, then truncate the
// i64 result back.
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue IOCSRRDResults =                                                   \
        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
    Results.push_back(                                                         \
        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
    Results.push_back(IOCSRRDResults.getValue(1));                             \
    break;                                                                     \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      SDValue CPUCFGResults =
          DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0)));
      Results.push_back(Elt: CPUCFGResults.getValue(R: 1));
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      // LA64 only; on LA32 emit an error, otherwise push nothing.
      if (!Subtarget.is64Bit()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
        return;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    // Reaching here means the register's type is illegal, i.e. its width does
    // not match GRLen; report the mismatch and return UNDEF plus the chain.
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
          ErrorStr: "On LA64, only 64-bit registers can be read." );
    else
      DAG.getContext()->emitError(
          ErrorStr: "On LA32, only 32-bit registers can be read." );
    Results.push_back(Elt: DAG.getUNDEF(VT));
    Results.push_back(Elt: N->getOperand(Num: 0));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
    break;
  }
  }
}
2097 | |
// DAG combine for ISD::AND: folds an AND with a shifted-mask constant (and
// optionally a preceding SRA/SRL) into a single BSTRPICK, possibly followed
// by a SLLI when the mask does not start at bit 0.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue FirstOperand = N->getOperand(Num: 0);
  SDValue SecondOperand = N->getOperand(Num: 1);
  unsigned FirstOperandOpc = FirstOperand.getOpcode();
  EVT ValTy = N->getValueType(ResNo: 0);
  SDLoc DL(N);
  uint64_t lsb, msb;
  unsigned SMIdx, SMLen;
  ConstantSDNode *CN;
  SDValue NewOperand;
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Op's second operand must be a shifted mask.
  if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) ||
      !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
    return SDValue();

  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
    // Pattern match BSTRPICK.
    //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where msb = lsb + len - 1

    // The second operand of the shift must be an immediate.
    if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))))
      return SDValue();

    lsb = CN->getZExtValue();

    // Return if the shifted mask does not start at bit 0 or the sum of its
    // length and lsb exceeds the word's size.
    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
      return SDValue();

    NewOperand = FirstOperand.getOperand(i: 0);
  } else {
    // Pattern match BSTRPICK.
    //  $dst = and $src, (2**len- 1) , if len > 12
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where lsb = 0 and msb = len - 1

    // If the mask is <= 0xfff, andi can be used instead.
    if (CN->getZExtValue() <= 0xfff)
      return SDValue();

    // Return if the MSB exceeds.
    if (SMIdx + SMLen > ValTy.getSizeInBits())
      return SDValue();

    if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However it should always be better for 1 and 2 uses.
      if (CN->use_size() > 2)
        return SDValue();
      // Return if the constant can be composed by a single LU12I.W.
      if ((CN->getZExtValue() & 0xfff) == 0)
        return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
        return SDValue();
    }

    lsb = SMIdx;
    NewOperand = FirstOperand;
  }

  msb = lsb + SMLen - 1;
  SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
                            N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
                            N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
    return NR0;
  // Try to optimize to
  //   bstrpick $Rd, $Rs, msb, lsb
  //   slli     $Rd, $Rd, lsb
  return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
                     N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
}
2183 | |
/// Fold (srl (and $src, Mask), Shamt) into a single BSTRPICK when Mask is a
/// shifted mask and the shift amount lies inside the mask, so the AND is
/// subsumed by the bit-field extraction.
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  // BSTRPICK is a target node, so only form it after legalization.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
  //

  SDValue FirstOperand = N->getOperand(Num: 0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(ResNo: 0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) ||
      !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
  // Note: CN is intentionally reused here; after this point it refers to the
  // shift amount, not the AND mask.
  if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))))
    return SDValue();

  Shamt = CN->getZExtValue();
  // The shift must start within the mask: bits below Shamt are discarded by
  // the srl, and BSTRPICK extracts exactly [MaskIdx+MaskLen-1, Shamt].
  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
    return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
                       N1: FirstOperand->getOperand(Num: 0),
                       N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
                       N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));

  return SDValue();
}
2224 | |
/// Try to fold an OR node into a LoongArch BSTRINS (bit-string insert).
/// Eight patterns are attempted in order of profitability; because OR is
/// commutative, each group of patterns is retried once with the operands
/// swapped (the Retry/Retry2 labels below).
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT ValTy = N->getValueType(ResNo: 0);
  SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
  ConstantSDNode *CN0, *CN1;
  SDLoc DL(N);
  unsigned ValBits = ValTy.getSizeInBits();
  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  unsigned Shamt;
  bool SwapAndRetried = false;

  // BSTRINS is a target node, so only form it after legalization.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // BSTRINS only exists for 32- and 64-bit operations.
  if (ValBits != 32 && ValBits != 64)
    return SDValue();

Retry:
  // 1st pattern to match BSTRINS:
  // R = or (and X, mask0), (and (shl Y, lsb), mask1)
  // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
  // =>
  // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  // Note: CN1 is reassigned mid-condition; after the MaskIdx/MaskLen checks it
  // refers to the shift amount of the inner SHL.
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n" );
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: N1.getOperand(i: 0).getOperand(i: 0),
                       N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 2nd pattern to match BSTRINS:
  // R = or (and X, mask0), (shl (and Y, mask1), lsb)
  // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
  // =>
  // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n" );
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: N1.getOperand(i: 0).getOperand(i: 0),
                       N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 3rd pattern to match BSTRINS:
  // R = or (and X, mask0), (and Y, mask1)
  // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
  // =>
  // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
  // where msb = lsb + size - 1
  // The bound is 64 (not ValBits) because for 32-bit values ~mask0 may extend
  // through the sign-extension bits; the `MaskLen0 & 31` below clamps the msb
  // back into the 32-bit range in that case.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      (MaskIdx0 + MaskLen0 <= 64) &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n" );
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1,
                                   N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
                       N3: DAG.getConstant(Val: ValBits == 32
                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                           : (MaskIdx0 + MaskLen0 - 1),
                                       DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 4th pattern to match BSTRINS:
  // R = or (and X, mask), (shl Y, shamt)
  // where mask = (2**shamt - 1)
  // =>
  // R = BSTRINS X, Y, ValBits - 1, shamt
  // where ValBits = 32 or 64
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n" );
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: N1.getOperand(i: 0),
                       N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
  }

  // 5th pattern to match BSTRINS:
  // R = or (and X, mask), const
  // where ~mask = (2**size - 1) << lsb, mask & const = 0
  // =>
  // R = BSTRINS X, (const >> lsb), msb, lsb
  // where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n" );
    return DAG.getNode(
        Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
        N2: DAG.getConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
        N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                          : (MaskIdx0 + MaskLen0 - 1),
                        DL, VT: GRLenVT),
        N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 6th pattern.
  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
  // by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
  //
  // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
  // pattern is more common than the 1st. So we put the 1st before the 6th in
  // order to match as many nodes as possible.
  ConstantSDNode *CNMask, *CNShamt;
  unsigned MaskIdx, MaskLen;
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      CNShamt->getZExtValue() + MaskLen <= ValBits) {
    Shamt = CNShamt->getZExtValue();
    // Only safe if every bit of b under the shifted mask is known zero,
    // because BSTRINS overwrites those bits unconditionally.
    APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
    if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n" );
      return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
                         N2: N1.getOperand(i: 0).getOperand(i: 0),
                         N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT),
                         N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
    }
  }

  // 7th pattern.
  // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
  // overwritten by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
  // before the 7th in order to match as many nodes as possible.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
      (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      CNShamt->getZExtValue() == MaskIdx) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n" );
      return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
                         N2: N1.getOperand(i: 0).getOperand(i: 0),
                         N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
                         N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
    }
  }

  // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
  if (!SwapAndRetried) {
    std::swap(a&: N0, b&: N1);
    SwapAndRetried = true;
    goto Retry;
  }

  // Patterns 1-7 exhausted in both operand orders; reset the flag and try the
  // 8th pattern, again in both orders.
  SwapAndRetried = false;
Retry2:
  // 8th pattern.
  // a = b | (c & shifted_mask), where all positions in b to be overwritten by
  // the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
  // we put it here in order to match as many nodes as possible or generate less
  // instructions.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n" );
      return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
                         N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0),
                                     N1: N1->getOperand(Num: 0),
                                     N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
                         N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
                         N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
    }
  }
  // Swap N0/N1 and retry.
  if (!SwapAndRetried) {
    std::swap(a&: N0, b&: N1);
    SwapAndRetried = true;
    goto Retry2;
  }

  return SDValue();
}
2444 | |
2445 | // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. |
2446 | static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, |
2447 | TargetLowering::DAGCombinerInfo &DCI, |
2448 | const LoongArchSubtarget &Subtarget) { |
2449 | if (DCI.isBeforeLegalizeOps()) |
2450 | return SDValue(); |
2451 | |
2452 | SDValue Src = N->getOperand(Num: 0); |
2453 | if (Src.getOpcode() != LoongArchISD::REVB_2W) |
2454 | return SDValue(); |
2455 | |
2456 | return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
2457 | Operand: Src.getOperand(i: 0)); |
2458 | } |
2459 | |
2460 | template <unsigned N> |
2461 | static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, |
2462 | SelectionDAG &DAG, |
2463 | const LoongArchSubtarget &Subtarget, |
2464 | bool IsSigned = false) { |
2465 | SDLoc DL(Node); |
2466 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
2467 | // Check the ImmArg. |
2468 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
2469 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
2470 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
2471 | ": argument out of range." ); |
2472 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT()); |
2473 | } |
2474 | return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT()); |
2475 | } |
2476 | |
2477 | template <unsigned N> |
2478 | static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, |
2479 | SelectionDAG &DAG, bool IsSigned = false) { |
2480 | SDLoc DL(Node); |
2481 | EVT ResTy = Node->getValueType(ResNo: 0); |
2482 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
2483 | |
2484 | // Check the ImmArg. |
2485 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
2486 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
2487 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
2488 | ": argument out of range." ); |
2489 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
2490 | } |
2491 | return DAG.getConstant( |
2492 | Val: APInt(ResTy.getScalarType().getSizeInBits(), |
2493 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
2494 | DL, VT: ResTy); |
2495 | } |
2496 | |
2497 | static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { |
2498 | SDLoc DL(Node); |
2499 | EVT ResTy = Node->getValueType(ResNo: 0); |
2500 | SDValue Vec = Node->getOperand(Num: 2); |
2501 | SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy); |
2502 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask); |
2503 | } |
2504 | |
2505 | static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { |
2506 | SDLoc DL(Node); |
2507 | EVT ResTy = Node->getValueType(ResNo: 0); |
2508 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
2509 | SDValue Bit = |
2510 | DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG)); |
2511 | |
2512 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), |
2513 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
2514 | } |
2515 | |
2516 | template <unsigned N> |
2517 | static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { |
2518 | SDLoc DL(Node); |
2519 | EVT ResTy = Node->getValueType(ResNo: 0); |
2520 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
2521 | // Check the unsigned ImmArg. |
2522 | if (!isUInt<N>(CImm->getZExtValue())) { |
2523 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
2524 | ": argument out of range." ); |
2525 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
2526 | } |
2527 | |
2528 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
2529 | SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
2530 | |
2531 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask); |
2532 | } |
2533 | |
2534 | template <unsigned N> |
2535 | static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { |
2536 | SDLoc DL(Node); |
2537 | EVT ResTy = Node->getValueType(ResNo: 0); |
2538 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
2539 | // Check the unsigned ImmArg. |
2540 | if (!isUInt<N>(CImm->getZExtValue())) { |
2541 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
2542 | ": argument out of range." ); |
2543 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
2544 | } |
2545 | |
2546 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
2547 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
2548 | return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
2549 | } |
2550 | |
2551 | template <unsigned N> |
2552 | static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { |
2553 | SDLoc DL(Node); |
2554 | EVT ResTy = Node->getValueType(ResNo: 0); |
2555 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
2556 | // Check the unsigned ImmArg. |
2557 | if (!isUInt<N>(CImm->getZExtValue())) { |
2558 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
2559 | ": argument out of range." ); |
2560 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
2561 | } |
2562 | |
2563 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
2564 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
2565 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
2566 | } |
2567 | |
2568 | static SDValue |
2569 | performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, |
2570 | TargetLowering::DAGCombinerInfo &DCI, |
2571 | const LoongArchSubtarget &Subtarget) { |
2572 | SDLoc DL(N); |
2573 | switch (N->getConstantOperandVal(Num: 0)) { |
2574 | default: |
2575 | break; |
2576 | case Intrinsic::loongarch_lsx_vadd_b: |
2577 | case Intrinsic::loongarch_lsx_vadd_h: |
2578 | case Intrinsic::loongarch_lsx_vadd_w: |
2579 | case Intrinsic::loongarch_lsx_vadd_d: |
2580 | case Intrinsic::loongarch_lasx_xvadd_b: |
2581 | case Intrinsic::loongarch_lasx_xvadd_h: |
2582 | case Intrinsic::loongarch_lasx_xvadd_w: |
2583 | case Intrinsic::loongarch_lasx_xvadd_d: |
2584 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2585 | N2: N->getOperand(Num: 2)); |
2586 | case Intrinsic::loongarch_lsx_vaddi_bu: |
2587 | case Intrinsic::loongarch_lsx_vaddi_hu: |
2588 | case Intrinsic::loongarch_lsx_vaddi_wu: |
2589 | case Intrinsic::loongarch_lsx_vaddi_du: |
2590 | case Intrinsic::loongarch_lasx_xvaddi_bu: |
2591 | case Intrinsic::loongarch_lasx_xvaddi_hu: |
2592 | case Intrinsic::loongarch_lasx_xvaddi_wu: |
2593 | case Intrinsic::loongarch_lasx_xvaddi_du: |
2594 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2595 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
2596 | case Intrinsic::loongarch_lsx_vsub_b: |
2597 | case Intrinsic::loongarch_lsx_vsub_h: |
2598 | case Intrinsic::loongarch_lsx_vsub_w: |
2599 | case Intrinsic::loongarch_lsx_vsub_d: |
2600 | case Intrinsic::loongarch_lasx_xvsub_b: |
2601 | case Intrinsic::loongarch_lasx_xvsub_h: |
2602 | case Intrinsic::loongarch_lasx_xvsub_w: |
2603 | case Intrinsic::loongarch_lasx_xvsub_d: |
2604 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2605 | N2: N->getOperand(Num: 2)); |
2606 | case Intrinsic::loongarch_lsx_vsubi_bu: |
2607 | case Intrinsic::loongarch_lsx_vsubi_hu: |
2608 | case Intrinsic::loongarch_lsx_vsubi_wu: |
2609 | case Intrinsic::loongarch_lsx_vsubi_du: |
2610 | case Intrinsic::loongarch_lasx_xvsubi_bu: |
2611 | case Intrinsic::loongarch_lasx_xvsubi_hu: |
2612 | case Intrinsic::loongarch_lasx_xvsubi_wu: |
2613 | case Intrinsic::loongarch_lasx_xvsubi_du: |
2614 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2615 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
2616 | case Intrinsic::loongarch_lsx_vneg_b: |
2617 | case Intrinsic::loongarch_lsx_vneg_h: |
2618 | case Intrinsic::loongarch_lsx_vneg_w: |
2619 | case Intrinsic::loongarch_lsx_vneg_d: |
2620 | case Intrinsic::loongarch_lasx_xvneg_b: |
2621 | case Intrinsic::loongarch_lasx_xvneg_h: |
2622 | case Intrinsic::loongarch_lasx_xvneg_w: |
2623 | case Intrinsic::loongarch_lasx_xvneg_d: |
2624 | return DAG.getNode( |
2625 | Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), |
2626 | N1: DAG.getConstant( |
2627 | Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0, |
2628 | /*isSigned=*/true), |
2629 | DL: SDLoc(N), VT: N->getValueType(ResNo: 0)), |
2630 | N2: N->getOperand(Num: 1)); |
2631 | case Intrinsic::loongarch_lsx_vmax_b: |
2632 | case Intrinsic::loongarch_lsx_vmax_h: |
2633 | case Intrinsic::loongarch_lsx_vmax_w: |
2634 | case Intrinsic::loongarch_lsx_vmax_d: |
2635 | case Intrinsic::loongarch_lasx_xvmax_b: |
2636 | case Intrinsic::loongarch_lasx_xvmax_h: |
2637 | case Intrinsic::loongarch_lasx_xvmax_w: |
2638 | case Intrinsic::loongarch_lasx_xvmax_d: |
2639 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2640 | N2: N->getOperand(Num: 2)); |
2641 | case Intrinsic::loongarch_lsx_vmax_bu: |
2642 | case Intrinsic::loongarch_lsx_vmax_hu: |
2643 | case Intrinsic::loongarch_lsx_vmax_wu: |
2644 | case Intrinsic::loongarch_lsx_vmax_du: |
2645 | case Intrinsic::loongarch_lasx_xvmax_bu: |
2646 | case Intrinsic::loongarch_lasx_xvmax_hu: |
2647 | case Intrinsic::loongarch_lasx_xvmax_wu: |
2648 | case Intrinsic::loongarch_lasx_xvmax_du: |
2649 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2650 | N2: N->getOperand(Num: 2)); |
2651 | case Intrinsic::loongarch_lsx_vmaxi_b: |
2652 | case Intrinsic::loongarch_lsx_vmaxi_h: |
2653 | case Intrinsic::loongarch_lsx_vmaxi_w: |
2654 | case Intrinsic::loongarch_lsx_vmaxi_d: |
2655 | case Intrinsic::loongarch_lasx_xvmaxi_b: |
2656 | case Intrinsic::loongarch_lasx_xvmaxi_h: |
2657 | case Intrinsic::loongarch_lasx_xvmaxi_w: |
2658 | case Intrinsic::loongarch_lasx_xvmaxi_d: |
2659 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2660 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
2661 | case Intrinsic::loongarch_lsx_vmaxi_bu: |
2662 | case Intrinsic::loongarch_lsx_vmaxi_hu: |
2663 | case Intrinsic::loongarch_lsx_vmaxi_wu: |
2664 | case Intrinsic::loongarch_lsx_vmaxi_du: |
2665 | case Intrinsic::loongarch_lasx_xvmaxi_bu: |
2666 | case Intrinsic::loongarch_lasx_xvmaxi_hu: |
2667 | case Intrinsic::loongarch_lasx_xvmaxi_wu: |
2668 | case Intrinsic::loongarch_lasx_xvmaxi_du: |
2669 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2670 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
2671 | case Intrinsic::loongarch_lsx_vmin_b: |
2672 | case Intrinsic::loongarch_lsx_vmin_h: |
2673 | case Intrinsic::loongarch_lsx_vmin_w: |
2674 | case Intrinsic::loongarch_lsx_vmin_d: |
2675 | case Intrinsic::loongarch_lasx_xvmin_b: |
2676 | case Intrinsic::loongarch_lasx_xvmin_h: |
2677 | case Intrinsic::loongarch_lasx_xvmin_w: |
2678 | case Intrinsic::loongarch_lasx_xvmin_d: |
2679 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2680 | N2: N->getOperand(Num: 2)); |
2681 | case Intrinsic::loongarch_lsx_vmin_bu: |
2682 | case Intrinsic::loongarch_lsx_vmin_hu: |
2683 | case Intrinsic::loongarch_lsx_vmin_wu: |
2684 | case Intrinsic::loongarch_lsx_vmin_du: |
2685 | case Intrinsic::loongarch_lasx_xvmin_bu: |
2686 | case Intrinsic::loongarch_lasx_xvmin_hu: |
2687 | case Intrinsic::loongarch_lasx_xvmin_wu: |
2688 | case Intrinsic::loongarch_lasx_xvmin_du: |
2689 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2690 | N2: N->getOperand(Num: 2)); |
2691 | case Intrinsic::loongarch_lsx_vmini_b: |
2692 | case Intrinsic::loongarch_lsx_vmini_h: |
2693 | case Intrinsic::loongarch_lsx_vmini_w: |
2694 | case Intrinsic::loongarch_lsx_vmini_d: |
2695 | case Intrinsic::loongarch_lasx_xvmini_b: |
2696 | case Intrinsic::loongarch_lasx_xvmini_h: |
2697 | case Intrinsic::loongarch_lasx_xvmini_w: |
2698 | case Intrinsic::loongarch_lasx_xvmini_d: |
2699 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2700 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
2701 | case Intrinsic::loongarch_lsx_vmini_bu: |
2702 | case Intrinsic::loongarch_lsx_vmini_hu: |
2703 | case Intrinsic::loongarch_lsx_vmini_wu: |
2704 | case Intrinsic::loongarch_lsx_vmini_du: |
2705 | case Intrinsic::loongarch_lasx_xvmini_bu: |
2706 | case Intrinsic::loongarch_lasx_xvmini_hu: |
2707 | case Intrinsic::loongarch_lasx_xvmini_wu: |
2708 | case Intrinsic::loongarch_lasx_xvmini_du: |
2709 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2710 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
2711 | case Intrinsic::loongarch_lsx_vmul_b: |
2712 | case Intrinsic::loongarch_lsx_vmul_h: |
2713 | case Intrinsic::loongarch_lsx_vmul_w: |
2714 | case Intrinsic::loongarch_lsx_vmul_d: |
2715 | case Intrinsic::loongarch_lasx_xvmul_b: |
2716 | case Intrinsic::loongarch_lasx_xvmul_h: |
2717 | case Intrinsic::loongarch_lasx_xvmul_w: |
2718 | case Intrinsic::loongarch_lasx_xvmul_d: |
2719 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2720 | N2: N->getOperand(Num: 2)); |
2721 | case Intrinsic::loongarch_lsx_vmadd_b: |
2722 | case Intrinsic::loongarch_lsx_vmadd_h: |
2723 | case Intrinsic::loongarch_lsx_vmadd_w: |
2724 | case Intrinsic::loongarch_lsx_vmadd_d: |
2725 | case Intrinsic::loongarch_lasx_xvmadd_b: |
2726 | case Intrinsic::loongarch_lasx_xvmadd_h: |
2727 | case Intrinsic::loongarch_lasx_xvmadd_w: |
2728 | case Intrinsic::loongarch_lasx_xvmadd_d: { |
2729 | EVT ResTy = N->getValueType(ResNo: 0); |
2730 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
2731 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
2732 | N2: N->getOperand(Num: 3))); |
2733 | } |
2734 | case Intrinsic::loongarch_lsx_vmsub_b: |
2735 | case Intrinsic::loongarch_lsx_vmsub_h: |
2736 | case Intrinsic::loongarch_lsx_vmsub_w: |
2737 | case Intrinsic::loongarch_lsx_vmsub_d: |
2738 | case Intrinsic::loongarch_lasx_xvmsub_b: |
2739 | case Intrinsic::loongarch_lasx_xvmsub_h: |
2740 | case Intrinsic::loongarch_lasx_xvmsub_w: |
2741 | case Intrinsic::loongarch_lasx_xvmsub_d: { |
2742 | EVT ResTy = N->getValueType(ResNo: 0); |
2743 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
2744 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
2745 | N2: N->getOperand(Num: 3))); |
2746 | } |
2747 | case Intrinsic::loongarch_lsx_vdiv_b: |
2748 | case Intrinsic::loongarch_lsx_vdiv_h: |
2749 | case Intrinsic::loongarch_lsx_vdiv_w: |
2750 | case Intrinsic::loongarch_lsx_vdiv_d: |
2751 | case Intrinsic::loongarch_lasx_xvdiv_b: |
2752 | case Intrinsic::loongarch_lasx_xvdiv_h: |
2753 | case Intrinsic::loongarch_lasx_xvdiv_w: |
2754 | case Intrinsic::loongarch_lasx_xvdiv_d: |
2755 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2756 | N2: N->getOperand(Num: 2)); |
2757 | case Intrinsic::loongarch_lsx_vdiv_bu: |
2758 | case Intrinsic::loongarch_lsx_vdiv_hu: |
2759 | case Intrinsic::loongarch_lsx_vdiv_wu: |
2760 | case Intrinsic::loongarch_lsx_vdiv_du: |
2761 | case Intrinsic::loongarch_lasx_xvdiv_bu: |
2762 | case Intrinsic::loongarch_lasx_xvdiv_hu: |
2763 | case Intrinsic::loongarch_lasx_xvdiv_wu: |
2764 | case Intrinsic::loongarch_lasx_xvdiv_du: |
2765 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2766 | N2: N->getOperand(Num: 2)); |
2767 | case Intrinsic::loongarch_lsx_vmod_b: |
2768 | case Intrinsic::loongarch_lsx_vmod_h: |
2769 | case Intrinsic::loongarch_lsx_vmod_w: |
2770 | case Intrinsic::loongarch_lsx_vmod_d: |
2771 | case Intrinsic::loongarch_lasx_xvmod_b: |
2772 | case Intrinsic::loongarch_lasx_xvmod_h: |
2773 | case Intrinsic::loongarch_lasx_xvmod_w: |
2774 | case Intrinsic::loongarch_lasx_xvmod_d: |
2775 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2776 | N2: N->getOperand(Num: 2)); |
2777 | case Intrinsic::loongarch_lsx_vmod_bu: |
2778 | case Intrinsic::loongarch_lsx_vmod_hu: |
2779 | case Intrinsic::loongarch_lsx_vmod_wu: |
2780 | case Intrinsic::loongarch_lsx_vmod_du: |
2781 | case Intrinsic::loongarch_lasx_xvmod_bu: |
2782 | case Intrinsic::loongarch_lasx_xvmod_hu: |
2783 | case Intrinsic::loongarch_lasx_xvmod_wu: |
2784 | case Intrinsic::loongarch_lasx_xvmod_du: |
2785 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2786 | N2: N->getOperand(Num: 2)); |
2787 | case Intrinsic::loongarch_lsx_vand_v: |
2788 | case Intrinsic::loongarch_lasx_xvand_v: |
2789 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2790 | N2: N->getOperand(Num: 2)); |
2791 | case Intrinsic::loongarch_lsx_vor_v: |
2792 | case Intrinsic::loongarch_lasx_xvor_v: |
2793 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2794 | N2: N->getOperand(Num: 2)); |
2795 | case Intrinsic::loongarch_lsx_vxor_v: |
2796 | case Intrinsic::loongarch_lasx_xvxor_v: |
2797 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2798 | N2: N->getOperand(Num: 2)); |
2799 | case Intrinsic::loongarch_lsx_vnor_v: |
2800 | case Intrinsic::loongarch_lasx_xvnor_v: { |
2801 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2802 | N2: N->getOperand(Num: 2)); |
2803 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
2804 | } |
2805 | case Intrinsic::loongarch_lsx_vandi_b: |
2806 | case Intrinsic::loongarch_lasx_xvandi_b: |
2807 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2808 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
2809 | case Intrinsic::loongarch_lsx_vori_b: |
2810 | case Intrinsic::loongarch_lasx_xvori_b: |
2811 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2812 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
2813 | case Intrinsic::loongarch_lsx_vxori_b: |
2814 | case Intrinsic::loongarch_lasx_xvxori_b: |
2815 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2816 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
2817 | case Intrinsic::loongarch_lsx_vsll_b: |
2818 | case Intrinsic::loongarch_lsx_vsll_h: |
2819 | case Intrinsic::loongarch_lsx_vsll_w: |
2820 | case Intrinsic::loongarch_lsx_vsll_d: |
2821 | case Intrinsic::loongarch_lasx_xvsll_b: |
2822 | case Intrinsic::loongarch_lasx_xvsll_h: |
2823 | case Intrinsic::loongarch_lasx_xvsll_w: |
2824 | case Intrinsic::loongarch_lasx_xvsll_d: |
2825 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2826 | N2: truncateVecElts(Node: N, DAG)); |
2827 | case Intrinsic::loongarch_lsx_vslli_b: |
2828 | case Intrinsic::loongarch_lasx_xvslli_b: |
2829 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2830 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
2831 | case Intrinsic::loongarch_lsx_vslli_h: |
2832 | case Intrinsic::loongarch_lasx_xvslli_h: |
2833 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2834 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
2835 | case Intrinsic::loongarch_lsx_vslli_w: |
2836 | case Intrinsic::loongarch_lasx_xvslli_w: |
2837 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2838 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
2839 | case Intrinsic::loongarch_lsx_vslli_d: |
2840 | case Intrinsic::loongarch_lasx_xvslli_d: |
2841 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2842 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
2843 | case Intrinsic::loongarch_lsx_vsrl_b: |
2844 | case Intrinsic::loongarch_lsx_vsrl_h: |
2845 | case Intrinsic::loongarch_lsx_vsrl_w: |
2846 | case Intrinsic::loongarch_lsx_vsrl_d: |
2847 | case Intrinsic::loongarch_lasx_xvsrl_b: |
2848 | case Intrinsic::loongarch_lasx_xvsrl_h: |
2849 | case Intrinsic::loongarch_lasx_xvsrl_w: |
2850 | case Intrinsic::loongarch_lasx_xvsrl_d: |
2851 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2852 | N2: truncateVecElts(Node: N, DAG)); |
2853 | case Intrinsic::loongarch_lsx_vsrli_b: |
2854 | case Intrinsic::loongarch_lasx_xvsrli_b: |
2855 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2856 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
2857 | case Intrinsic::loongarch_lsx_vsrli_h: |
2858 | case Intrinsic::loongarch_lasx_xvsrli_h: |
2859 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2860 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
2861 | case Intrinsic::loongarch_lsx_vsrli_w: |
2862 | case Intrinsic::loongarch_lasx_xvsrli_w: |
2863 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2864 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
2865 | case Intrinsic::loongarch_lsx_vsrli_d: |
2866 | case Intrinsic::loongarch_lasx_xvsrli_d: |
2867 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2868 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
2869 | case Intrinsic::loongarch_lsx_vsra_b: |
2870 | case Intrinsic::loongarch_lsx_vsra_h: |
2871 | case Intrinsic::loongarch_lsx_vsra_w: |
2872 | case Intrinsic::loongarch_lsx_vsra_d: |
2873 | case Intrinsic::loongarch_lasx_xvsra_b: |
2874 | case Intrinsic::loongarch_lasx_xvsra_h: |
2875 | case Intrinsic::loongarch_lasx_xvsra_w: |
2876 | case Intrinsic::loongarch_lasx_xvsra_d: |
2877 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2878 | N2: truncateVecElts(Node: N, DAG)); |
2879 | case Intrinsic::loongarch_lsx_vsrai_b: |
2880 | case Intrinsic::loongarch_lasx_xvsrai_b: |
2881 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2882 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
2883 | case Intrinsic::loongarch_lsx_vsrai_h: |
2884 | case Intrinsic::loongarch_lasx_xvsrai_h: |
2885 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2886 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
2887 | case Intrinsic::loongarch_lsx_vsrai_w: |
2888 | case Intrinsic::loongarch_lasx_xvsrai_w: |
2889 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2890 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
2891 | case Intrinsic::loongarch_lsx_vsrai_d: |
2892 | case Intrinsic::loongarch_lasx_xvsrai_d: |
2893 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2894 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
2895 | case Intrinsic::loongarch_lsx_vclz_b: |
2896 | case Intrinsic::loongarch_lsx_vclz_h: |
2897 | case Intrinsic::loongarch_lsx_vclz_w: |
2898 | case Intrinsic::loongarch_lsx_vclz_d: |
2899 | case Intrinsic::loongarch_lasx_xvclz_b: |
2900 | case Intrinsic::loongarch_lasx_xvclz_h: |
2901 | case Intrinsic::loongarch_lasx_xvclz_w: |
2902 | case Intrinsic::loongarch_lasx_xvclz_d: |
2903 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
2904 | case Intrinsic::loongarch_lsx_vpcnt_b: |
2905 | case Intrinsic::loongarch_lsx_vpcnt_h: |
2906 | case Intrinsic::loongarch_lsx_vpcnt_w: |
2907 | case Intrinsic::loongarch_lsx_vpcnt_d: |
2908 | case Intrinsic::loongarch_lasx_xvpcnt_b: |
2909 | case Intrinsic::loongarch_lasx_xvpcnt_h: |
2910 | case Intrinsic::loongarch_lasx_xvpcnt_w: |
2911 | case Intrinsic::loongarch_lasx_xvpcnt_d: |
2912 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
2913 | case Intrinsic::loongarch_lsx_vbitclr_b: |
2914 | case Intrinsic::loongarch_lsx_vbitclr_h: |
2915 | case Intrinsic::loongarch_lsx_vbitclr_w: |
2916 | case Intrinsic::loongarch_lsx_vbitclr_d: |
2917 | case Intrinsic::loongarch_lasx_xvbitclr_b: |
2918 | case Intrinsic::loongarch_lasx_xvbitclr_h: |
2919 | case Intrinsic::loongarch_lasx_xvbitclr_w: |
2920 | case Intrinsic::loongarch_lasx_xvbitclr_d: |
2921 | return lowerVectorBitClear(Node: N, DAG); |
2922 | case Intrinsic::loongarch_lsx_vbitclri_b: |
2923 | case Intrinsic::loongarch_lasx_xvbitclri_b: |
2924 | return lowerVectorBitClearImm<3>(Node: N, DAG); |
2925 | case Intrinsic::loongarch_lsx_vbitclri_h: |
2926 | case Intrinsic::loongarch_lasx_xvbitclri_h: |
2927 | return lowerVectorBitClearImm<4>(Node: N, DAG); |
2928 | case Intrinsic::loongarch_lsx_vbitclri_w: |
2929 | case Intrinsic::loongarch_lasx_xvbitclri_w: |
2930 | return lowerVectorBitClearImm<5>(Node: N, DAG); |
2931 | case Intrinsic::loongarch_lsx_vbitclri_d: |
2932 | case Intrinsic::loongarch_lasx_xvbitclri_d: |
2933 | return lowerVectorBitClearImm<6>(Node: N, DAG); |
2934 | case Intrinsic::loongarch_lsx_vbitset_b: |
2935 | case Intrinsic::loongarch_lsx_vbitset_h: |
2936 | case Intrinsic::loongarch_lsx_vbitset_w: |
2937 | case Intrinsic::loongarch_lsx_vbitset_d: |
2938 | case Intrinsic::loongarch_lasx_xvbitset_b: |
2939 | case Intrinsic::loongarch_lasx_xvbitset_h: |
2940 | case Intrinsic::loongarch_lasx_xvbitset_w: |
2941 | case Intrinsic::loongarch_lasx_xvbitset_d: { |
2942 | EVT VecTy = N->getValueType(ResNo: 0); |
2943 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
2944 | return DAG.getNode( |
2945 | Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
2946 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
2947 | } |
2948 | case Intrinsic::loongarch_lsx_vbitseti_b: |
2949 | case Intrinsic::loongarch_lasx_xvbitseti_b: |
2950 | return lowerVectorBitSetImm<3>(Node: N, DAG); |
2951 | case Intrinsic::loongarch_lsx_vbitseti_h: |
2952 | case Intrinsic::loongarch_lasx_xvbitseti_h: |
2953 | return lowerVectorBitSetImm<4>(Node: N, DAG); |
2954 | case Intrinsic::loongarch_lsx_vbitseti_w: |
2955 | case Intrinsic::loongarch_lasx_xvbitseti_w: |
2956 | return lowerVectorBitSetImm<5>(Node: N, DAG); |
2957 | case Intrinsic::loongarch_lsx_vbitseti_d: |
2958 | case Intrinsic::loongarch_lasx_xvbitseti_d: |
2959 | return lowerVectorBitSetImm<6>(Node: N, DAG); |
2960 | case Intrinsic::loongarch_lsx_vbitrev_b: |
2961 | case Intrinsic::loongarch_lsx_vbitrev_h: |
2962 | case Intrinsic::loongarch_lsx_vbitrev_w: |
2963 | case Intrinsic::loongarch_lsx_vbitrev_d: |
2964 | case Intrinsic::loongarch_lasx_xvbitrev_b: |
2965 | case Intrinsic::loongarch_lasx_xvbitrev_h: |
2966 | case Intrinsic::loongarch_lasx_xvbitrev_w: |
2967 | case Intrinsic::loongarch_lasx_xvbitrev_d: { |
2968 | EVT VecTy = N->getValueType(ResNo: 0); |
2969 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
2970 | return DAG.getNode( |
2971 | Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
2972 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
2973 | } |
2974 | case Intrinsic::loongarch_lsx_vbitrevi_b: |
2975 | case Intrinsic::loongarch_lasx_xvbitrevi_b: |
2976 | return lowerVectorBitRevImm<3>(Node: N, DAG); |
2977 | case Intrinsic::loongarch_lsx_vbitrevi_h: |
2978 | case Intrinsic::loongarch_lasx_xvbitrevi_h: |
2979 | return lowerVectorBitRevImm<4>(Node: N, DAG); |
2980 | case Intrinsic::loongarch_lsx_vbitrevi_w: |
2981 | case Intrinsic::loongarch_lasx_xvbitrevi_w: |
2982 | return lowerVectorBitRevImm<5>(Node: N, DAG); |
2983 | case Intrinsic::loongarch_lsx_vbitrevi_d: |
2984 | case Intrinsic::loongarch_lasx_xvbitrevi_d: |
2985 | return lowerVectorBitRevImm<6>(Node: N, DAG); |
2986 | case Intrinsic::loongarch_lsx_vfadd_s: |
2987 | case Intrinsic::loongarch_lsx_vfadd_d: |
2988 | case Intrinsic::loongarch_lasx_xvfadd_s: |
2989 | case Intrinsic::loongarch_lasx_xvfadd_d: |
2990 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2991 | N2: N->getOperand(Num: 2)); |
2992 | case Intrinsic::loongarch_lsx_vfsub_s: |
2993 | case Intrinsic::loongarch_lsx_vfsub_d: |
2994 | case Intrinsic::loongarch_lasx_xvfsub_s: |
2995 | case Intrinsic::loongarch_lasx_xvfsub_d: |
2996 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
2997 | N2: N->getOperand(Num: 2)); |
2998 | case Intrinsic::loongarch_lsx_vfmul_s: |
2999 | case Intrinsic::loongarch_lsx_vfmul_d: |
3000 | case Intrinsic::loongarch_lasx_xvfmul_s: |
3001 | case Intrinsic::loongarch_lasx_xvfmul_d: |
3002 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3003 | N2: N->getOperand(Num: 2)); |
3004 | case Intrinsic::loongarch_lsx_vfdiv_s: |
3005 | case Intrinsic::loongarch_lsx_vfdiv_d: |
3006 | case Intrinsic::loongarch_lasx_xvfdiv_s: |
3007 | case Intrinsic::loongarch_lasx_xvfdiv_d: |
3008 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3009 | N2: N->getOperand(Num: 2)); |
3010 | case Intrinsic::loongarch_lsx_vfmadd_s: |
3011 | case Intrinsic::loongarch_lsx_vfmadd_d: |
3012 | case Intrinsic::loongarch_lasx_xvfmadd_s: |
3013 | case Intrinsic::loongarch_lasx_xvfmadd_d: |
3014 | return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3015 | N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3)); |
3016 | case Intrinsic::loongarch_lsx_vinsgr2vr_b: |
3017 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3018 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
3019 | N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget)); |
3020 | case Intrinsic::loongarch_lsx_vinsgr2vr_h: |
3021 | case Intrinsic::loongarch_lasx_xvinsgr2vr_w: |
3022 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3023 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
3024 | N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget)); |
3025 | case Intrinsic::loongarch_lsx_vinsgr2vr_w: |
3026 | case Intrinsic::loongarch_lasx_xvinsgr2vr_d: |
3027 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3028 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
3029 | N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget)); |
3030 | case Intrinsic::loongarch_lsx_vinsgr2vr_d: |
3031 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3032 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
3033 | N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget)); |
3034 | case Intrinsic::loongarch_lsx_vreplgr2vr_b: |
3035 | case Intrinsic::loongarch_lsx_vreplgr2vr_h: |
3036 | case Intrinsic::loongarch_lsx_vreplgr2vr_w: |
3037 | case Intrinsic::loongarch_lsx_vreplgr2vr_d: |
3038 | case Intrinsic::loongarch_lasx_xvreplgr2vr_b: |
3039 | case Intrinsic::loongarch_lasx_xvreplgr2vr_h: |
3040 | case Intrinsic::loongarch_lasx_xvreplgr2vr_w: |
3041 | case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { |
3042 | EVT ResTy = N->getValueType(ResNo: 0); |
3043 | SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(Num: 1)); |
3044 | return DAG.getBuildVector(VT: ResTy, DL, Ops); |
3045 | } |
3046 | case Intrinsic::loongarch_lsx_vreplve_b: |
3047 | case Intrinsic::loongarch_lsx_vreplve_h: |
3048 | case Intrinsic::loongarch_lsx_vreplve_w: |
3049 | case Intrinsic::loongarch_lsx_vreplve_d: |
3050 | case Intrinsic::loongarch_lasx_xvreplve_b: |
3051 | case Intrinsic::loongarch_lasx_xvreplve_h: |
3052 | case Intrinsic::loongarch_lasx_xvreplve_w: |
3053 | case Intrinsic::loongarch_lasx_xvreplve_d: |
3054 | return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0), |
3055 | N1: N->getOperand(Num: 1), |
3056 | N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
3057 | Operand: N->getOperand(Num: 2))); |
3058 | } |
3059 | return SDValue(); |
3060 | } |
3061 | |
3062 | SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, |
3063 | DAGCombinerInfo &DCI) const { |
3064 | SelectionDAG &DAG = DCI.DAG; |
3065 | switch (N->getOpcode()) { |
3066 | default: |
3067 | break; |
3068 | case ISD::AND: |
3069 | return performANDCombine(N, DAG, DCI, Subtarget); |
3070 | case ISD::OR: |
3071 | return performORCombine(N, DAG, DCI, Subtarget); |
3072 | case ISD::SRL: |
3073 | return performSRLCombine(N, DAG, DCI, Subtarget); |
3074 | case LoongArchISD::BITREV_W: |
3075 | return performBITREV_WCombine(N, DAG, DCI, Subtarget); |
3076 | case ISD::INTRINSIC_WO_CHAIN: |
3077 | return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); |
3078 | } |
3079 | return SDValue(); |
3080 | } |
3081 | |
// Optionally insert a software divide-by-zero check after the integer div/mod
// instruction MI. Controlled by the -loongarch-check-zero-division option
// (ZeroDivCheck): when disabled, MBB is returned unchanged. Otherwise the
// block is split so that a `break 7` (BRK_DIVZERO) executes whenever the
// divisor is zero. Returns the block in which insertion should continue.
static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
                                              MachineBasicBlock *MBB) {
  if (!ZeroDivCheck)
    return MBB;

  // Build instructions:
  // MBB:
  //   div(or mod) $dst, $dividend, $divisor
  //   bnez $divisor, SinkMBB
  // BreakMBB:
  //   break 7 // BRK_DIVZERO
  // SinkMBB:
  //   fallthrough
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *MF = MBB->getParent();
  auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
  auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
  MF->insert(MBBI: It, MBB: BreakMBB);
  MF->insert(MBBI: It, MBB: SinkMBB);

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);

  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  // Operand 2 of the div/mod instruction is the divisor register.
  MachineOperand &Divisor = MI.getOperand(i: 2);
  Register DivisorReg = Divisor.getReg();

  // MBB: branch over the trap when the divisor is non-zero.
  BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
      .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
      .addMBB(SinkMBB);
  MBB->addSuccessor(Succ: BreakMBB);
  MBB->addSuccessor(Succ: SinkMBB);

  // BreakMBB:
  // See linux header file arch/loongarch/include/uapi/asm/break.h for the
  // definition of BRK_DIVZERO.
  BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
  BreakMBB->addSuccessor(Succ: SinkMBB);

  // Clear Divisor's kill flag: the divisor is now also read by the BNEZ that
  // was inserted after the div/mod, so it must not be marked killed there.
  Divisor.setIsKill(false);

  return SinkMBB;
}
3130 | |
// Expand a PseudoVBZ/PseudoVBNZ-family pseudo (LSX and LASX variants) into a
// vector set-condition instruction plus a diamond of blocks that materializes
// the boolean result into a GPR:
//   BB:      $fcc = vset<cond> $vreg ; bcnez $fcc, TrueBB
//   FalseBB: $rd1 = addi.w $r0, 0   ; b SinkBB
//   TrueBB:  $rd2 = addi.w $r0, 1
//   SinkBB:  $dst = phi [$rd1, FalseBB], [$rd2, TrueBB]
// Returns SinkBB, where insertion continues.
static MachineBasicBlock *
emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
                        const LoongArchSubtarget &Subtarget) {
  // Map each pseudo to the concrete vector set-condition opcode.
  unsigned CondOpc;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode" );
  case LoongArch::PseudoVBZ:
    CondOpc = LoongArch::VSETEQZ_V;
    break;
  case LoongArch::PseudoVBZ_B:
    CondOpc = LoongArch::VSETANYEQZ_B;
    break;
  case LoongArch::PseudoVBZ_H:
    CondOpc = LoongArch::VSETANYEQZ_H;
    break;
  case LoongArch::PseudoVBZ_W:
    CondOpc = LoongArch::VSETANYEQZ_W;
    break;
  case LoongArch::PseudoVBZ_D:
    CondOpc = LoongArch::VSETANYEQZ_D;
    break;
  case LoongArch::PseudoVBNZ:
    CondOpc = LoongArch::VSETNEZ_V;
    break;
  case LoongArch::PseudoVBNZ_B:
    CondOpc = LoongArch::VSETALLNEZ_B;
    break;
  case LoongArch::PseudoVBNZ_H:
    CondOpc = LoongArch::VSETALLNEZ_H;
    break;
  case LoongArch::PseudoVBNZ_W:
    CondOpc = LoongArch::VSETALLNEZ_W;
    break;
  case LoongArch::PseudoVBNZ_D:
    CondOpc = LoongArch::VSETALLNEZ_D;
    break;
  case LoongArch::PseudoXVBZ:
    CondOpc = LoongArch::XVSETEQZ_V;
    break;
  case LoongArch::PseudoXVBZ_B:
    CondOpc = LoongArch::XVSETANYEQZ_B;
    break;
  case LoongArch::PseudoXVBZ_H:
    CondOpc = LoongArch::XVSETANYEQZ_H;
    break;
  case LoongArch::PseudoXVBZ_W:
    CondOpc = LoongArch::XVSETANYEQZ_W;
    break;
  case LoongArch::PseudoXVBZ_D:
    CondOpc = LoongArch::XVSETANYEQZ_D;
    break;
  case LoongArch::PseudoXVBNZ:
    CondOpc = LoongArch::XVSETNEZ_V;
    break;
  case LoongArch::PseudoXVBNZ_B:
    CondOpc = LoongArch::XVSETALLNEZ_B;
    break;
  case LoongArch::PseudoXVBNZ_H:
    CondOpc = LoongArch::XVSETALLNEZ_H;
    break;
  case LoongArch::PseudoXVBNZ_W:
    CondOpc = LoongArch::XVSETALLNEZ_W;
    break;
  case LoongArch::PseudoXVBNZ_D:
    CondOpc = LoongArch::XVSETALLNEZ_D;
    break;
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  MachineFunction::iterator It = ++BB->getIterator();

  // Create the three new blocks of the diamond, inserted right after BB.
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);

  F->insert(MBBI: It, MBB: FalseBB);
  F->insert(MBBI: It, MBB: TrueBB);
  F->insert(MBBI: It, MBB: SinkBB);

  // Transfer the remainder of MBB and its successor edges to Sink.
  SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
  SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);

  // Insert the real instruction to BB: set the condition-flag register from
  // the vector operand (operand 1 of the pseudo).
  Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
  BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg());

  // Insert branch: taken (condition true) goes to TrueBB, fallthrough to
  // FalseBB.
  BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
  BB->addSuccessor(Succ: FalseBB);
  BB->addSuccessor(Succ: TrueBB);

  // FalseBB: materialize 0 and jump to the join block.
  Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
  BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
      .addReg(LoongArch::R0)
      .addImm(0);
  BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
  FalseBB->addSuccessor(Succ: SinkBB);

  // TrueBB: materialize 1 and fall through to the join block.
  Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
  BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
      .addReg(LoongArch::R0)
      .addImm(1);
  TrueBB->addSuccessor(Succ: SinkBB);

  // SinkBB: merge the results.
  BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
          MI.getOperand(0).getReg())
      .addReg(RD1)
      .addMBB(FalseBB)
      .addReg(RD2)
      .addMBB(TrueBB);

  // The pseudo instruction is gone now.
  MI.eraseFromParent();
  return SinkBB;
}
3255 | |
// Expand PseudoXVINSGR2VR_{B,H}: insert a GPR element into a 256-bit LASX
// vector at lane Idx using the 128-bit VINSGR2VR_{B,H} instruction on the
// relevant half. When Idx falls in the high half, XVPERMI_Q is used first to
// bring that half into a position VINSGR2VR can reach, and again afterwards
// to recombine the modified half with the untouched one.
static MachineBasicBlock *
emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
                     const LoongArchSubtarget &Subtarget) {
  unsigned InsOp;
  // HalfSize = number of lanes in one 128-bit half for this element width.
  unsigned HalfSize;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode" );
  case LoongArch::PseudoXVINSGR2VR_B:
    HalfSize = 16;
    InsOp = LoongArch::VINSGR2VR_B;
    break;
  case LoongArch::PseudoXVINSGR2VR_H:
    HalfSize = 8;
    InsOp = LoongArch::VINSGR2VR_H;
    break;
  }
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
  const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  // XDst = vector_insert XSrc, Elt, Idx
  Register XDst = MI.getOperand(i: 0).getReg();
  Register XSrc = MI.getOperand(i: 1).getReg();
  Register Elt = MI.getOperand(i: 2).getReg();
  unsigned Idx = MI.getOperand(i: 3).getImm();

  Register ScratchReg1 = XSrc;
  if (Idx >= HalfSize) {
    // Target lane is in the high half: produce a vector whose low 128 bits
    // hold XSrc's high half (imm 1 presumably selects that permutation —
    // confirm against the LASX xvpermi.q encoding).
    ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
        .addReg(XSrc)
        .addReg(XSrc)
        .addImm(1);
  }

  // Copy the (possibly swapped) low 128 bits into an LSX register and do the
  // actual element insert there, with the index rebased into the half.
  Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
  Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);
  BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
      .addReg(ScratchReg1, 0, LoongArch::sub_128);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2)
      .addReg(RegNo: ScratchSubReg1)
      .addReg(RegNo: Elt)
      .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx);

  // Widen the modified 128-bit value back to 256 bits. For a low-half insert
  // this directly defines XDst; for a high-half insert it goes through a
  // scratch register and is merged below.
  Register ScratchReg2 = XDst;
  if (Idx >= HalfSize)
    ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);

  BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
      .addImm(0)
      .addReg(ScratchSubReg2)
      .addImm(LoongArch::sub_128);

  if (Idx >= HalfSize)
    // Recombine: keep XSrc's low half and place the modified half on top
    // (imm 2 presumably selects that combination — verify against the
    // xvpermi.q spec).
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
        .addReg(XSrc)
        .addReg(ScratchReg2)
        .addImm(2);

  MI.eraseFromParent();
  return BB;
}
3320 | |
3321 | MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( |
3322 | MachineInstr &MI, MachineBasicBlock *BB) const { |
3323 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3324 | DebugLoc DL = MI.getDebugLoc(); |
3325 | |
3326 | switch (MI.getOpcode()) { |
3327 | default: |
3328 | llvm_unreachable("Unexpected instr type to insert" ); |
3329 | case LoongArch::DIV_W: |
3330 | case LoongArch::DIV_WU: |
3331 | case LoongArch::MOD_W: |
3332 | case LoongArch::MOD_WU: |
3333 | case LoongArch::DIV_D: |
3334 | case LoongArch::DIV_DU: |
3335 | case LoongArch::MOD_D: |
3336 | case LoongArch::MOD_DU: |
3337 | return insertDivByZeroTrap(MI, MBB: BB); |
3338 | break; |
3339 | case LoongArch::WRFCSR: { |
3340 | BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), |
3341 | LoongArch::FCSR0 + MI.getOperand(0).getImm()) |
3342 | .addReg(MI.getOperand(1).getReg()); |
3343 | MI.eraseFromParent(); |
3344 | return BB; |
3345 | } |
3346 | case LoongArch::RDFCSR: { |
3347 | MachineInstr *ReadFCSR = |
3348 | BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), |
3349 | MI.getOperand(0).getReg()) |
3350 | .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); |
3351 | ReadFCSR->getOperand(i: 1).setIsUndef(); |
3352 | MI.eraseFromParent(); |
3353 | return BB; |
3354 | } |
3355 | case LoongArch::PseudoVBZ: |
3356 | case LoongArch::PseudoVBZ_B: |
3357 | case LoongArch::PseudoVBZ_H: |
3358 | case LoongArch::PseudoVBZ_W: |
3359 | case LoongArch::PseudoVBZ_D: |
3360 | case LoongArch::PseudoVBNZ: |
3361 | case LoongArch::PseudoVBNZ_B: |
3362 | case LoongArch::PseudoVBNZ_H: |
3363 | case LoongArch::PseudoVBNZ_W: |
3364 | case LoongArch::PseudoVBNZ_D: |
3365 | case LoongArch::PseudoXVBZ: |
3366 | case LoongArch::PseudoXVBZ_B: |
3367 | case LoongArch::PseudoXVBZ_H: |
3368 | case LoongArch::PseudoXVBZ_W: |
3369 | case LoongArch::PseudoXVBZ_D: |
3370 | case LoongArch::PseudoXVBNZ: |
3371 | case LoongArch::PseudoXVBNZ_B: |
3372 | case LoongArch::PseudoXVBNZ_H: |
3373 | case LoongArch::PseudoXVBNZ_W: |
3374 | case LoongArch::PseudoXVBNZ_D: |
3375 | return emitVecCondBranchPseudo(MI, BB, Subtarget); |
3376 | case LoongArch::PseudoXVINSGR2VR_B: |
3377 | case LoongArch::PseudoXVINSGR2VR_H: |
3378 | return emitPseudoXVINSGR2VR(MI, BB, Subtarget); |
3379 | } |
3380 | } |
3381 | |
3382 | bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( |
3383 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
3384 | unsigned *Fast) const { |
3385 | if (!Subtarget.hasUAL()) |
3386 | return false; |
3387 | |
3388 | // TODO: set reasonable speed number. |
3389 | if (Fast) |
3390 | *Fast = 1; |
3391 | return true; |
3392 | } |
3393 | |
// Return the printable name of a LoongArch-specific SelectionDAG node opcode
// (used by DAG dumps/debug output), or nullptr for opcodes not listed here.
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((LoongArchISD::NodeType)Opcode) {
  case LoongArchISD::FIRST_NUMBER:
    break;

// Expands to one case per node, returning the "LoongArchISD::<node>" string.
#define NODE_NAME_CASE(node)                                                   \
  case LoongArchISD::node:                                                     \
    return "LoongArchISD::" #node;

    // TODO: Add more target-dependent nodes later.
    NODE_NAME_CASE(CALL)
    NODE_NAME_CASE(CALL_MEDIUM)
    NODE_NAME_CASE(CALL_LARGE)
    NODE_NAME_CASE(RET)
    NODE_NAME_CASE(TAIL)
    NODE_NAME_CASE(TAIL_MEDIUM)
    NODE_NAME_CASE(TAIL_LARGE)
    NODE_NAME_CASE(SLL_W)
    NODE_NAME_CASE(SRA_W)
    NODE_NAME_CASE(SRL_W)
    NODE_NAME_CASE(BSTRINS)
    NODE_NAME_CASE(BSTRPICK)
    NODE_NAME_CASE(MOVGR2FR_W_LA64)
    NODE_NAME_CASE(MOVFR2GR_S_LA64)
    NODE_NAME_CASE(FTINT)
    NODE_NAME_CASE(REVB_2H)
    NODE_NAME_CASE(REVB_2W)
    NODE_NAME_CASE(BITREV_4B)
    NODE_NAME_CASE(BITREV_W)
    NODE_NAME_CASE(ROTR_W)
    NODE_NAME_CASE(ROTL_W)
    NODE_NAME_CASE(CLZ_W)
    NODE_NAME_CASE(CTZ_W)
    NODE_NAME_CASE(DBAR)
    NODE_NAME_CASE(IBAR)
    NODE_NAME_CASE(BREAK)
    NODE_NAME_CASE(SYSCALL)
    NODE_NAME_CASE(CRC_W_B_W)
    NODE_NAME_CASE(CRC_W_H_W)
    NODE_NAME_CASE(CRC_W_W_W)
    NODE_NAME_CASE(CRC_W_D_W)
    NODE_NAME_CASE(CRCC_W_B_W)
    NODE_NAME_CASE(CRCC_W_H_W)
    NODE_NAME_CASE(CRCC_W_W_W)
    NODE_NAME_CASE(CRCC_W_D_W)
    NODE_NAME_CASE(CSRRD)
    NODE_NAME_CASE(CSRWR)
    NODE_NAME_CASE(CSRXCHG)
    NODE_NAME_CASE(IOCSRRD_B)
    NODE_NAME_CASE(IOCSRRD_H)
    NODE_NAME_CASE(IOCSRRD_W)
    NODE_NAME_CASE(IOCSRRD_D)
    NODE_NAME_CASE(IOCSRWR_B)
    NODE_NAME_CASE(IOCSRWR_H)
    NODE_NAME_CASE(IOCSRWR_W)
    NODE_NAME_CASE(IOCSRWR_D)
    NODE_NAME_CASE(CPUCFG)
    NODE_NAME_CASE(MOVGR2FCSR)
    NODE_NAME_CASE(MOVFCSR2GR)
    NODE_NAME_CASE(CACOP_D)
    NODE_NAME_CASE(CACOP_W)
    NODE_NAME_CASE(VPICK_SEXT_ELT)
    NODE_NAME_CASE(VPICK_ZEXT_ELT)
    NODE_NAME_CASE(VREPLVE)
    NODE_NAME_CASE(VALL_ZERO)
    NODE_NAME_CASE(VANY_ZERO)
    NODE_NAME_CASE(VALL_NONZERO)
    NODE_NAME_CASE(VANY_NONZERO)
  }
#undef NODE_NAME_CASE
  return nullptr;
}
3466 | |
3467 | //===----------------------------------------------------------------------===// |
3468 | // Calling Convention Implementation |
3469 | //===----------------------------------------------------------------------===// |
3470 | |
// Eight general-purpose registers a0-a7 used for passing integer arguments,
// with a0-a1 reused to return values. Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or with soft float ABI.
const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
                             LoongArch::R7, LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
// Eight floating-point registers fa0-fa7 used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// FPR32 and FPR64 alias each other.
const MCPhysReg ArgFPR64s[] = {
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};

// Eight LSX (128-bit) vector registers vr0-vr7 available for argument passing.
const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

// Eight LASX (256-bit) vector registers xr0-xr7 available for argument
// passing.
const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};
3495 | |
// Pass a 2*GRLen argument that has been split into two GRLen values through
// registers or the stack as necessary.
//
// VA1/ArgFlags1 describe the first half (already analyzed by the caller);
// ValNo2/ValVT2/LocVT2/ArgFlags2 describe the second half. Always returns
// false, i.e. assignment never fails.
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
                                     ISD::ArgFlagsTy ArgFlags2) {
  unsigned GRLenInBytes = GRLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg,
                                     LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    // The first half's slot honors the larger of the natural GRLen slot
    // alignment and the argument's original alignment.
    Align StackAlign =
        std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
                          Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
                          LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
    State.addLoc(V: CCValAssign::getMem(
        ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
        LocVT: LocVT2, HTP: CCValAssign::Full));
    return false;
  }
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(V: CCValAssign::getMem(
        ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
        LocVT: LocVT2, HTP: CCValAssign::Full));
  }
  return false;
}
3532 | |
// Implements the LoongArch calling convention. Returns true upon failure.
//
// ValNo/ValVT/LocInfo/ArgFlags describe the value being assigned and State
// accumulates the resulting locations. IsFixed is false for variadic
// arguments, IsRet is true when assigning return values, and OrigTy is the
// original IR type of the argument (used for the 2*GRLen alignment rule).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
                         unsigned ValNo, MVT ValVT,
                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                         CCState &State, bool IsFixed, bool IsRet,
                         Type *OrigTy) {
  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
  assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen" );
  MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
  MVT LocVT = ValVT;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // If passing a variadic argument, or if no FPR is available.
  bool UseGPRForFloat = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI" );
  case LoongArchABI::ABI_ILP32S:
  case LoongArchABI::ABI_ILP32F:
  case LoongArchABI::ABI_LP64F:
    report_fatal_error(reason: "Unimplemented ABI" );
    break;
  case LoongArchABI::ABI_ILP32D:
  case LoongArchABI::ABI_LP64D:
    // Hard-float ABIs: only fixed (non-variadic) floating-point arguments
    // may use FPRs.
    UseGPRForFloat = !IsFixed;
    break;
  case LoongArchABI::ABI_LP64S:
    break;
  }

  // FPR32 and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
    UseGPRForFloat = true;

  // Floats routed through GPRs are bit-cast into the integer type of the
  // same width.
  if (UseGPRForFloat && ValVT == MVT::f32) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
    // TODO: Handle passing f64 on LA32 with D feature.
    report_fatal_error(reason: "Passing f64 with GPR on LA32 is undefined" );
  }

  // If this is a variadic argument, the LoongArch calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
  // byte alignment. An aligned register should be used regardless of whether
  // the original argument was split during legalisation or not. The argument
  // will not be passed by registers if the original type is larger than
  // 2*GRLen, so the register alignment rule does not apply.
  unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
      DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync" );

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
    PendingArgFlags.push_back(Elt: ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      // More pieces of this split argument will follow; defer assignment
      // until the last piece arrives.
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" );
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
                                    ArgFlags2: ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = GRLen / 8;
  Align StackAlign = Align(GRLen / 8);

  if (ValVT == MVT::f32 && !UseGPRForFloat)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForFloat)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.is128BitVector())
    Reg = State.AllocateReg(ArgVRs);
  else if (ValVT.is256BitVector())
    Reg = State.AllocateReg(ArgXRs);
  else
    Reg = State.AllocateReg(ArgGPRs);

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" );
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" );
    // All pieces share the single register (or stack slot) that holds the
    // pointer to the indirectly-passed value.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(RegNo: Reg);
      else
        It.convertToMem(Offset: StackOffset);
      State.addLoc(V: It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }
  assert((!UseGPRForFloat || LocVT == GRLenVT) &&
         "Expected an GRLenVT at this stage" );

  if (Reg) {
    State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-cast is needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }

  State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
  return false;
}
3685 | |
3686 | void LoongArchTargetLowering::analyzeInputArgs( |
3687 | MachineFunction &MF, CCState &CCInfo, |
3688 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
3689 | LoongArchCCAssignFn Fn) const { |
3690 | FunctionType *FType = MF.getFunction().getFunctionType(); |
3691 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
3692 | MVT ArgVT = Ins[i].VT; |
3693 | Type *ArgTy = nullptr; |
3694 | if (IsRet) |
3695 | ArgTy = FType->getReturnType(); |
3696 | else if (Ins[i].isOrigArg()) |
3697 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
3698 | LoongArchABI::ABI ABI = |
3699 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
3700 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, |
3701 | CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { |
3702 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT |
3703 | << '\n'); |
3704 | llvm_unreachable("" ); |
3705 | } |
3706 | } |
3707 | } |
3708 | |
3709 | void LoongArchTargetLowering::analyzeOutputArgs( |
3710 | MachineFunction &MF, CCState &CCInfo, |
3711 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
3712 | CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { |
3713 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
3714 | MVT ArgVT = Outs[i].VT; |
3715 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
3716 | LoongArchABI::ABI ABI = |
3717 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
3718 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, |
3719 | CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { |
3720 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT |
3721 | << "\n" ); |
3722 | llvm_unreachable("" ); |
3723 | } |
3724 | } |
3725 | } |
3726 | |
3727 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
3728 | // values. |
3729 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
3730 | const CCValAssign &VA, const SDLoc &DL) { |
3731 | switch (VA.getLocInfo()) { |
3732 | default: |
3733 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
3734 | case CCValAssign::Full: |
3735 | case CCValAssign::Indirect: |
3736 | break; |
3737 | case CCValAssign::BCvt: |
3738 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
3739 | Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); |
3740 | else |
3741 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
3742 | break; |
3743 | } |
3744 | return Val; |
3745 | } |
3746 | |
3747 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
3748 | const CCValAssign &VA, const SDLoc &DL, |
3749 | const LoongArchTargetLowering &TLI) { |
3750 | MachineFunction &MF = DAG.getMachineFunction(); |
3751 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
3752 | EVT LocVT = VA.getLocVT(); |
3753 | SDValue Val; |
3754 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
3755 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
3756 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
3757 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
3758 | |
3759 | return convertLocVTToValVT(DAG, Val, VA, DL); |
3760 | } |
3761 | |
3762 | // The caller is responsible for loading the full value if the argument is |
3763 | // passed with CCValAssign::Indirect. |
3764 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
3765 | const CCValAssign &VA, const SDLoc &DL) { |
3766 | MachineFunction &MF = DAG.getMachineFunction(); |
3767 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3768 | EVT ValVT = VA.getValVT(); |
3769 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
3770 | /*IsImmutable=*/true); |
3771 | SDValue FIN = DAG.getFrameIndex( |
3772 | FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0))); |
3773 | |
3774 | ISD::LoadExtType ExtType; |
3775 | switch (VA.getLocInfo()) { |
3776 | default: |
3777 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
3778 | case CCValAssign::Full: |
3779 | case CCValAssign::Indirect: |
3780 | case CCValAssign::BCvt: |
3781 | ExtType = ISD::NON_EXTLOAD; |
3782 | break; |
3783 | } |
3784 | return DAG.getExtLoad( |
3785 | ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN, |
3786 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
3787 | } |
3788 | |
3789 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
3790 | const CCValAssign &VA, const SDLoc &DL) { |
3791 | EVT LocVT = VA.getLocVT(); |
3792 | |
3793 | switch (VA.getLocInfo()) { |
3794 | default: |
3795 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
3796 | case CCValAssign::Full: |
3797 | break; |
3798 | case CCValAssign::BCvt: |
3799 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
3800 | Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); |
3801 | else |
3802 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
3803 | break; |
3804 | } |
3805 | return Val; |
3806 | } |
3807 | |
3808 | static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
3809 | CCValAssign::LocInfo LocInfo, |
3810 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
3811 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
3812 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim |
3813 | // s0 s1 s2 s3 s4 s5 s6 s7 s8 |
3814 | static const MCPhysReg GPRList[] = { |
3815 | LoongArch::R23, LoongArch::R24, LoongArch::R25, |
3816 | LoongArch::R26, LoongArch::R27, LoongArch::R28, |
3817 | LoongArch::R29, LoongArch::R30, LoongArch::R31}; |
3818 | if (unsigned Reg = State.AllocateReg(GPRList)) { |
3819 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
3820 | return false; |
3821 | } |
3822 | } |
3823 | |
3824 | if (LocVT == MVT::f32) { |
3825 | // Pass in STG registers: F1, F2, F3, F4 |
3826 | // fs0,fs1,fs2,fs3 |
3827 | static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, |
3828 | LoongArch::F26, LoongArch::F27}; |
3829 | if (unsigned Reg = State.AllocateReg(FPR32List)) { |
3830 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
3831 | return false; |
3832 | } |
3833 | } |
3834 | |
3835 | if (LocVT == MVT::f64) { |
3836 | // Pass in STG registers: D1, D2, D3, D4 |
3837 | // fs4,fs5,fs6,fs7 |
3838 | static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, |
3839 | LoongArch::F30_64, LoongArch::F31_64}; |
3840 | if (unsigned Reg = State.AllocateReg(FPR64List)) { |
3841 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
3842 | return false; |
3843 | } |
3844 | } |
3845 | |
3846 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
3847 | return true; |
3848 | } |
3849 | |
// Transform physical registers into virtual registers.
SDValue LoongArchTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  // Only the C, Fast and GHC calling conventions are supported; GHC
  // additionally requires both the F and D floating-point features.
  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention" );
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
      report_fatal_error(
          reason: "GHC calling convention requires the F and D extensions" );
  }

  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Materialize the argument from its assigned register or stack slot.
    if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, TLI: *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
                                   PtrInfo: MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(ArgPartOffset == 0);
      // Consume all following pieces that belong to the same original
      // argument, loading each at its offset from the shared address.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
        SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
        InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
                                     PtrInfo: MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(Elt: ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
                            IsImmutable: true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
      RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
      FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
      SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
                                   PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
      // Detach the IR Value from the store's memory operand: the save-area
      // slot has no corresponding IR-level object.
      cast<StoreSDNode>(Val: Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(x: Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(x: Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
3980 | |
// A call is a candidate for tail-call emission whenever the IR call itself
// carries the "tail" marker; full eligibility is checked later during call
// lowering (see isEligibleForTailCallOptimization).
bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
3984 | |
3985 | // Check if the return value is used as only a return value, as otherwise |
3986 | // we can't perform a tail-call. |
3987 | bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, |
3988 | SDValue &Chain) const { |
3989 | if (N->getNumValues() != 1) |
3990 | return false; |
3991 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
3992 | return false; |
3993 | |
3994 | SDNode *Copy = *N->use_begin(); |
3995 | if (Copy->getOpcode() != ISD::CopyToReg) |
3996 | return false; |
3997 | |
3998 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
3999 | // isn't safe to perform a tail call. |
4000 | if (Copy->getGluedNode()) |
4001 | return false; |
4002 | |
4003 | // The copy must be used by a LoongArchISD::RET, and nothing else. |
4004 | bool HasRet = false; |
4005 | for (SDNode *Node : Copy->uses()) { |
4006 | if (Node->getOpcode() != LoongArchISD::RET) |
4007 | return false; |
4008 | HasRet = true; |
4009 | } |
4010 | |
4011 | if (!HasRet) |
4012 | return false; |
4013 | |
4014 | Chain = Copy->getOperand(Num: 0); |
4015 | return true; |
4016 | } |
4017 | |
4018 | // Check whether the call is eligible for tail call optimization. |
4019 | bool LoongArchTargetLowering::isEligibleForTailCallOptimization( |
4020 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
4021 | const SmallVectorImpl<CCValAssign> &ArgLocs) const { |
4022 | |
4023 | auto CalleeCC = CLI.CallConv; |
4024 | auto &Outs = CLI.Outs; |
4025 | auto &Caller = MF.getFunction(); |
4026 | auto CallerCC = Caller.getCallingConv(); |
4027 | |
4028 | // Do not tail call opt if the stack is used to pass parameters. |
4029 | if (CCInfo.getStackSize() != 0) |
4030 | return false; |
4031 | |
4032 | // Do not tail call opt if any parameters need to be passed indirectly. |
4033 | for (auto &VA : ArgLocs) |
4034 | if (VA.getLocInfo() == CCValAssign::Indirect) |
4035 | return false; |
4036 | |
4037 | // Do not tail call opt if either caller or callee uses struct return |
4038 | // semantics. |
4039 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
4040 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
4041 | if (IsCallerStructRet || IsCalleeStructRet) |
4042 | return false; |
4043 | |
4044 | // Do not tail call opt if either the callee or caller has a byval argument. |
4045 | for (auto &Arg : Outs) |
4046 | if (Arg.Flags.isByVal()) |
4047 | return false; |
4048 | |
4049 | // The callee has to preserve all registers the caller needs to preserve. |
4050 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4051 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
4052 | if (CalleeCC != CallerCC) { |
4053 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
4054 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
4055 | return false; |
4056 | } |
4057 | return true; |
4058 | } |
4059 | |
4060 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
4061 | return DAG.getDataLayout().getPrefTypeAlign( |
4062 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
4063 | } |
4064 | |
4065 | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
4066 | // and output parameter nodes. |
4067 | SDValue |
4068 | LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, |
4069 | SmallVectorImpl<SDValue> &InVals) const { |
4070 | SelectionDAG &DAG = CLI.DAG; |
4071 | SDLoc &DL = CLI.DL; |
4072 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
4073 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
4074 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
4075 | SDValue Chain = CLI.Chain; |
4076 | SDValue Callee = CLI.Callee; |
4077 | CallingConv::ID CallConv = CLI.CallConv; |
4078 | bool IsVarArg = CLI.IsVarArg; |
4079 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4080 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4081 | bool &IsTailCall = CLI.IsTailCall; |
4082 | |
4083 | MachineFunction &MF = DAG.getMachineFunction(); |
4084 | |
4085 | // Analyze the operands of the call, assigning locations to each operand. |
4086 | SmallVector<CCValAssign> ArgLocs; |
4087 | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
4088 | |
4089 | if (CallConv == CallingConv::GHC) |
4090 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC); |
4091 | else |
4092 | analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch); |
4093 | |
4094 | // Check if it's really possible to do a tail call. |
4095 | if (IsTailCall) |
4096 | IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs); |
4097 | |
4098 | if (IsTailCall) |
4099 | ++NumTailCalls; |
4100 | else if (CLI.CB && CLI.CB->isMustTailCall()) |
4101 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
4102 | "site marked musttail" ); |
4103 | |
4104 | // Get a count of how many bytes are to be pushed on the stack. |
4105 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
4106 | |
4107 | // Create local copies for byval args. |
4108 | SmallVector<SDValue> ByValArgs; |
4109 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
4110 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
4111 | if (!Flags.isByVal()) |
4112 | continue; |
4113 | |
4114 | SDValue Arg = OutVals[i]; |
4115 | unsigned Size = Flags.getByValSize(); |
4116 | Align Alignment = Flags.getNonZeroByValAlign(); |
4117 | |
4118 | int FI = |
4119 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false); |
4120 | SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
4121 | SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT); |
4122 | |
4123 | Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment, |
4124 | /*IsVolatile=*/isVol: false, |
4125 | /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall, |
4126 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
4127 | ByValArgs.push_back(Elt: FIPtr); |
4128 | } |
4129 | |
4130 | if (!IsTailCall) |
4131 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL); |
4132 | |
4133 | // Copy argument values to their designated locations. |
4134 | SmallVector<std::pair<Register, SDValue>> RegsToPass; |
4135 | SmallVector<SDValue> MemOpChains; |
4136 | SDValue StackPtr; |
4137 | for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { |
4138 | CCValAssign &VA = ArgLocs[i]; |
4139 | SDValue ArgValue = OutVals[i]; |
4140 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
4141 | |
4142 | // Promote the value if needed. |
4143 | // For now, only handle fully promoted and indirect arguments. |
4144 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
4145 | // Store the argument in a stack slot and pass its address. |
4146 | Align StackAlign = |
4147 | std::max(a: getPrefTypeAlign(VT: Outs[i].ArgVT, DAG), |
4148 | b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG)); |
4149 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
4150 | // If the original argument was split and passed by reference, we need to |
4151 | // store the required parts of it here (and pass just one address). |
4152 | unsigned ArgIndex = Outs[i].OrigArgIndex; |
4153 | unsigned ArgPartOffset = Outs[i].PartOffset; |
4154 | assert(ArgPartOffset == 0); |
4155 | // Calculate the total size to store. We don't have access to what we're |
4156 | // actually storing other than performing the loop and collecting the |
4157 | // info. |
4158 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
4159 | while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { |
4160 | SDValue PartValue = OutVals[i + 1]; |
4161 | unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; |
4162 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
4163 | EVT PartVT = PartValue.getValueType(); |
4164 | |
4165 | StoredSize += PartVT.getStoreSize(); |
4166 | StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG)); |
4167 | Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset)); |
4168 | ++i; |
4169 | } |
4170 | SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign); |
4171 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
4172 | MemOpChains.push_back( |
4173 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
4174 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
4175 | for (const auto &Part : Parts) { |
4176 | SDValue PartValue = Part.first; |
4177 | SDValue PartOffset = Part.second; |
4178 | SDValue Address = |
4179 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset); |
4180 | MemOpChains.push_back( |
4181 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
4182 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
4183 | } |
4184 | ArgValue = SpillSlot; |
4185 | } else { |
4186 | ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL); |
4187 | } |
4188 | |
4189 | // Use local copy if it is a byval arg. |
4190 | if (Flags.isByVal()) |
4191 | ArgValue = ByValArgs[j++]; |
4192 | |
4193 | if (VA.isRegLoc()) { |
4194 | // Queue up the argument copies and emit them at the end. |
4195 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
4196 | } else { |
4197 | assert(VA.isMemLoc() && "Argument not register or memory" ); |
4198 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
4199 | "for passing parameters" ); |
4200 | |
4201 | // Work out the address of the stack slot. |
4202 | if (!StackPtr.getNode()) |
4203 | StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); |
4204 | SDValue Address = |
4205 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
4206 | N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL)); |
4207 | |
4208 | // Emit the store. |
4209 | MemOpChains.push_back( |
4210 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
4211 | } |
4212 | } |
4213 | |
4214 | // Join the stores, which are independent of one another. |
4215 | if (!MemOpChains.empty()) |
4216 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); |
4217 | |
4218 | SDValue Glue; |
4219 | |
4220 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
4221 | for (auto &Reg : RegsToPass) { |
4222 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue); |
4223 | Glue = Chain.getValue(R: 1); |
4224 | } |
4225 | |
4226 | // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a |
4227 | // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't |
4228 | // split it and then direct call can be matched by PseudoCALL. |
4229 | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
4230 | const GlobalValue *GV = S->getGlobal(); |
4231 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV) |
4232 | ? LoongArchII::MO_CALL |
4233 | : LoongArchII::MO_CALL_PLT; |
4234 | Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags); |
4235 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
4236 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr) |
4237 | ? LoongArchII::MO_CALL |
4238 | : LoongArchII::MO_CALL_PLT; |
4239 | Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags); |
4240 | } |
4241 | |
4242 | // The first call operand is the chain and the second is the target address. |
4243 | SmallVector<SDValue> Ops; |
4244 | Ops.push_back(Elt: Chain); |
4245 | Ops.push_back(Elt: Callee); |
4246 | |
4247 | // Add argument registers to the end of the list so that they are |
4248 | // known live into the call. |
4249 | for (auto &Reg : RegsToPass) |
4250 | Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType())); |
4251 | |
4252 | if (!IsTailCall) { |
4253 | // Add a register mask operand representing the call-preserved registers. |
4254 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4255 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
4256 | assert(Mask && "Missing call preserved mask for calling convention" ); |
4257 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
4258 | } |
4259 | |
4260 | // Glue the call to the argument copies, if any. |
4261 | if (Glue.getNode()) |
4262 | Ops.push_back(Elt: Glue); |
4263 | |
4264 | // Emit the call. |
4265 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
4266 | unsigned Op; |
4267 | switch (DAG.getTarget().getCodeModel()) { |
4268 | default: |
4269 | report_fatal_error(reason: "Unsupported code model" ); |
4270 | case CodeModel::Small: |
4271 | Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; |
4272 | break; |
4273 | case CodeModel::Medium: |
4274 | assert(Subtarget.is64Bit() && "Medium code model requires LA64" ); |
4275 | Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; |
4276 | break; |
4277 | case CodeModel::Large: |
4278 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
4279 | Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; |
4280 | break; |
4281 | } |
4282 | |
4283 | if (IsTailCall) { |
4284 | MF.getFrameInfo().setHasTailCall(); |
4285 | SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
4286 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
4287 | return Ret; |
4288 | } |
4289 | |
4290 | Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
4291 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
4292 | Glue = Chain.getValue(R: 1); |
4293 | |
4294 | // Mark the end of the call, which is glued to the call itself. |
4295 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
4296 | Glue = Chain.getValue(R: 1); |
4297 | |
4298 | // Assign locations to each value returned by this call. |
4299 | SmallVector<CCValAssign> RVLocs; |
4300 | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
4301 | analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch); |
4302 | |
4303 | // Copy all of the result registers out of their specified physreg. |
4304 | for (auto &VA : RVLocs) { |
4305 | // Copy the value out. |
4306 | SDValue RetValue = |
4307 | DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue); |
4308 | // Glue the RetValue to the end of the call sequence. |
4309 | Chain = RetValue.getValue(R: 1); |
4310 | Glue = RetValue.getValue(R: 2); |
4311 | |
4312 | RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL); |
4313 | |
4314 | InVals.push_back(Elt: RetValue); |
4315 | } |
4316 | |
4317 | return Chain; |
4318 | } |
4319 | |
/// Return true iff every value in \p Outs can be assigned a return location
/// by the return-value convention (CC_LoongArch with IsRet=true); a failed
/// assignment makes the return unlowerable in registers.
bool LoongArchTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    LoongArchABI::ABI ABI =
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    // CC_LoongArch returns a non-zero value when it cannot assign the value.
    if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full,
                     ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
                     OrigTy: nullptr))
      return false;
  }
  return true;
}
4336 | |
/// Lower an IR return: assign each outgoing value a register location, emit
/// the glued copy-to-reg chain, and terminate with a LoongArchISD::RET node
/// whose operands list the live-out registers.
SDValue LoongArchTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    CLI: nullptr, Fn: CC_LoongArch);
  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error(reason: "GHC functions return void only" );
  SDValue Glue;
  // RetOps[0] holds the chain; register operands are appended below.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!" );

    // Handle a 'normal' return.
    SDValue Val = convertValVTToLocVT(DAG, Val: OutVals[i], VA, DL);
    Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);

    // Guarantee that all emitted copies are stuck together.
    Glue = Chain.getValue(R: 1);
    RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Elt: Glue);

  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}
4378 | |
4379 | bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
4380 | bool ForCodeSize) const { |
4381 | // TODO: Maybe need more checks here after vector extension is supported. |
4382 | if (VT == MVT::f32 && !Subtarget.hasBasicF()) |
4383 | return false; |
4384 | if (VT == MVT::f64 && !Subtarget.hasBasicD()) |
4385 | return false; |
4386 | return (Imm.isZero() || Imm.isExactlyValue(V: +1.0)); |
4387 | } |
4388 | |
4389 | bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { |
4390 | return true; |
4391 | } |
4392 | |
4393 | bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { |
4394 | return true; |
4395 | } |
4396 | |
/// Decide whether atomic loads/stores need explicit fences inserted around
/// them. On LA32 every atomic load and store is fenced; on LA64 only 8/16-bit
/// atomic stores (and all atomic loads) are.
bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  // LA32: fence all atomic loads and stores.
  if (!Subtarget.is64Bit())
    return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);

  if (isa<LoadInst>(Val: I))
    return true;

  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
  // require fences because we can use amswap_db.[w/d].
  if (isa<StoreInst>(Val: I)) {
    unsigned Size = I->getOperand(i: 0)->getType()->getIntegerBitWidth();
    return (Size == 8 || Size == 16);
  }

  return false;
}
4414 | |
4415 | EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, |
4416 | LLVMContext &Context, |
4417 | EVT VT) const { |
4418 | if (!VT.isVector()) |
4419 | return getPointerTy(DL); |
4420 | return VT.changeVectorElementTypeToInteger(); |
4421 | } |
4422 | |
4423 | bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { |
4424 | // TODO: Support vectors. |
4425 | return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y); |
4426 | } |
4427 | |
/// Describe the memory behavior of target intrinsics so the DAG builder can
/// attach a MachineMemOperand. Currently only the i32 masked atomicrmw
/// intrinsics are described; they access a 4-byte-aligned i32 and are marked
/// volatile load+store so they are never reordered or removed.
bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                 const CallInst &I,
                                                 MachineFunction &MF,
                                                 unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    // The pointer operand is always the first intrinsic argument.
    Info.ptrVal = I.getArgOperand(i: 0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
    // TODO: Add more Intrinsics later.
  }
}
4450 | |
4451 | TargetLowering::AtomicExpansionKind |
4452 | LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
4453 | // TODO: Add more AtomicRMWInst that needs to be extended. |
4454 | |
4455 | // Since floating-point operation requires a non-trivial set of data |
4456 | // operations, use CmpXChg to expand. |
4457 | if (AI->isFloatingPointOperation() || |
4458 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
4459 | AI->getOperation() == AtomicRMWInst::UDecWrap) |
4460 | return AtomicExpansionKind::CmpXChg; |
4461 | |
4462 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
4463 | if (Size == 8 || Size == 16) |
4464 | return AtomicExpansionKind::MaskedIntrinsic; |
4465 | return AtomicExpansionKind::None; |
4466 | } |
4467 | |
/// Map an AtomicRMW binary op to the matching masked-atomicrmw intrinsic for
/// the given register width (GRLen of 32 or 64). Operations without a mapping
/// for the requested width are unreachable here.
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
                                    AtomicRMWInst::BinOp BinOp) {
  if (GRLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp" );
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::loongarch_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::loongarch_masked_atomicrmw_min_i64;
      // TODO: support other AtomicRMWInst.
    }
  }

  // Note: the 32-bit set is smaller — min/max variants are 64-bit only.
  if (GRLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp" );
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
      // TODO: support other AtomicRMWInst.
    }
  }

  llvm_unreachable("Unexpected GRLen\n" );
}
4513 | |
4514 | TargetLowering::AtomicExpansionKind |
4515 | LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( |
4516 | AtomicCmpXchgInst *CI) const { |
4517 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
4518 | if (Size == 8 || Size == 16) |
4519 | return AtomicExpansionKind::MaskedIntrinsic; |
4520 | return AtomicExpansionKind::None; |
4521 | } |
4522 | |
/// Emit IR for a sub-word cmpxchg via the masked-cmpxchg intrinsic: the
/// compare/new values and mask are sign-extended to i64, the failure ordering
/// is passed as a GRLen-wide constant, and the result is truncated to i32.
Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  AtomicOrdering FailOrd = CI->getFailureOrdering();
  Value *FailureOrdering =
      Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));

  // TODO: Support cmpxchg on LA32.
  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
  CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
  NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
  Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
  Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
  return Result;
}
4543 | |
/// Emit IR for a sub-word atomicrmw via the masked-atomicrmw intrinsics.
/// Xchg with a constant 0/-1 operand is special-cased to a plain And/Or, and
/// signed Min/Max pass an extra shift amount for in-register sign extension.
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LL/SC loop emitted by
  // int_loongarch_masked_atomicrmw_xchg.
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(Val: AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
                                     Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ),
                                     Align: AI->getAlign(), Ordering: Ord);
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
                                     Align: AI->getAlign(), Ordering: Ord);
  }

  unsigned GRLen = Subtarget.getGRLen();
  // The ordering is passed to the intrinsic as a GRLen-wide constant.
  Value *Ordering =
      Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LlwOpScwLoop = Intrinsic::getDeclaration(
      M: AI->getModule(),
      id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys);

  // The 64-bit intrinsics take i64 operands; widen by sign extension.
  if (GRLen == 64) {
    Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
    Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
    Result = Builder.CreateCall(Callee: LlwOpScwLoop,
                                Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
  }

  // Narrow the i64 intrinsic result back to the i32 the expansion expects.
  if (GRLen == 64)
    Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
  return Result;
}
4602 | |
4603 | bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( |
4604 | const MachineFunction &MF, EVT VT) const { |
4605 | VT = VT.getScalarType(); |
4606 | |
4607 | if (!VT.isSimple()) |
4608 | return false; |
4609 | |
4610 | switch (VT.getSimpleVT().SimpleTy) { |
4611 | case MVT::f32: |
4612 | case MVT::f64: |
4613 | return true; |
4614 | default: |
4615 | break; |
4616 | } |
4617 | |
4618 | return false; |
4619 | } |
4620 | |
4621 | Register LoongArchTargetLowering::getExceptionPointerRegister( |
4622 | const Constant *PersonalityFn) const { |
4623 | return LoongArch::R4; |
4624 | } |
4625 | |
4626 | Register LoongArchTargetLowering::getExceptionSelectorRegister( |
4627 | const Constant *PersonalityFn) const { |
4628 | return LoongArch::R5; |
4629 | } |
4630 | |
4631 | //===----------------------------------------------------------------------===// |
4632 | // LoongArch Inline Assembly Support |
4633 | //===----------------------------------------------------------------------===// |
4634 | |
/// Classify a LoongArch inline-asm constraint string into register-class,
/// immediate, or memory kinds; unhandled constraints (including 'm') fall
/// through to the generic implementation.
LoongArchTargetLowering::ConstraintType
LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
  // LoongArch specific constraints in GCC: config/loongarch/constraints.md
  //
  // 'f': A floating-point register (if available).
  // 'k': A memory operand whose address is formed by a base register and
  //      (optionally scaled) index register.
  // 'l': A signed 16-bit constant.
  // 'm': A memory operand whose address is formed by a base register and
  //      offset that is suitable for use in instructions with the same
  //      addressing mode as st.w and ld.w.
  // 'I': A signed 12-bit constant (for arithmetic instructions).
  // 'J': Integer zero.
  // 'K': An unsigned 12-bit constant (for logic instructions).
  // "ZB": An address that is held in a general-purpose register. The offset is
  //       zero.
  // "ZC": A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as ll.w and sc.w.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'l':
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'k':
      return C_Memory;
    }
  }

  if (Constraint == "ZC" || Constraint == "ZB" )
    return C_Memory;

  // 'm' is handled here.
  return TargetLowering::getConstraintType(Constraint);
}
4676 | |
4677 | InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( |
4678 | StringRef ConstraintCode) const { |
4679 | return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) |
4680 | .Case(S: "k" , Value: InlineAsm::ConstraintCode::k) |
4681 | .Case(S: "ZB" , Value: InlineAsm::ConstraintCode::ZB) |
4682 | .Case(S: "ZC" , Value: InlineAsm::ConstraintCode::ZC) |
4683 | .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); |
4684 | } |
4685 | |
/// Resolve an inline-asm register constraint ('r', 'f', or an explicit
/// "{$rN}"/"{$fN}"/"{$vrN}"/"{$xrN}" name) to a register class or a concrete
/// register.
std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a LoongArch
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &LoongArch::GPRRegClass);
    case 'f':
      // Pick the narrowest FP class whose features are available and that
      // can legally hold VT.
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(0U, &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(0U, &LoongArch::FPR64RegClass);
      if (Subtarget.hasExtLSX() &&
          TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
        return std::make_pair(0U, &LoongArch::LSX128RegClass);
      if (Subtarget.hasExtLASX() &&
          TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
        return std::make_pair(0U, &LoongArch::LASX256RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints while the official register name is prefixed with a '$'. So we
  // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
  // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
  // case insensitive, so no need to convert the constraint to upper case here.
  //
  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
  // decode the usage of register name aliases into their official names. And
  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
  // official register names.
  if (Constraint.starts_with(Prefix: "{$r" ) || Constraint.starts_with(Prefix: "{$f" ) ||
      Constraint.starts_with(Prefix: "{$vr" ) || Constraint.starts_with(Prefix: "{$xr" )) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, Constraint: join_items(Separator: "" , Items&: Temp.first, Items&: Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          // Translate the 32-bit FPR number to its 64-bit counterpart.
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
4748 | |
/// Validate an inline-asm immediate operand against the target constraint
/// letters ('l'/'I'/'J'/'K') and, when it fits, push a matching target
/// constant onto Ops; out-of-range values push nothing, which flags an error.
void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(x: CVal))
          Ops.push_back(
              x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(x: CVal))
          Ops.push_back(
              x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(x: CVal))
          Ops.push_back(
              x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
4795 | |
4796 | #define GET_REGISTER_MATCHER |
4797 | #include "LoongArchGenAsmMatcher.inc" |
4798 | |
/// Resolve a register name (e.g. from llvm.read_register): strip the leading
/// '$', try alternate names before official names, and reject any register
/// that is not reserved.
Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  // Names are written "$rN"; the matchers want the part after the '$'.
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"." ));
  // Only reserved registers may be named this way.
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Idx: Reg))
    report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
                              StringRef(RegName) + "\"." ));
  return Reg;
}
4816 | |
/// Decide whether a MUL by constant \p C should be decomposed into cheaper
/// shift/add sequences (SLLI + ADD/SUB, ALSL, or two-shift combinations).
bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
    // in which the immediate has two set bits. Or Break (MUL x, imm)
    // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
    // equals to (1 << s0) - (1 << s1).
    if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
      if (Shifts >= 12)
        return false;
      // Reject multiplications can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than other 3 cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}
4863 | |
/// Return true if the addressing mode \p AM is natively supported for
/// accesses of type \p Ty.
bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks after support vector extension.

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
  // with `UAL` feature.
  if (!isInt<12>(x: AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    return false;
  }

  return true;
}
4907 | |
4908 | bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
4909 | return isInt<12>(x: Imm); |
4910 | } |
4911 | |
4912 | bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
4913 | return isInt<12>(x: Imm); |
4914 | } |
4915 | |
/// Return true if zero-extending \p Val to \p VT2 is free, i.e. when it can
/// be folded into an 8/16-bit load.
bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}
4930 | |
4931 | bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { |
4932 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
4933 | } |
4934 | |
4935 | bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { |
4936 | // TODO: Support vectors. |
4937 | if (Y.getValueType().isVector()) |
4938 | return false; |
4939 | |
4940 | return !isa<ConstantSDNode>(Val: Y); |
4941 | } |
4942 | |
4943 | ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { |
4944 | // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. |
4945 | return ISD::SIGN_EXTEND; |
4946 | } |
4947 | |