1 | //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that AArch64 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
16 | |
17 | #include "AArch64.h" |
18 | #include "Utils/AArch64SMEAttributes.h" |
19 | #include "llvm/CodeGen/CallingConvLower.h" |
20 | #include "llvm/CodeGen/MachineFunction.h" |
21 | #include "llvm/CodeGen/SelectionDAG.h" |
22 | #include "llvm/CodeGen/TargetLowering.h" |
23 | #include "llvm/IR/CallingConv.h" |
24 | #include "llvm/IR/Instruction.h" |
25 | |
26 | namespace llvm { |
27 | |
28 | namespace AArch64ISD { |
29 | |
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, use the following naming convention:
33 | // |
34 | // _MERGE_OP<n> The result value is a vector with inactive lanes equal |
35 | // to source operand OP<n>. |
36 | // |
37 | // _MERGE_ZERO The result value is a vector with inactive lanes |
38 | // actively zeroed. |
39 | // |
// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
//                 to the last source operand, whose only purpose is to be
//                 a passthru value.
43 | // |
44 | // For other cases where no explicit action is needed to set the inactive lanes, |
45 | // or when the result is not a vector and it is needed or helpful to |
46 | // distinguish a node from similar unpredicated nodes, use: |
47 | // |
48 | // _PRED |
49 | // |
50 | enum NodeType : unsigned { |
51 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
52 | WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. |
53 | CALL, // Function call. |
54 | |
// Pseudo for an Objective-C call that gets emitted together with a special
// `mov x29, x29` marker instruction.
57 | CALL_RVMARKER, |
58 | |
59 | CALL_BTI, // Function call followed by a BTI instruction. |
60 | |
61 | COALESCER_BARRIER, |
62 | |
63 | SMSTART, |
64 | SMSTOP, |
65 | RESTORE_ZA, |
66 | RESTORE_ZT, |
67 | SAVE_ZT, |
68 | |
69 | // A call with the callee in x16, i.e. "blr x16". |
70 | CALL_ARM64EC_TO_X64, |
71 | |
72 | // Produces the full sequence of instructions for getting the thread pointer |
73 | // offset of a variable into X0, using the TLSDesc model. |
74 | TLSDESC_CALLSEQ, |
75 | ADRP, // Page address of a TargetGlobalAddress operand. |
76 | ADR, // ADR |
77 | ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. |
78 | LOADgot, // Load from automatically generated descriptor (e.g. Global |
79 | // Offset Table, TLS record). |
80 | RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. |
81 | BRCOND, // Conditional branch instruction; "b.cond". |
82 | CSEL, |
83 | CSINV, // Conditional select invert. |
84 | CSNEG, // Conditional select negate. |
85 | CSINC, // Conditional select increment. |
86 | |
87 | // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on |
88 | // ELF. |
89 | THREAD_POINTER, |
90 | ADC, |
91 | SBC, // adc, sbc instructions |
92 | |
// To avoid stack clash, allocation is performed in blocks and each block is
// probed.
95 | PROBED_ALLOCA, |
96 | |
97 | // Predicated instructions where inactive lanes produce undefined results. |
98 | ABDS_PRED, |
99 | ABDU_PRED, |
100 | FADD_PRED, |
101 | FDIV_PRED, |
102 | FMA_PRED, |
103 | FMAX_PRED, |
104 | FMAXNM_PRED, |
105 | FMIN_PRED, |
106 | FMINNM_PRED, |
107 | FMUL_PRED, |
108 | FSUB_PRED, |
109 | HADDS_PRED, |
110 | HADDU_PRED, |
111 | MUL_PRED, |
112 | MULHS_PRED, |
113 | MULHU_PRED, |
114 | RHADDS_PRED, |
115 | RHADDU_PRED, |
116 | SDIV_PRED, |
117 | SHL_PRED, |
118 | SMAX_PRED, |
119 | SMIN_PRED, |
120 | SRA_PRED, |
121 | SRL_PRED, |
122 | UDIV_PRED, |
123 | UMAX_PRED, |
124 | UMIN_PRED, |
125 | |
126 | // Unpredicated vector instructions |
127 | BIC, |
128 | |
129 | SRAD_MERGE_OP1, |
130 | |
131 | // Predicated instructions with the result of inactive lanes provided by the |
132 | // last operand. |
133 | FABS_MERGE_PASSTHRU, |
134 | FCEIL_MERGE_PASSTHRU, |
135 | FFLOOR_MERGE_PASSTHRU, |
136 | FNEARBYINT_MERGE_PASSTHRU, |
137 | FNEG_MERGE_PASSTHRU, |
138 | FRECPX_MERGE_PASSTHRU, |
139 | FRINT_MERGE_PASSTHRU, |
140 | FROUND_MERGE_PASSTHRU, |
141 | FROUNDEVEN_MERGE_PASSTHRU, |
142 | FSQRT_MERGE_PASSTHRU, |
143 | FTRUNC_MERGE_PASSTHRU, |
144 | FP_ROUND_MERGE_PASSTHRU, |
145 | FP_EXTEND_MERGE_PASSTHRU, |
146 | UINT_TO_FP_MERGE_PASSTHRU, |
147 | SINT_TO_FP_MERGE_PASSTHRU, |
148 | FCVTZU_MERGE_PASSTHRU, |
149 | FCVTZS_MERGE_PASSTHRU, |
150 | SIGN_EXTEND_INREG_MERGE_PASSTHRU, |
151 | ZERO_EXTEND_INREG_MERGE_PASSTHRU, |
152 | ABS_MERGE_PASSTHRU, |
153 | NEG_MERGE_PASSTHRU, |
154 | |
155 | SETCC_MERGE_ZERO, |
156 | |
157 | // Arithmetic instructions which write flags. |
158 | ADDS, |
159 | SUBS, |
160 | ADCS, |
161 | SBCS, |
162 | ANDS, |
163 | |
// Conditional compares. Operands: left, right, falsecc, cc, flags.
165 | CCMP, |
166 | CCMN, |
167 | FCCMP, |
168 | |
169 | // Floating point comparison |
170 | FCMP, |
171 | |
172 | // Scalar-to-vector duplication |
173 | DUP, |
174 | DUPLANE8, |
175 | DUPLANE16, |
176 | DUPLANE32, |
177 | DUPLANE64, |
178 | DUPLANE128, |
179 | |
// Vector immediate moves
181 | MOVI, |
182 | MOVIshift, |
183 | MOVIedit, |
184 | MOVImsl, |
185 | FMOV, |
186 | MVNIshift, |
187 | MVNImsl, |
188 | |
189 | // Vector immediate ops |
190 | BICi, |
191 | ORRi, |
192 | |
193 | // Vector bitwise select: similar to ISD::VSELECT but not all bits within an |
194 | // element must be identical. |
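// (As a sketch of the semantics: BSP(Mask, L, R) computes the per-bit blend
// (Mask & L) | (~Mask & R); it is typically materialised as BSL/BIT/BIF.)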
195 | BSP, |
196 | |
197 | // Vector shuffles |
198 | ZIP1, |
199 | ZIP2, |
200 | UZP1, |
201 | UZP2, |
202 | TRN1, |
203 | TRN2, |
204 | REV16, |
205 | REV32, |
206 | REV64, |
207 | EXT, |
208 | SPLICE, |
209 | |
210 | // Vector shift by scalar |
211 | VSHL, |
212 | VLSHR, |
213 | VASHR, |
214 | |
// Vector saturating/rounding shift by immediate
216 | SQSHL_I, |
217 | UQSHL_I, |
218 | SQSHLU_I, |
219 | SRSHR_I, |
220 | URSHR_I, |
221 | URSHR_I_PRED, |
222 | |
223 | // Vector narrowing shift by immediate (bottom) |
224 | RSHRNB_I, |
225 | |
226 | // Vector shift by constant and insert |
227 | VSLI, |
228 | VSRI, |
229 | |
230 | // Vector comparisons |
231 | CMEQ, |
232 | CMGE, |
233 | CMGT, |
234 | CMHI, |
235 | CMHS, |
236 | FCMEQ, |
237 | FCMGE, |
238 | FCMGT, |
239 | |
240 | // Vector zero comparisons |
241 | CMEQz, |
242 | CMGEz, |
243 | CMGTz, |
244 | CMLEz, |
245 | CMLTz, |
246 | FCMEQz, |
247 | FCMGEz, |
248 | FCMGTz, |
249 | FCMLEz, |
250 | FCMLTz, |
251 | |
252 | // Round wide FP to narrow FP with inexact results to odd. |
253 | FCVTXN, |
254 | |
255 | // Vector across-lanes addition |
256 | // Only the lower result lane is defined. |
257 | SADDV, |
258 | UADDV, |
259 | |
// Sum Long across Vector (signed and unsigned)
261 | UADDLV, |
262 | SADDLV, |
263 | |
264 | // Add Pairwise of two vectors |
265 | ADDP, |
266 | // Add Long Pairwise |
267 | SADDLP, |
268 | UADDLP, |
269 | |
270 | // udot/sdot instructions |
271 | UDOT, |
272 | SDOT, |
273 | |
274 | // Vector across-lanes min/max |
275 | // Only the lower result lane is defined. |
276 | SMINV, |
277 | UMINV, |
278 | SMAXV, |
279 | UMAXV, |
280 | |
281 | SADDV_PRED, |
282 | UADDV_PRED, |
283 | SMAXV_PRED, |
284 | UMAXV_PRED, |
285 | SMINV_PRED, |
286 | UMINV_PRED, |
287 | ORV_PRED, |
288 | EORV_PRED, |
289 | ANDV_PRED, |
290 | |
291 | // Compare-and-branch |
292 | CBZ, |
293 | CBNZ, |
294 | TBZ, |
295 | TBNZ, |
296 | |
297 | // Tail calls |
298 | TC_RETURN, |
299 | |
300 | // Custom prefetch handling |
301 | PREFETCH, |
302 | |
303 | // {s|u}int to FP within a FP register. |
304 | SITOF, |
305 | UITOF, |
306 | |
307 | /// Natural vector cast. ISD::BITCAST is not natural in the big-endian |
308 | /// world w.r.t vectors; which causes additional REV instructions to be |
309 | /// generated to compensate for the byte-swapping. But sometimes we do |
310 | /// need to re-interpret the data in SIMD vector registers in big-endian |
311 | /// mode without emitting such REV instructions. |
312 | NVCAST, |
313 | |
314 | MRS, // MRS, also sets the flags via a glue. |
315 | |
316 | SMULL, |
317 | UMULL, |
318 | |
319 | PMULL, |
320 | |
321 | // Reciprocal estimates and steps. |
322 | FRECPE, |
323 | FRECPS, |
324 | FRSQRTE, |
325 | FRSQRTS, |
326 | |
327 | SUNPKHI, |
328 | SUNPKLO, |
329 | UUNPKHI, |
330 | UUNPKLO, |
331 | |
332 | CLASTA_N, |
333 | CLASTB_N, |
334 | LASTA, |
335 | LASTB, |
336 | TBL, |
337 | |
338 | // Floating-point reductions. |
339 | FADDA_PRED, |
340 | FADDV_PRED, |
341 | FMAXV_PRED, |
342 | FMAXNMV_PRED, |
343 | FMINV_PRED, |
344 | FMINNMV_PRED, |
345 | |
346 | INSR, |
347 | PTEST, |
348 | PTEST_ANY, |
349 | PTRUE, |
350 | |
351 | CTTZ_ELTS, |
352 | |
353 | BITREVERSE_MERGE_PASSTHRU, |
354 | BSWAP_MERGE_PASSTHRU, |
355 | REVH_MERGE_PASSTHRU, |
356 | REVW_MERGE_PASSTHRU, |
357 | CTLZ_MERGE_PASSTHRU, |
358 | CTPOP_MERGE_PASSTHRU, |
359 | DUP_MERGE_PASSTHRU, |
360 | INDEX_VECTOR, |
361 | |
// Cast between vectors of the same element type that differ in length.
363 | REINTERPRET_CAST, |
364 | |
365 | // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa |
366 | LS64_BUILD, |
367 | , |
368 | |
369 | LD1_MERGE_ZERO, |
370 | LD1S_MERGE_ZERO, |
371 | LDNF1_MERGE_ZERO, |
372 | LDNF1S_MERGE_ZERO, |
373 | LDFF1_MERGE_ZERO, |
374 | LDFF1S_MERGE_ZERO, |
375 | LD1RQ_MERGE_ZERO, |
376 | LD1RO_MERGE_ZERO, |
377 | |
378 | // Structured loads. |
379 | SVE_LD2_MERGE_ZERO, |
380 | SVE_LD3_MERGE_ZERO, |
381 | SVE_LD4_MERGE_ZERO, |
382 | |
383 | // Unsigned gather loads. |
384 | GLD1_MERGE_ZERO, |
385 | GLD1_SCALED_MERGE_ZERO, |
386 | GLD1_UXTW_MERGE_ZERO, |
387 | GLD1_SXTW_MERGE_ZERO, |
388 | GLD1_UXTW_SCALED_MERGE_ZERO, |
389 | GLD1_SXTW_SCALED_MERGE_ZERO, |
390 | GLD1_IMM_MERGE_ZERO, |
391 | GLD1Q_MERGE_ZERO, |
392 | GLD1Q_INDEX_MERGE_ZERO, |
393 | |
394 | // Signed gather loads |
395 | GLD1S_MERGE_ZERO, |
396 | GLD1S_SCALED_MERGE_ZERO, |
397 | GLD1S_UXTW_MERGE_ZERO, |
398 | GLD1S_SXTW_MERGE_ZERO, |
399 | GLD1S_UXTW_SCALED_MERGE_ZERO, |
400 | GLD1S_SXTW_SCALED_MERGE_ZERO, |
401 | GLD1S_IMM_MERGE_ZERO, |
402 | |
// Unsigned first-faulting gather loads.
404 | GLDFF1_MERGE_ZERO, |
405 | GLDFF1_SCALED_MERGE_ZERO, |
406 | GLDFF1_UXTW_MERGE_ZERO, |
407 | GLDFF1_SXTW_MERGE_ZERO, |
408 | GLDFF1_UXTW_SCALED_MERGE_ZERO, |
409 | GLDFF1_SXTW_SCALED_MERGE_ZERO, |
410 | GLDFF1_IMM_MERGE_ZERO, |
411 | |
// Signed first-faulting gather loads.
413 | GLDFF1S_MERGE_ZERO, |
414 | GLDFF1S_SCALED_MERGE_ZERO, |
415 | GLDFF1S_UXTW_MERGE_ZERO, |
416 | GLDFF1S_SXTW_MERGE_ZERO, |
417 | GLDFF1S_UXTW_SCALED_MERGE_ZERO, |
418 | GLDFF1S_SXTW_SCALED_MERGE_ZERO, |
419 | GLDFF1S_IMM_MERGE_ZERO, |
420 | |
421 | // Non-temporal gather loads |
422 | GLDNT1_MERGE_ZERO, |
423 | GLDNT1_INDEX_MERGE_ZERO, |
424 | GLDNT1S_MERGE_ZERO, |
425 | |
426 | // Contiguous masked store. |
427 | ST1_PRED, |
428 | |
429 | // Scatter store |
430 | SST1_PRED, |
431 | SST1_SCALED_PRED, |
432 | SST1_UXTW_PRED, |
433 | SST1_SXTW_PRED, |
434 | SST1_UXTW_SCALED_PRED, |
435 | SST1_SXTW_SCALED_PRED, |
436 | SST1_IMM_PRED, |
437 | SST1Q_PRED, |
438 | SST1Q_INDEX_PRED, |
439 | |
440 | // Non-temporal scatter store |
441 | SSTNT1_PRED, |
442 | SSTNT1_INDEX_PRED, |
443 | |
444 | // SME |
445 | RDSVL, |
446 | REVD_MERGE_PASSTHRU, |
447 | |
448 | // Asserts that a function argument (i32) is zero-extended to i8 by |
449 | // the caller |
450 | ASSERT_ZEXT_BOOL, |
451 | |
452 | // 128-bit system register accesses |
453 | // lo64, hi64, chain = MRRS(chain, sysregname) |
454 | MRRS, |
455 | // chain = MSRR(chain, sysregname, lo64, hi64) |
456 | MSRR, |
457 | |
458 | // Strict (exception-raising) floating point comparison |
459 | STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, |
460 | STRICT_FCMPE, |
461 | |
462 | // SME ZA loads and stores |
463 | SME_ZA_LDR, |
464 | SME_ZA_STR, |
465 | |
466 | // NEON Load/Store with post-increment base updates |
467 | LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, |
468 | LD3post, |
469 | LD4post, |
470 | ST2post, |
471 | ST3post, |
472 | ST4post, |
473 | LD1x2post, |
474 | LD1x3post, |
475 | LD1x4post, |
476 | ST1x2post, |
477 | ST1x3post, |
478 | ST1x4post, |
479 | LD1DUPpost, |
480 | LD2DUPpost, |
481 | LD3DUPpost, |
482 | LD4DUPpost, |
483 | LD1LANEpost, |
484 | LD2LANEpost, |
485 | LD3LANEpost, |
486 | LD4LANEpost, |
487 | ST2LANEpost, |
488 | ST3LANEpost, |
489 | ST4LANEpost, |
490 | |
491 | STG, |
492 | STZG, |
493 | ST2G, |
494 | STZ2G, |
495 | |
496 | LDP, |
497 | LDIAPP, |
498 | LDNP, |
499 | STP, |
500 | STILP, |
501 | STNP, |
502 | |
503 | // Memory Operations |
504 | MOPS_MEMSET, |
505 | MOPS_MEMSET_TAGGING, |
506 | MOPS_MEMCOPY, |
507 | MOPS_MEMMOVE, |
508 | }; |
509 | |
510 | } // end namespace AArch64ISD |
511 | |
512 | namespace AArch64 { |
513 | /// Possible values of current rounding mode, which is specified in bits |
514 | /// 23:22 of FPCR. |
515 | enum Rounding { |
516 | RN = 0, // Round to Nearest |
517 | RP = 1, // Round towards Plus infinity |
518 | RM = 2, // Round towards Minus infinity |
519 | RZ = 3, // Round towards Zero |
520 | rmMask = 3 // Bit mask selecting rounding mode |
521 | }; |
522 | |
523 | // Bit position of rounding mode bits in FPCR. |
524 | const unsigned RoundingBitsPos = 22; |
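
// A minimal decoding sketch using the constants above (illustrative only;
// FPCRValue names a hypothetical raw read of the register):
//
//   Rounding getRoundingMode(uint64_t FPCRValue) {
//     return static_cast<Rounding>((FPCRValue >> RoundingBitsPos) & rmMask);
//   }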
525 | |
526 | // Registers used to pass function arguments. |
527 | ArrayRef<MCPhysReg> getGPRArgRegs(); |
528 | ArrayRef<MCPhysReg> getFPRArgRegs(); |
529 | |
530 | /// Maximum allowed number of unprobed bytes above SP at an ABI |
531 | /// boundary. |
532 | const unsigned StackProbeMaxUnprobedStack = 1024; |
533 | |
534 | /// Maximum number of iterations to unroll for a constant size probing loop. |
535 | const unsigned StackProbeMaxLoopUnroll = 4; |
536 | |
537 | } // namespace AArch64 |
538 | |
539 | class AArch64Subtarget; |
540 | |
541 | class AArch64TargetLowering : public TargetLowering { |
542 | public: |
543 | explicit AArch64TargetLowering(const TargetMachine &TM, |
544 | const AArch64Subtarget &STI); |
545 | |
546 | /// Control the following reassociation of operands: (op (op x, c1), y) -> (op |
547 | /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. |
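/// For example, (add (add x, 42), y) can be reassociated to
/// (add (add x, y), 42); the constant 42 is an arbitrary illustration.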
548 | bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
549 | SDValue N1) const override; |
550 | |
551 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
552 | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; |
553 | |
554 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
555 | CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; |
556 | |
557 | /// Determine which of the bits specified in Mask are known to be either zero |
558 | /// or one and return them in the KnownZero/KnownOne bitsets. |
559 | void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, |
560 | const APInt &DemandedElts, |
561 | const SelectionDAG &DAG, |
562 | unsigned Depth = 0) const override; |
563 | |
564 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
565 | const APInt &DemandedElts, |
566 | const SelectionDAG &DAG, |
567 | unsigned Depth) const override; |
568 | |
569 | MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { |
570 | // Returning i64 unconditionally here (i.e. even for ILP32) means that the |
571 | // *DAG* representation of pointers will always be 64-bits. They will be |
572 | // truncated and extended when transferred to memory, but the 64-bit DAG |
573 | // allows us to use AArch64's addressing modes much more easily. |
return MVT::getIntegerVT(64);
575 | } |
576 | |
577 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
578 | const APInt &DemandedElts, |
579 | TargetLoweringOpt &TLO) const override; |
580 | |
581 | MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; |
582 | |
583 | /// Returns true if the target allows unaligned memory accesses of the |
584 | /// specified type. |
585 | bool allowsMisalignedMemoryAccesses( |
586 | EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
587 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
588 | unsigned *Fast = nullptr) const override; |
589 | /// LLT variant. |
590 | bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, |
591 | Align Alignment, |
592 | MachineMemOperand::Flags Flags, |
593 | unsigned *Fast = nullptr) const override; |
594 | |
595 | /// Provide custom lowering hooks for some operations. |
596 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
597 | |
598 | const char *getTargetNodeName(unsigned Opcode) const override; |
599 | |
600 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
601 | |
602 | /// This method returns a target specific FastISel object, or null if the |
603 | /// target does not support "fast" ISel. |
604 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
605 | const TargetLibraryInfo *libInfo) const override; |
606 | |
607 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
608 | |
609 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
610 | bool ForCodeSize) const override; |
611 | |
612 | /// Return true if the given shuffle mask can be codegen'd directly, or if it |
613 | /// should be stack expanded. |
614 | bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; |
615 | |
/// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
/// shuffle mask can be codegen'd directly.
618 | bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; |
619 | |
620 | /// Return the ISD::SETCC ValueType. |
621 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
622 | EVT VT) const override; |
623 | |
624 | SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; |
625 | |
626 | MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, |
627 | MachineBasicBlock *BB) const; |
628 | |
629 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
630 | MachineBasicBlock *BB) const; |
631 | |
632 | MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, |
633 | MachineBasicBlock *MBB) const; |
634 | |
635 | MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, |
636 | MachineInstr &MI, |
637 | MachineBasicBlock *BB) const; |
638 | MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; |
639 | MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, |
640 | MachineInstr &MI, MachineBasicBlock *BB, |
641 | bool HasTile) const; |
642 | MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, |
643 | unsigned Opcode, bool Op0IsDef) const; |
644 | MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; |
645 | |
646 | MachineBasicBlock * |
647 | EmitInstrWithCustomInserter(MachineInstr &MI, |
648 | MachineBasicBlock *MBB) const override; |
649 | |
650 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
651 | MachineFunction &MF, |
652 | unsigned Intrinsic) const override; |
653 | |
654 | bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
655 | EVT NewVT) const override; |
656 | |
657 | bool shouldRemoveRedundantExtend(SDValue Op) const override; |
658 | |
659 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
660 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
661 | |
662 | bool isProfitableToHoist(Instruction *I) const override; |
663 | |
664 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
665 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
666 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
667 | |
668 | bool shouldSinkOperands(Instruction *I, |
669 | SmallVectorImpl<Use *> &Ops) const override; |
670 | |
671 | bool optimizeExtendOrTruncateConversion( |
672 | Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override; |
673 | |
bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
675 | |
676 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
677 | |
678 | bool lowerInterleavedLoad(LoadInst *LI, |
679 | ArrayRef<ShuffleVectorInst *> Shuffles, |
680 | ArrayRef<unsigned> Indices, |
681 | unsigned Factor) const override; |
682 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
683 | unsigned Factor) const override; |
684 | |
685 | bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
686 | LoadInst *LI) const override; |
687 | |
688 | bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
689 | StoreInst *SI) const override; |
690 | |
691 | bool isLegalAddImmediate(int64_t) const override; |
692 | bool isLegalAddScalableImmediate(int64_t) const override; |
693 | bool isLegalICmpImmediate(int64_t) const override; |
694 | |
695 | bool isMulAddWithConstProfitable(SDValue AddNode, |
696 | SDValue ConstNode) const override; |
697 | |
698 | bool shouldConsiderGEPOffsetSplit() const override; |
699 | |
700 | EVT getOptimalMemOpType(const MemOp &Op, |
701 | const AttributeList &FuncAttributes) const override; |
702 | |
703 | LLT getOptimalMemOpLLT(const MemOp &Op, |
704 | const AttributeList &FuncAttributes) const override; |
705 | |
706 | /// Return true if the addressing mode represented by AM is legal for this |
707 | /// target, for a load/store of the specified type. |
708 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
709 | unsigned AS, |
710 | Instruction *I = nullptr) const override; |
711 | |
712 | int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, |
713 | int64_t MaxOffset) const override; |
714 | |
715 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
716 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
717 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
718 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
719 | EVT VT) const override; |
720 | bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; |
721 | |
722 | bool generateFMAsInMachineCombiner(EVT VT, |
723 | CodeGenOptLevel OptLevel) const override; |
724 | |
725 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
726 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
727 | |
728 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
729 | bool isDesirableToCommuteWithShift(const SDNode *N, |
730 | CombineLevel Level) const override; |
731 | |
732 | bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override { |
733 | return false; |
734 | } |
735 | |
736 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
737 | bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; |
738 | |
739 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
740 | bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
741 | CombineLevel Level) const override; |
742 | |
743 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
744 | EVT VT) const override; |
745 | |
746 | /// Returns true if it is beneficial to convert a load of a constant |
747 | /// to just the constant itself. |
748 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
749 | Type *Ty) const override; |
750 | |
751 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
752 | /// with this index. |
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
755 | |
756 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
757 | bool MathUsed) const override { |
// Using overflow ops for overflow checks only should be beneficial on
// AArch64.
return TargetLowering::shouldFormOverflowOp(Opcode, VT, /*MathUsed=*/true);
761 | } |
762 | |
763 | Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, |
764 | AtomicOrdering Ord) const override; |
765 | Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, |
766 | AtomicOrdering Ord) const override; |
767 | |
768 | void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override; |
769 | |
770 | bool isOpSuitableForLDPSTP(const Instruction *I) const; |
771 | bool isOpSuitableForLSE128(const Instruction *I) const; |
772 | bool isOpSuitableForRCPC3(const Instruction *I) const; |
773 | bool shouldInsertFencesForAtomic(const Instruction *I) const override; |
774 | bool |
775 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override; |
776 | |
777 | TargetLoweringBase::AtomicExpansionKind |
778 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
779 | TargetLoweringBase::AtomicExpansionKind |
780 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
781 | TargetLoweringBase::AtomicExpansionKind |
782 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
783 | |
784 | TargetLoweringBase::AtomicExpansionKind |
785 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; |
786 | |
787 | bool useLoadStackGuardNode() const override; |
788 | TargetLoweringBase::LegalizeTypeAction |
789 | getPreferredVectorAction(MVT VT) const override; |
790 | |
791 | /// If the target has a standard location for the stack protector cookie, |
792 | /// returns the address of that location. Otherwise, returns nullptr. |
793 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
794 | |
795 | void insertSSPDeclarations(Module &M) const override; |
796 | Value *getSDagStackGuard(const Module &M) const override; |
797 | Function *getSSPStackGuardCheck(const Module &M) const override; |
798 | |
799 | /// If the target has a standard location for the unsafe stack pointer, |
800 | /// returns the address of that location. Otherwise, returns nullptr. |
801 | Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; |
802 | |
803 | /// If a physical register, this returns the register that receives the |
804 | /// exception address on entry to an EH pad. |
805 | Register |
806 | getExceptionPointerRegister(const Constant *PersonalityFn) const override { |
807 | // FIXME: This is a guess. Has this been defined yet? |
808 | return AArch64::X0; |
809 | } |
810 | |
811 | /// If a physical register, this returns the register that receives the |
812 | /// exception typeid on entry to a landing pad. |
813 | Register |
814 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override { |
815 | // FIXME: This is a guess. Has this been defined yet? |
816 | return AArch64::X1; |
817 | } |
818 | |
819 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
820 | |
821 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
822 | const MachineFunction &MF) const override { |
// Do not merge to float value size (128 bits) if no implicit
// float attribute is set.
825 | |
826 | bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat); |
827 | |
828 | if (NoFloat) |
829 | return (MemVT.getSizeInBits() <= 64); |
830 | return true; |
831 | } |
832 | |
833 | bool isCheapToSpeculateCttz(Type *) const override { |
834 | return true; |
835 | } |
836 | |
837 | bool isCheapToSpeculateCtlz(Type *) const override { |
838 | return true; |
839 | } |
840 | |
841 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
842 | |
843 | bool hasAndNotCompare(SDValue V) const override { |
844 | // We can use bics for any scalar. |
845 | return V.getValueType().isScalarInteger(); |
846 | } |
847 | |
848 | bool hasAndNot(SDValue Y) const override { |
849 | EVT VT = Y.getValueType(); |
850 | |
851 | if (!VT.isVector()) |
return hasAndNotCompare(Y);
853 | |
854 | TypeSize TS = VT.getSizeInBits(); |
855 | // TODO: We should be able to use bic/bif too for SVE. |
856 | return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic' |
857 | } |
858 | |
859 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
860 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
861 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
862 | SelectionDAG &DAG) const override; |
863 | |
864 | ShiftLegalizationStrategy |
865 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
866 | unsigned ExpansionFactor) const override; |
867 | |
868 | bool shouldTransformSignedTruncationCheck(EVT XVT, |
869 | unsigned KeptBits) const override { |
// For vectors, we don't have a preference.
871 | if (XVT.isVector()) |
872 | return false; |
873 | |
874 | auto VTIsOk = [](EVT VT) -> bool { |
875 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
876 | VT == MVT::i64; |
877 | }; |
878 | |
879 | // We are ok with KeptBitsVT being byte/word/dword, what SXT supports. |
880 | // XVT will be larger than KeptBitsVT. |
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
882 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
883 | } |
884 | |
885 | bool preferIncOfAddToSubOfNot(EVT VT) const override; |
886 | |
887 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; |
888 | |
889 | bool isComplexDeinterleavingSupported() const override; |
890 | bool isComplexDeinterleavingOperationSupported( |
891 | ComplexDeinterleavingOperation Operation, Type *Ty) const override; |
892 | |
893 | Value *createComplexDeinterleavingIR( |
894 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
895 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
896 | Value *Accumulator = nullptr) const override; |
897 | |
898 | bool supportSplitCSR(MachineFunction *MF) const override { |
899 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
900 | MF->getFunction().hasFnAttribute(Attribute::NoUnwind); |
901 | } |
902 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
903 | void insertCopiesSplitCSR( |
904 | MachineBasicBlock *Entry, |
905 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
906 | |
907 | bool supportSwiftError() const override { |
908 | return true; |
909 | } |
910 | |
911 | bool supportKCFIBundles() const override { return true; } |
912 | |
913 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
914 | MachineBasicBlock::instr_iterator &MBBI, |
915 | const TargetInstrInfo *TII) const override; |
916 | |
917 | /// Enable aggressive FMA fusion on targets that want it. |
918 | bool enableAggressiveFMAFusion(EVT VT) const override; |
919 | |
920 | /// Returns the size of the platform's va_list object. |
921 | unsigned getVaListSizeInBits(const DataLayout &DL) const override; |
922 | |
923 | /// Returns true if \p VecTy is a legal interleaved access type. This |
924 | /// function checks the vector element type and the overall width of the |
925 | /// vector. |
926 | bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, |
927 | bool &UseScalable) const; |
928 | |
929 | /// Returns the number of interleaved accesses that will be generated when |
930 | /// lowering accesses of the given type. |
931 | unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, |
932 | bool UseScalable) const; |
933 | |
934 | MachineMemOperand::Flags getTargetMMOFlags( |
935 | const Instruction &I) const override; |
936 | |
937 | bool functionArgumentNeedsConsecutiveRegisters( |
938 | Type *Ty, CallingConv::ID CallConv, bool isVarArg, |
939 | const DataLayout &DL) const override; |
940 | |
941 | /// Used for exception handling on Win64. |
942 | bool needsFixedCatchObjects() const override; |
943 | |
944 | bool fallBackToDAGISel(const Instruction &Inst) const override; |
945 | |
946 | /// SVE code generation for fixed length vectors does not custom lower |
947 | /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to |
948 | /// merge. However, merging them creates a BUILD_VECTOR that is just as |
949 | /// illegal as the original, thus leading to an infinite legalisation loop. |
950 | /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal |
951 | /// vector types this override can be removed. |
952 | bool mergeStoresAfterLegalization(EVT VT) const override; |
953 | |
954 | // If the platform/function should have a redzone, return the size in bytes. |
955 | unsigned getRedZoneSize(const Function &F) const { |
956 | if (F.hasFnAttribute(Attribute::NoRedZone)) |
957 | return 0; |
958 | return 128; |
959 | } |
960 | |
961 | bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const; |
962 | EVT getPromotedVTForPredicate(EVT VT) const; |
963 | |
964 | EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
965 | bool AllowUnknown = false) const override; |
966 | |
967 | bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override; |
968 | |
969 | bool shouldExpandCttzElements(EVT VT) const override; |
970 | |
971 | /// If a change in streaming mode is required on entry to/return from a |
972 | /// function call it emits and returns the corresponding SMSTART or SMSTOP |
973 | /// node. \p Condition should be one of the enum values from |
974 | /// AArch64SME::ToggleCondition. |
975 | SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, |
976 | SDValue Chain, SDValue InGlue, unsigned Condition, |
977 | SDValue PStateSM = SDValue()) const; |
978 | |
979 | bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } |
980 | |
// Normally SVE is only used for fixed-length vectors that do not fit within
// a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64-bit and 128-bit vectors as well.
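// For example (illustrative, assuming SVE is available): a 256-bit fixed
// vector such as v4i64 does not fit in a NEON register and so uses SVE,
// whereas 128-bit v2i64 uses SVE only when OverrideNEON is true.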
984 | bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; |
985 | |
986 | // Follow NEON ABI rules even when using SVE for fixed length vectors. |
987 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
988 | EVT VT) const override; |
989 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
990 | CallingConv::ID CC, |
991 | EVT VT) const override; |
992 | unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, |
993 | CallingConv::ID CC, EVT VT, |
994 | EVT &IntermediateVT, |
995 | unsigned &NumIntermediates, |
996 | MVT &RegisterVT) const override; |
997 | |
/// True if stack clash protection is enabled for this function.
999 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
1000 | |
1001 | #ifndef NDEBUG |
1002 | void verifyTargetSDNode(const SDNode *N) const override; |
1003 | #endif |
1004 | |
1005 | private: |
1006 | /// Keep a pointer to the AArch64Subtarget around so that we can |
1007 | /// make the right decision when generating code for different targets. |
1008 | const AArch64Subtarget *Subtarget; |
1009 | |
1010 | llvm::BumpPtrAllocator BumpAlloc; |
1011 | llvm::StringSaver Saver{BumpAlloc}; |
1012 | |
1013 | bool isExtFreeImpl(const Instruction *Ext) const override; |
1014 | |
1015 | void addTypeForNEON(MVT VT); |
1016 | void addTypeForFixedLengthSVE(MVT VT); |
1017 | void addDRTypeForNEON(MVT VT); |
1018 | void addQRTypeForNEON(MVT VT); |
1019 | |
1020 | unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL, |
1021 | SelectionDAG &DAG) const; |
1022 | |
1023 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
1024 | bool isVarArg, |
1025 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1026 | const SDLoc &DL, SelectionDAG &DAG, |
1027 | SmallVectorImpl<SDValue> &InVals) const override; |
1028 | |
1029 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
1030 | SDNode *Node) const override; |
1031 | |
1032 | SDValue LowerCall(CallLoweringInfo & /*CLI*/, |
1033 | SmallVectorImpl<SDValue> &InVals) const override; |
1034 | |
1035 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
1036 | CallingConv::ID CallConv, bool isVarArg, |
1037 | const SmallVectorImpl<CCValAssign> &RVLocs, |
1038 | const SDLoc &DL, SelectionDAG &DAG, |
1039 | SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
1040 | SDValue ThisVal, bool RequiresSMChange) const; |
1041 | |
1042 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
1043 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
1044 | SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; |
1045 | SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; |
1046 | |
1047 | SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; |
1048 | SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; |
1049 | |
1050 | SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; |
1051 | |
1052 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1053 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1054 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
1055 | |
1056 | bool |
1057 | isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const; |
1058 | |
1059 | /// Finds the incoming stack arguments which overlap the given fixed stack |
1060 | /// object and incorporates their load into the current chain. This prevents |
1061 | /// an upcoming store from clobbering the stack argument before it's used. |
1062 | SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, |
1063 | MachineFrameInfo &MFI, int ClobberedFI) const; |
1064 | |
1065 | bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; |
1066 | |
1067 | void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, |
1068 | SDValue &Chain) const; |
1069 | |
1070 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
1071 | bool isVarArg, |
1072 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1073 | LLVMContext &Context) const override; |
1074 | |
1075 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1076 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1077 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
1078 | SelectionDAG &DAG) const override; |
1079 | |
1080 | SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1081 | unsigned Flag) const; |
1082 | SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, |
1083 | unsigned Flag) const; |
1084 | SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1085 | unsigned Flag) const; |
1086 | SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1087 | unsigned Flag) const; |
1088 | SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1089 | unsigned Flag) const; |
1090 | template <class NodeTy> |
1091 | SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1092 | template <class NodeTy> |
1093 | SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1094 | template <class NodeTy> |
1095 | SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1096 | template <class NodeTy> |
1097 | SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1098 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1099 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1100 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1101 | SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1102 | SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1103 | SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase, |
1104 | const SDLoc &DL, SelectionDAG &DAG) const; |
1105 | SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, |
1106 | SelectionDAG &DAG) const; |
1107 | SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1108 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
1109 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
1110 | SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; |
1111 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
1112 | SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; |
1113 | SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, |
1114 | SDValue TVal, SDValue FVal, const SDLoc &dl, |
1115 | SelectionDAG &DAG) const; |
1116 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
1117 | SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; |
1118 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
1119 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
1120 | SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1121 | SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1122 | SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1123 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
1124 | SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; |
1125 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
1126 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
1127 | SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; |
1128 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1129 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1130 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1131 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1133 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1134 | SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; |
1135 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
1136 | SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1137 | SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; |
1138 | SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, |
1139 | unsigned NewOp) const; |
1140 | SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; |
1141 | SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; |
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1143 | SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
1144 | SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1145 | SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1146 | SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; |
1147 | SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; |
1148 | SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; |
1149 | SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; |
1150 | SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; |
1151 | SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const; |
1152 | SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; |
1153 | SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; |
1154 | SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; |
1155 | SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; |
1156 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
1157 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
1158 | SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1159 | SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1160 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1161 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1162 | SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1163 | SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1164 | SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; |
1165 | SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; |
1166 | SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; |
1167 | SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; |
1168 | SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
1169 | SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; |
1170 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
1171 | SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; |
1172 | SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; |
1173 | SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1174 | SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1175 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1176 | |
1177 | SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; |
1178 | |
1179 | SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, |
1180 | SelectionDAG &DAG) const; |
1181 | SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, |
1182 | SelectionDAG &DAG) const; |
1183 | SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1184 | SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1185 | SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; |
1186 | SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; |
1187 | SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, |
1188 | SelectionDAG &DAG) const; |
1189 | SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; |
1190 | SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; |
1191 | SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const; |
1192 | SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op, |
1193 | SelectionDAG &DAG) const; |
1194 | SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op, |
1195 | SelectionDAG &DAG) const; |
SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1197 | SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const; |
1198 | SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const; |
1199 | SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op, |
1200 | SelectionDAG &DAG) const; |
1201 | SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const; |
1202 | SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const; |
1203 | SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const; |
1204 | SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const; |
1205 | SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op, |
1206 | SelectionDAG &DAG) const; |
1207 | |
1208 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1209 | SmallVectorImpl<SDNode *> &Created) const override; |
1210 | SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1211 | SmallVectorImpl<SDNode *> &Created) const override; |
1212 | SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
int &ExtraSteps, bool &UseOneConst,
1214 | bool Reciprocal) const override; |
1215 | SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
int &ExtraSteps) const override;
1217 | SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
1218 | const DenormalMode &Mode) const override; |
1219 | SDValue getSqrtResultForDenormInput(SDValue Operand, |
1220 | SelectionDAG &DAG) const override; |
1221 | unsigned combineRepeatedFPDivisors() const override; |
1222 | |
1223 | ConstraintType getConstraintType(StringRef Constraint) const override; |
1224 | Register getRegisterByName(const char* RegName, LLT VT, |
1225 | const MachineFunction &MF) const override; |
1226 | |
1227 | /// Examine constraint string and operand type and determine a weight value. |
1228 | /// The operand object must already have been set up with the operand type. |
1229 | ConstraintWeight |
1230 | getSingleConstraintMatchWeight(AsmOperandInfo &info, |
1231 | const char *constraint) const override; |
1232 | |
1233 | std::pair<unsigned, const TargetRegisterClass *> |
1234 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
1235 | StringRef Constraint, MVT VT) const override; |
1236 | |
1237 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
1238 | |
1239 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
1240 | std::vector<SDValue> &Ops, |
1241 | SelectionDAG &DAG) const override; |
1242 | |
1243 | InlineAsm::ConstraintCode |
1244 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
if (ConstraintCode == "Q")
1246 | return InlineAsm::ConstraintCode::Q; |
1247 | // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are |
1248 | // followed by llvm_unreachable so we'll leave them unimplemented in |
1249 | // the backend for now. |
1250 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
1251 | } |
1252 | |
1253 | /// Handle Lowering flag assembly outputs. |
1254 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
1255 | const SDLoc &DL, |
1256 | const AsmOperandInfo &Constraint, |
1257 | SelectionDAG &DAG) const override; |
1258 | |
1259 | bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override; |
1260 | bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; |
1261 | bool isVectorLoadExtDesirable(SDValue ExtVal) const override; |
1262 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
1263 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
1264 | bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1265 | SDValue &Offset, SelectionDAG &DAG) const; |
1266 | bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, |
1267 | ISD::MemIndexedMode &AM, |
1268 | SelectionDAG &DAG) const override; |
1269 | bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1270 | SDValue &Offset, ISD::MemIndexedMode &AM, |
1271 | SelectionDAG &DAG) const override; |
1272 | bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
1273 | bool IsPre, MachineRegisterInfo &MRI) const override; |
1274 | |
1275 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1276 | SelectionDAG &DAG) const override; |
1277 | void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1278 | SelectionDAG &DAG) const; |
void ReplaceExtractSubVectorResults(SDNode *N,
1280 | SmallVectorImpl<SDValue> &Results, |
1281 | SelectionDAG &DAG) const; |
1282 | |
1283 | bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; |
1284 | |
1285 | void finalizeLowering(MachineFunction &MF) const override; |
1286 | |
1287 | bool shouldLocalize(const MachineInstr &MI, |
1288 | const TargetTransformInfo *TTI) const override; |
1289 | |
1290 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
1291 | const APInt &OriginalDemandedBits, |
1292 | const APInt &OriginalDemandedElts, |
1293 | KnownBits &Known, |
1294 | TargetLoweringOpt &TLO, |
1295 | unsigned Depth) const override; |
1296 | |
1297 | bool isTargetCanonicalConstantNode(SDValue Op) const override; |
1298 | |
1299 | // With the exception of data-predicate transitions, no instructions are |
1300 | // required to cast between legal scalable vector types. However: |
1301 | // 1. Packed and unpacked types have different bit lengths, meaning BITCAST |
// is not universally usable.
1303 | // 2. Most unpacked integer types are not legal and thus integer extends |
1304 | // cannot be used to convert between unpacked and packed types. |
1305 | // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used |
1306 | // to transition between unpacked and packed types of the same element type, |
1307 | // with BITCAST used otherwise. |
1308 | // This function does not handle predicate bitcasts. |
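// As an illustrative example, casting unpacked nxv2f32 to packed nxv2i64
// could be REINTERPRET_CAST (nxv2f32 -> nxv4f32) followed by
// BITCAST (nxv4f32 -> nxv2i64).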
1309 | SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; |
1310 | |
1311 | // Returns the runtime value for PSTATE.SM by generating a call to |
1312 | // __arm_sme_state. |
1313 | SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL, |
1314 | EVT VT) const; |
1315 | |
1316 | bool preferScalarizeSplat(SDNode *N) const override; |
1317 | |
1318 | unsigned getMinimumJumpTableEntries() const override; |
1319 | |
1320 | bool softPromoteHalfType() const override { return true; } |
1321 | }; |
1322 | |
1323 | namespace AArch64 { |
1324 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1325 | const TargetLibraryInfo *libInfo); |
1326 | } // end namespace AArch64 |
1327 | |
1328 | } // end namespace llvm |
1329 | |
1330 | #endif |
1331 | |