//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value. Please stick to the following naming convention:
//
//   _MERGE_OP<n>     The result value is a vector with inactive lanes equal
//                    to source operand OP<n>.
//
//   _MERGE_ZERO      The result value is a vector with inactive lanes
//                    actively zeroed.
//
//   _MERGE_PASSTHRU  The result value is a vector with inactive lanes equal
//                    to the last source operand, whose only purpose is to
//                    act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//   _PRED
//
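// For example (illustrative only, using names declared below): FADD_PRED
// leaves its inactive lanes undefined, SETCC_MERGE_ZERO zeroes them, and
// FABS_MERGE_PASSTHRU takes them from a trailing passthru operand.
//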
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an OBJC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  COALESCER_BARRIER,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed in blocks and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  URSHR_I_PRED,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Round wide FP to narrow FP with inexact results to odd.
  FCVTXN,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned sum Long across Vector
  UADDLV,
  SADDLV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
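
// A minimal illustrative sketch (hypothetical helper, not part of this
// interface) of how the values above combine: the current rounding mode can
// be recovered from a raw FPCR value as
//
//   Rounding getRoundingMode(uint64_t FPCR) {
//     return static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);
//   }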

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB,
                                 bool HasTile) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                         int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOptLevel OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
  /// AArch64SME::ToggleCondition.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue, unsigned Condition,
                              SDValue PStateSM = SDValue()) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64bit and 128bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

#ifndef NDEBUG
  void verifyTargetSDNode(const SDNode *N) const override;
#endif

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
                                  SelectionDAG &DAG) const;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
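  // Illustrative example only (implied by the rules above, not a full
  // specification): a "bitcast" from an unpacked nxv2f32 to nxv2i64 might be
  // expressed as a REINTERPRET_CAST from nxv2f32 to the packed nxv4f32,
  // followed by a BITCAST from nxv4f32 to nxv2i64.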

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  bool softPromoteHalfType() const override { return true; }
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif