1 | //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that AArch64 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
16 | |
17 | #include "AArch64.h" |
18 | #include "Utils/AArch64SMEAttributes.h" |
19 | #include "llvm/CodeGen/CallingConvLower.h" |
20 | #include "llvm/CodeGen/MachineFunction.h" |
21 | #include "llvm/CodeGen/SelectionDAG.h" |
22 | #include "llvm/CodeGen/TargetLowering.h" |
23 | #include "llvm/IR/CallingConv.h" |
24 | #include "llvm/IR/Instruction.h" |
25 | |
26 | namespace llvm { |
27 | |
28 | namespace AArch64ISD { |
29 | |
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, use the following naming convention:
33 | // |
34 | // _MERGE_OP<n> The result value is a vector with inactive lanes equal |
35 | // to source operand OP<n>. |
36 | // |
37 | // _MERGE_ZERO The result value is a vector with inactive lanes |
38 | // actively zeroed. |
39 | // |
// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
//                 to the last source operand, whose only purpose is to be
//                 a passthru value.
43 | // |
44 | // For other cases where no explicit action is needed to set the inactive lanes, |
45 | // or when the result is not a vector and it is needed or helpful to |
46 | // distinguish a node from similar unpredicated nodes, use: |
47 | // |
48 | // _PRED |
49 | // |
50 | enum NodeType : unsigned { |
51 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
52 | WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. |
53 | CALL, // Function call. |
54 | |
// Pseudo for an Objective-C call that gets emitted together with a special
// `mov x29, x29` marker instruction.
57 | CALL_RVMARKER, |
58 | |
59 | CALL_BTI, // Function call followed by a BTI instruction. |
60 | |
61 | COALESCER_BARRIER, |
62 | |
63 | SMSTART, |
64 | SMSTOP, |
65 | RESTORE_ZA, |
66 | RESTORE_ZT, |
67 | SAVE_ZT, |
68 | |
69 | // A call with the callee in x16, i.e. "blr x16". |
70 | CALL_ARM64EC_TO_X64, |
71 | |
72 | // Produces the full sequence of instructions for getting the thread pointer |
73 | // offset of a variable into X0, using the TLSDesc model. |
74 | TLSDESC_CALLSEQ, |
75 | ADRP, // Page address of a TargetGlobalAddress operand. |
76 | ADR, // ADR |
77 | ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. |
78 | LOADgot, // Load from automatically generated descriptor (e.g. Global |
79 | // Offset Table, TLS record). |
80 | RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. |
81 | BRCOND, // Conditional branch instruction; "b.cond". |
82 | CSEL, |
83 | CSINV, // Conditional select invert. |
84 | CSNEG, // Conditional select negate. |
85 | CSINC, // Conditional select increment. |
86 | |
87 | // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on |
88 | // ELF. |
89 | THREAD_POINTER, |
90 | ADC, |
91 | SBC, // adc, sbc instructions |
92 | |
// To avoid stack clash, allocation is performed in blocks and each block is
// probed.
95 | PROBED_ALLOCA, |
96 | |
97 | // Predicated instructions where inactive lanes produce undefined results. |
98 | ABDS_PRED, |
99 | ABDU_PRED, |
100 | FADD_PRED, |
101 | FDIV_PRED, |
102 | FMA_PRED, |
103 | FMAX_PRED, |
104 | FMAXNM_PRED, |
105 | FMIN_PRED, |
106 | FMINNM_PRED, |
107 | FMUL_PRED, |
108 | FSUB_PRED, |
109 | HADDS_PRED, |
110 | HADDU_PRED, |
111 | MUL_PRED, |
112 | MULHS_PRED, |
113 | MULHU_PRED, |
114 | RHADDS_PRED, |
115 | RHADDU_PRED, |
116 | SDIV_PRED, |
117 | SHL_PRED, |
118 | SMAX_PRED, |
119 | SMIN_PRED, |
120 | SRA_PRED, |
121 | SRL_PRED, |
122 | UDIV_PRED, |
123 | UMAX_PRED, |
124 | UMIN_PRED, |
125 | |
126 | // Unpredicated vector instructions |
127 | BIC, |
128 | |
129 | SRAD_MERGE_OP1, |
130 | |
131 | // Predicated instructions with the result of inactive lanes provided by the |
132 | // last operand. |
133 | FABS_MERGE_PASSTHRU, |
134 | FCEIL_MERGE_PASSTHRU, |
135 | FFLOOR_MERGE_PASSTHRU, |
136 | FNEARBYINT_MERGE_PASSTHRU, |
137 | FNEG_MERGE_PASSTHRU, |
138 | FRECPX_MERGE_PASSTHRU, |
139 | FRINT_MERGE_PASSTHRU, |
140 | FROUND_MERGE_PASSTHRU, |
141 | FROUNDEVEN_MERGE_PASSTHRU, |
142 | FSQRT_MERGE_PASSTHRU, |
143 | FTRUNC_MERGE_PASSTHRU, |
144 | FP_ROUND_MERGE_PASSTHRU, |
145 | FP_EXTEND_MERGE_PASSTHRU, |
146 | UINT_TO_FP_MERGE_PASSTHRU, |
147 | SINT_TO_FP_MERGE_PASSTHRU, |
148 | FCVTZU_MERGE_PASSTHRU, |
149 | FCVTZS_MERGE_PASSTHRU, |
150 | SIGN_EXTEND_INREG_MERGE_PASSTHRU, |
151 | ZERO_EXTEND_INREG_MERGE_PASSTHRU, |
152 | ABS_MERGE_PASSTHRU, |
153 | NEG_MERGE_PASSTHRU, |
154 | |
155 | SETCC_MERGE_ZERO, |
156 | |
157 | // Arithmetic instructions which write flags. |
158 | ADDS, |
159 | SUBS, |
160 | ADCS, |
161 | SBCS, |
162 | ANDS, |
163 | |
// Conditional compares. Operands: left, right, falsecc, cc, flags.
165 | CCMP, |
166 | CCMN, |
167 | FCCMP, |
168 | |
169 | // Floating point comparison |
170 | FCMP, |
171 | |
172 | // Scalar-to-vector duplication |
173 | DUP, |
174 | DUPLANE8, |
175 | DUPLANE16, |
176 | DUPLANE32, |
177 | DUPLANE64, |
178 | DUPLANE128, |
179 | |
// Vector immediate moves
181 | MOVI, |
182 | MOVIshift, |
183 | MOVIedit, |
184 | MOVImsl, |
185 | FMOV, |
186 | MVNIshift, |
187 | MVNImsl, |
188 | |
189 | // Vector immediate ops |
190 | BICi, |
191 | ORRi, |
192 | |
193 | // Vector bitwise select: similar to ISD::VSELECT but not all bits within an |
194 | // element must be identical. |
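// (As a sketch of the semantics: BSP(Mask, L, R) computes the per-bit blend
// (Mask & L) | (~Mask & R); it is typically materialised as BSL/BIT/BIF.)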
195 | BSP, |
196 | |
197 | // Vector shuffles |
198 | ZIP1, |
199 | ZIP2, |
200 | UZP1, |
201 | UZP2, |
202 | TRN1, |
203 | TRN2, |
204 | REV16, |
205 | REV32, |
206 | REV64, |
207 | EXT, |
208 | SPLICE, |
209 | |
210 | // Vector shift by scalar |
211 | VSHL, |
212 | VLSHR, |
213 | VASHR, |
214 | |
// Vector saturating/rounding shift by immediate
216 | SQSHL_I, |
217 | UQSHL_I, |
218 | SQSHLU_I, |
219 | SRSHR_I, |
220 | URSHR_I, |
221 | URSHR_I_PRED, |
222 | |
223 | // Vector narrowing shift by immediate (bottom) |
224 | RSHRNB_I, |
225 | |
226 | // Vector shift by constant and insert |
227 | VSLI, |
228 | VSRI, |
229 | |
230 | // Vector comparisons |
231 | CMEQ, |
232 | CMGE, |
233 | CMGT, |
234 | CMHI, |
235 | CMHS, |
236 | FCMEQ, |
237 | FCMGE, |
238 | FCMGT, |
239 | |
240 | // Vector zero comparisons |
241 | CMEQz, |
242 | CMGEz, |
243 | CMGTz, |
244 | CMLEz, |
245 | CMLTz, |
246 | FCMEQz, |
247 | FCMGEz, |
248 | FCMGTz, |
249 | FCMLEz, |
250 | FCMLTz, |
251 | |
252 | // Round wide FP to narrow FP with inexact results to odd. |
253 | FCVTXN, |
254 | |
255 | // Vector across-lanes addition |
256 | // Only the lower result lane is defined. |
257 | SADDV, |
258 | UADDV, |
259 | |
// Sum Long across Vector (signed and unsigned)
261 | UADDLV, |
262 | SADDLV, |
263 | |
264 | // Add Pairwise of two vectors |
265 | ADDP, |
266 | // Add Long Pairwise |
267 | SADDLP, |
268 | UADDLP, |
269 | |
270 | // udot/sdot instructions |
271 | UDOT, |
272 | SDOT, |
273 | |
274 | // Vector across-lanes min/max |
275 | // Only the lower result lane is defined. |
276 | SMINV, |
277 | UMINV, |
278 | SMAXV, |
279 | UMAXV, |
280 | |
281 | SADDV_PRED, |
282 | UADDV_PRED, |
283 | SMAXV_PRED, |
284 | UMAXV_PRED, |
285 | SMINV_PRED, |
286 | UMINV_PRED, |
287 | ORV_PRED, |
288 | EORV_PRED, |
289 | ANDV_PRED, |
290 | |
291 | // Compare-and-branch |
292 | CBZ, |
293 | CBNZ, |
294 | TBZ, |
295 | TBNZ, |
296 | |
297 | // Tail calls |
298 | TC_RETURN, |
299 | |
300 | // Custom prefetch handling |
301 | PREFETCH, |
302 | |
303 | // {s|u}int to FP within a FP register. |
304 | SITOF, |
305 | UITOF, |
306 | |
307 | /// Natural vector cast. ISD::BITCAST is not natural in the big-endian |
308 | /// world w.r.t vectors; which causes additional REV instructions to be |
309 | /// generated to compensate for the byte-swapping. But sometimes we do |
310 | /// need to re-interpret the data in SIMD vector registers in big-endian |
311 | /// mode without emitting such REV instructions. |
312 | NVCAST, |
313 | |
314 | MRS, // MRS, also sets the flags via a glue. |
315 | |
316 | SMULL, |
317 | UMULL, |
318 | |
319 | PMULL, |
320 | |
321 | // Reciprocal estimates and steps. |
322 | FRECPE, |
323 | FRECPS, |
324 | FRSQRTE, |
325 | FRSQRTS, |
326 | |
327 | SUNPKHI, |
328 | SUNPKLO, |
329 | UUNPKHI, |
330 | UUNPKLO, |
331 | |
332 | CLASTA_N, |
333 | CLASTB_N, |
334 | LASTA, |
335 | LASTB, |
336 | TBL, |
337 | |
338 | // Floating-point reductions. |
339 | FADDA_PRED, |
340 | FADDV_PRED, |
341 | FMAXV_PRED, |
342 | FMAXNMV_PRED, |
343 | FMINV_PRED, |
344 | FMINNMV_PRED, |
345 | |
346 | INSR, |
347 | PTEST, |
348 | PTEST_ANY, |
349 | PTRUE, |
350 | |
351 | CTTZ_ELTS, |
352 | |
353 | BITREVERSE_MERGE_PASSTHRU, |
354 | BSWAP_MERGE_PASSTHRU, |
355 | REVH_MERGE_PASSTHRU, |
356 | REVW_MERGE_PASSTHRU, |
357 | CTLZ_MERGE_PASSTHRU, |
358 | CTPOP_MERGE_PASSTHRU, |
359 | DUP_MERGE_PASSTHRU, |
360 | INDEX_VECTOR, |
361 | |
// Cast between vectors of the same element type that differ in length.
363 | REINTERPRET_CAST, |
364 | |
365 | // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa |
366 | LS64_BUILD, |
367 | , |
368 | |
369 | LD1_MERGE_ZERO, |
370 | LD1S_MERGE_ZERO, |
371 | LDNF1_MERGE_ZERO, |
372 | LDNF1S_MERGE_ZERO, |
373 | LDFF1_MERGE_ZERO, |
374 | LDFF1S_MERGE_ZERO, |
375 | LD1RQ_MERGE_ZERO, |
376 | LD1RO_MERGE_ZERO, |
377 | |
378 | // Structured loads. |
379 | SVE_LD2_MERGE_ZERO, |
380 | SVE_LD3_MERGE_ZERO, |
381 | SVE_LD4_MERGE_ZERO, |
382 | |
383 | // Unsigned gather loads. |
384 | GLD1_MERGE_ZERO, |
385 | GLD1_SCALED_MERGE_ZERO, |
386 | GLD1_UXTW_MERGE_ZERO, |
387 | GLD1_SXTW_MERGE_ZERO, |
388 | GLD1_UXTW_SCALED_MERGE_ZERO, |
389 | GLD1_SXTW_SCALED_MERGE_ZERO, |
390 | GLD1_IMM_MERGE_ZERO, |
391 | GLD1Q_MERGE_ZERO, |
392 | GLD1Q_INDEX_MERGE_ZERO, |
393 | |
394 | // Signed gather loads |
395 | GLD1S_MERGE_ZERO, |
396 | GLD1S_SCALED_MERGE_ZERO, |
397 | GLD1S_UXTW_MERGE_ZERO, |
398 | GLD1S_SXTW_MERGE_ZERO, |
399 | GLD1S_UXTW_SCALED_MERGE_ZERO, |
400 | GLD1S_SXTW_SCALED_MERGE_ZERO, |
401 | GLD1S_IMM_MERGE_ZERO, |
402 | |
// Unsigned first-faulting gather loads.
404 | GLDFF1_MERGE_ZERO, |
405 | GLDFF1_SCALED_MERGE_ZERO, |
406 | GLDFF1_UXTW_MERGE_ZERO, |
407 | GLDFF1_SXTW_MERGE_ZERO, |
408 | GLDFF1_UXTW_SCALED_MERGE_ZERO, |
409 | GLDFF1_SXTW_SCALED_MERGE_ZERO, |
410 | GLDFF1_IMM_MERGE_ZERO, |
411 | |
// Signed first-faulting gather loads.
413 | GLDFF1S_MERGE_ZERO, |
414 | GLDFF1S_SCALED_MERGE_ZERO, |
415 | GLDFF1S_UXTW_MERGE_ZERO, |
416 | GLDFF1S_SXTW_MERGE_ZERO, |
417 | GLDFF1S_UXTW_SCALED_MERGE_ZERO, |
418 | GLDFF1S_SXTW_SCALED_MERGE_ZERO, |
419 | GLDFF1S_IMM_MERGE_ZERO, |
420 | |
421 | // Non-temporal gather loads |
422 | GLDNT1_MERGE_ZERO, |
423 | GLDNT1_INDEX_MERGE_ZERO, |
424 | GLDNT1S_MERGE_ZERO, |
425 | |
426 | // Contiguous masked store. |
427 | ST1_PRED, |
428 | |
429 | // Scatter store |
430 | SST1_PRED, |
431 | SST1_SCALED_PRED, |
432 | SST1_UXTW_PRED, |
433 | SST1_SXTW_PRED, |
434 | SST1_UXTW_SCALED_PRED, |
435 | SST1_SXTW_SCALED_PRED, |
436 | SST1_IMM_PRED, |
437 | SST1Q_PRED, |
438 | SST1Q_INDEX_PRED, |
439 | |
440 | // Non-temporal scatter store |
441 | SSTNT1_PRED, |
442 | SSTNT1_INDEX_PRED, |
443 | |
444 | // SME |
445 | RDSVL, |
446 | REVD_MERGE_PASSTHRU, |
447 | |
448 | // Asserts that a function argument (i32) is zero-extended to i8 by |
449 | // the caller |
450 | ASSERT_ZEXT_BOOL, |
451 | |
452 | // 128-bit system register accesses |
453 | // lo64, hi64, chain = MRRS(chain, sysregname) |
454 | MRRS, |
455 | // chain = MSRR(chain, sysregname, lo64, hi64) |
456 | MSRR, |
457 | |
458 | // Strict (exception-raising) floating point comparison |
459 | STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, |
460 | STRICT_FCMPE, |
461 | |
462 | // SME ZA loads and stores |
463 | SME_ZA_LDR, |
464 | SME_ZA_STR, |
465 | |
466 | // NEON Load/Store with post-increment base updates |
467 | LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, |
468 | LD3post, |
469 | LD4post, |
470 | ST2post, |
471 | ST3post, |
472 | ST4post, |
473 | LD1x2post, |
474 | LD1x3post, |
475 | LD1x4post, |
476 | ST1x2post, |
477 | ST1x3post, |
478 | ST1x4post, |
479 | LD1DUPpost, |
480 | LD2DUPpost, |
481 | LD3DUPpost, |
482 | LD4DUPpost, |
483 | LD1LANEpost, |
484 | LD2LANEpost, |
485 | LD3LANEpost, |
486 | LD4LANEpost, |
487 | ST2LANEpost, |
488 | ST3LANEpost, |
489 | ST4LANEpost, |
490 | |
491 | STG, |
492 | STZG, |
493 | ST2G, |
494 | STZ2G, |
495 | |
496 | LDP, |
497 | LDIAPP, |
498 | LDNP, |
499 | STP, |
500 | STILP, |
501 | STNP, |
502 | |
503 | // Memory Operations |
504 | MOPS_MEMSET, |
505 | MOPS_MEMSET_TAGGING, |
506 | MOPS_MEMCOPY, |
507 | MOPS_MEMMOVE, |
508 | }; |
509 | |
510 | } // end namespace AArch64ISD |
511 | |
512 | namespace AArch64 { |
513 | /// Possible values of current rounding mode, which is specified in bits |
514 | /// 23:22 of FPCR. |
515 | enum Rounding { |
516 | RN = 0, // Round to Nearest |
517 | RP = 1, // Round towards Plus infinity |
518 | RM = 2, // Round towards Minus infinity |
519 | RZ = 3, // Round towards Zero |
520 | rmMask = 3 // Bit mask selecting rounding mode |
521 | }; |
522 | |
523 | // Bit position of rounding mode bits in FPCR. |
524 | const unsigned RoundingBitsPos = 22; |
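
// A minimal decoding sketch using the constants above (illustrative only;
// FPCRValue names a hypothetical raw read of the register):
//
//   Rounding getRoundingMode(uint64_t FPCRValue) {
//     return static_cast<Rounding>((FPCRValue >> RoundingBitsPos) & rmMask);
//   }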
525 | |
526 | // Registers used to pass function arguments. |
527 | ArrayRef<MCPhysReg> getGPRArgRegs(); |
528 | ArrayRef<MCPhysReg> getFPRArgRegs(); |
529 | |
530 | /// Maximum allowed number of unprobed bytes above SP at an ABI |
531 | /// boundary. |
532 | const unsigned StackProbeMaxUnprobedStack = 1024; |
533 | |
534 | /// Maximum number of iterations to unroll for a constant size probing loop. |
535 | const unsigned StackProbeMaxLoopUnroll = 4; |
536 | |
537 | } // namespace AArch64 |
538 | |
539 | class AArch64Subtarget; |
540 | |
541 | class AArch64TargetLowering : public TargetLowering { |
542 | public: |
543 | explicit AArch64TargetLowering(const TargetMachine &TM, |
544 | const AArch64Subtarget &STI); |
545 | |
546 | /// Control the following reassociation of operands: (op (op x, c1), y) -> (op |
547 | /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. |
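/// For example, (add (add x, 42), y) can be reassociated to
/// (add (add x, y), 42); the constant 42 is an arbitrary illustration.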
548 | bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
549 | SDValue N1) const override; |
550 | |
551 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
552 | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; |
553 | |
554 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
555 | CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; |
556 | |
557 | /// Determine which of the bits specified in Mask are known to be either zero |
558 | /// or one and return them in the KnownZero/KnownOne bitsets. |
559 | void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, |
560 | const APInt &DemandedElts, |
561 | const SelectionDAG &DAG, |
562 | unsigned Depth = 0) const override; |
563 | |
564 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
565 | const APInt &DemandedElts, |
566 | const SelectionDAG &DAG, |
567 | unsigned Depth) const override; |
568 | |
569 | MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { |
570 | // Returning i64 unconditionally here (i.e. even for ILP32) means that the |
571 | // *DAG* representation of pointers will always be 64-bits. They will be |
572 | // truncated and extended when transferred to memory, but the 64-bit DAG |
573 | // allows us to use AArch64's addressing modes much more easily. |
return MVT::getIntegerVT(64);
575 | } |
576 | |
577 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
578 | const APInt &DemandedElts, |
579 | TargetLoweringOpt &TLO) const override; |
580 | |
581 | MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; |
582 | |
583 | /// Returns true if the target allows unaligned memory accesses of the |
584 | /// specified type. |
585 | bool allowsMisalignedMemoryAccesses( |
586 | EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
587 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
588 | unsigned *Fast = nullptr) const override; |
589 | /// LLT variant. |
590 | bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, |
591 | Align Alignment, |
592 | MachineMemOperand::Flags Flags, |
593 | unsigned *Fast = nullptr) const override; |
594 | |
595 | /// Provide custom lowering hooks for some operations. |
596 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
597 | |
598 | const char *getTargetNodeName(unsigned Opcode) const override; |
599 | |
600 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
601 | |
602 | /// This method returns a target specific FastISel object, or null if the |
603 | /// target does not support "fast" ISel. |
604 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
605 | const TargetLibraryInfo *libInfo) const override; |
606 | |
607 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
608 | |
609 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
610 | bool ForCodeSize) const override; |
611 | |
612 | /// Return true if the given shuffle mask can be codegen'd directly, or if it |
613 | /// should be stack expanded. |
614 | bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; |
615 | |
/// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
/// shuffle mask can be codegen'd directly.
618 | bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; |
619 | |
620 | /// Return the ISD::SETCC ValueType. |
621 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
622 | EVT VT) const override; |
623 | |
624 | SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; |
625 | |
626 | MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, |
627 | MachineBasicBlock *BB) const; |
628 | |
629 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
630 | MachineBasicBlock *BB) const; |
631 | |
632 | MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, |
633 | MachineBasicBlock *MBB) const; |
634 | |
635 | MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, |
636 | MachineInstr &MI, |
637 | MachineBasicBlock *BB) const; |
638 | MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; |
639 | MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, |
640 | MachineInstr &MI, MachineBasicBlock *BB, |
641 | bool HasTile) const; |
642 | MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, |
643 | unsigned Opcode, bool Op0IsDef) const; |
644 | MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; |
645 | |
646 | MachineBasicBlock * |
647 | EmitInstrWithCustomInserter(MachineInstr &MI, |
648 | MachineBasicBlock *MBB) const override; |
649 | |
650 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
651 | MachineFunction &MF, |
652 | unsigned Intrinsic) const override; |
653 | |
654 | bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
655 | EVT NewVT) const override; |
656 | |
657 | bool shouldRemoveRedundantExtend(SDValue Op) const override; |
658 | |
659 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
660 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
661 | |
662 | bool isProfitableToHoist(Instruction *I) const override; |
663 | |
664 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
665 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
666 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
667 | |
668 | bool shouldSinkOperands(Instruction *I, |
669 | SmallVectorImpl<Use *> &Ops) const override; |
670 | |
671 | bool optimizeExtendOrTruncateConversion( |
672 | Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override; |
673 | |
bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
675 | |
676 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
677 | |
678 | bool lowerInterleavedLoad(LoadInst *LI, |
679 | ArrayRef<ShuffleVectorInst *> Shuffles, |
680 | ArrayRef<unsigned> Indices, |
681 | unsigned Factor) const override; |
682 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
683 | unsigned Factor) const override; |
684 | |
685 | bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
686 | LoadInst *LI) const override; |
687 | |
688 | bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
689 | StoreInst *SI) const override; |
690 | |
691 | bool isLegalAddImmediate(int64_t) const override; |
692 | bool isLegalAddScalableImmediate(int64_t) const override; |
693 | bool isLegalICmpImmediate(int64_t) const override; |
694 | |
695 | bool isMulAddWithConstProfitable(SDValue AddNode, |
696 | SDValue ConstNode) const override; |
697 | |
698 | bool shouldConsiderGEPOffsetSplit() const override; |
699 | |
700 | EVT getOptimalMemOpType(const MemOp &Op, |
701 | const AttributeList &FuncAttributes) const override; |
702 | |
703 | LLT getOptimalMemOpLLT(const MemOp &Op, |
704 | const AttributeList &FuncAttributes) const override; |
705 | |
706 | /// Return true if the addressing mode represented by AM is legal for this |
707 | /// target, for a load/store of the specified type. |
708 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
709 | unsigned AS, |
710 | Instruction *I = nullptr) const override; |
711 | |
712 | int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, |
713 | int64_t MaxOffset) const override; |
714 | |
715 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
716 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
717 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
718 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
719 | EVT VT) const override; |
720 | bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; |
721 | |
722 | bool generateFMAsInMachineCombiner(EVT VT, |
723 | CodeGenOptLevel OptLevel) const override; |
724 | |
725 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
726 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
727 | |
728 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
729 | bool isDesirableToCommuteWithShift(const SDNode *N, |
730 | CombineLevel Level) const override; |
731 | |
732 | bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override { |
733 | return false; |
734 | } |
735 | |
736 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
737 | bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; |
738 | |
739 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
740 | bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
741 | CombineLevel Level) const override; |
742 | |
743 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
744 | EVT VT) const override; |
745 | |
746 | /// Returns true if it is beneficial to convert a load of a constant |
747 | /// to just the constant itself. |
748 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
749 | Type *Ty) const override; |
750 | |
751 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
752 | /// with this index. |
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
755 | |
756 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
757 | bool MathUsed) const override { |
// Using overflow ops for overflow checks only should be beneficial on
// AArch64.
return TargetLowering::shouldFormOverflowOp(Opcode, VT, /*MathUsed=*/true);
761 | } |
762 | |
763 | Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, |
764 | AtomicOrdering Ord) const override; |
765 | Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, |
766 | AtomicOrdering Ord) const override; |
767 | |
768 | void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override; |
769 | |
770 | bool isOpSuitableForLDPSTP(const Instruction *I) const; |
771 | bool isOpSuitableForLSE128(const Instruction *I) const; |
772 | bool isOpSuitableForRCPC3(const Instruction *I) const; |
773 | bool shouldInsertFencesForAtomic(const Instruction *I) const override; |
774 | bool |
775 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override; |
776 | |
777 | TargetLoweringBase::AtomicExpansionKind |
778 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
779 | TargetLoweringBase::AtomicExpansionKind |
780 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
781 | TargetLoweringBase::AtomicExpansionKind |
782 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
783 | |
784 | TargetLoweringBase::AtomicExpansionKind |
785 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; |
786 | |
787 | bool useLoadStackGuardNode() const override; |
788 | TargetLoweringBase::LegalizeTypeAction |
789 | getPreferredVectorAction(MVT VT) const override; |
790 | |
791 | /// If the target has a standard location for the stack protector cookie, |
792 | /// returns the address of that location. Otherwise, returns nullptr. |
793 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
794 | |
795 | void insertSSPDeclarations(Module &M) const override; |
796 | Value *getSDagStackGuard(const Module &M) const override; |
797 | Function *getSSPStackGuardCheck(const Module &M) const override; |
798 | |
799 | /// If the target has a standard location for the unsafe stack pointer, |
800 | /// returns the address of that location. Otherwise, returns nullptr. |
801 | Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; |
802 | |
803 | /// If a physical register, this returns the register that receives the |
804 | /// exception address on entry to an EH pad. |
805 | Register |
806 | getExceptionPointerRegister(const Constant *PersonalityFn) const override { |
807 | // FIXME: This is a guess. Has this been defined yet? |
808 | return AArch64::X0; |
809 | } |
810 | |
811 | /// If a physical register, this returns the register that receives the |
812 | /// exception typeid on entry to a landing pad. |
813 | Register |
814 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override { |
815 | // FIXME: This is a guess. Has this been defined yet? |
816 | return AArch64::X1; |
817 | } |
818 | |
819 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
820 | |
821 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
822 | const MachineFunction &MF) const override { |
// Do not merge to float value size (128 bits) if no implicit
// float attribute is set.
825 | |
826 | bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat); |
827 | |
828 | if (NoFloat) |
829 | return (MemVT.getSizeInBits() <= 64); |
830 | return true; |
831 | } |
832 | |
833 | bool isCheapToSpeculateCttz(Type *) const override { |
834 | return true; |
835 | } |
836 | |
837 | bool isCheapToSpeculateCtlz(Type *) const override { |
838 | return true; |
839 | } |
840 | |
841 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
842 | |
843 | bool hasAndNotCompare(SDValue V) const override { |
844 | // We can use bics for any scalar. |
845 | return V.getValueType().isScalarInteger(); |
846 | } |
847 | |
848 | bool hasAndNot(SDValue Y) const override { |
849 | EVT VT = Y.getValueType(); |
850 | |
851 | if (!VT.isVector()) |
return hasAndNotCompare(Y);
853 | |
854 | TypeSize TS = VT.getSizeInBits(); |
855 | // TODO: We should be able to use bic/bif too for SVE. |
856 | return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic' |
857 | } |
858 | |
859 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
860 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
861 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
862 | SelectionDAG &DAG) const override; |
863 | |
864 | ShiftLegalizationStrategy |
865 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
866 | unsigned ExpansionFactor) const override; |
867 | |
868 | bool shouldTransformSignedTruncationCheck(EVT XVT, |
869 | unsigned KeptBits) const override { |
// For vectors, we don't have a preference.
871 | if (XVT.isVector()) |
872 | return false; |
873 | |
874 | auto VTIsOk = [](EVT VT) -> bool { |
875 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
876 | VT == MVT::i64; |
877 | }; |
878 | |
879 | // We are ok with KeptBitsVT being byte/word/dword, what SXT supports. |
880 | // XVT will be larger than KeptBitsVT. |
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
882 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
883 | } |
884 | |
885 | bool preferIncOfAddToSubOfNot(EVT VT) const override; |
886 | |
887 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; |
888 | |
889 | bool isComplexDeinterleavingSupported() const override; |
890 | bool isComplexDeinterleavingOperationSupported( |
891 | ComplexDeinterleavingOperation Operation, Type *Ty) const override; |
892 | |
893 | Value *createComplexDeinterleavingIR( |
894 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
895 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
896 | Value *Accumulator = nullptr) const override; |
897 | |
898 | bool supportSplitCSR(MachineFunction *MF) const override { |
899 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
900 | MF->getFunction().hasFnAttribute(Attribute::NoUnwind); |
901 | } |
902 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
903 | void insertCopiesSplitCSR( |
904 | MachineBasicBlock *Entry, |
905 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
906 | |
907 | bool supportSwiftError() const override { |
908 | return true; |
909 | } |
910 | |
911 | bool supportKCFIBundles() const override { return true; } |
912 | |
913 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
914 | MachineBasicBlock::instr_iterator &MBBI, |
915 | const TargetInstrInfo *TII) const override; |
916 | |
917 | /// Enable aggressive FMA fusion on targets that want it. |
918 | bool enableAggressiveFMAFusion(EVT VT) const override; |
919 | |
920 | /// Returns the size of the platform's va_list object. |
921 | unsigned getVaListSizeInBits(const DataLayout &DL) const override; |
922 | |
923 | /// Returns true if \p VecTy is a legal interleaved access type. This |
924 | /// function checks the vector element type and the overall width of the |
925 | /// vector. |
926 | bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, |
927 | bool &UseScalable) const; |
928 | |
929 | /// Returns the number of interleaved accesses that will be generated when |
930 | /// lowering accesses of the given type. |
931 | unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, |
932 | bool UseScalable) const; |
933 | |
934 | MachineMemOperand::Flags getTargetMMOFlags( |
935 | const Instruction &I) const override; |
936 | |
937 | bool functionArgumentNeedsConsecutiveRegisters( |
938 | Type *Ty, CallingConv::ID CallConv, bool isVarArg, |
939 | const DataLayout &DL) const override; |
940 | |
941 | /// Used for exception handling on Win64. |
942 | bool needsFixedCatchObjects() const override; |
943 | |
944 | bool fallBackToDAGISel(const Instruction &Inst) const override; |
945 | |
946 | /// SVE code generation for fixed length vectors does not custom lower |
947 | /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to |
948 | /// merge. However, merging them creates a BUILD_VECTOR that is just as |
949 | /// illegal as the original, thus leading to an infinite legalisation loop. |
950 | /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal |
951 | /// vector types this override can be removed. |
952 | bool mergeStoresAfterLegalization(EVT VT) const override; |
953 | |
954 | // If the platform/function should have a redzone, return the size in bytes. |
955 | unsigned getRedZoneSize(const Function &F) const { |
956 | if (F.hasFnAttribute(Attribute::NoRedZone)) |
957 | return 0; |
958 | return 128; |
959 | } |
960 | |
961 | bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const; |
962 | EVT getPromotedVTForPredicate(EVT VT) const; |
963 | |
964 | EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
965 | bool AllowUnknown = false) const override; |
966 | |
967 | bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override; |
968 | |
969 | bool shouldExpandCttzElements(EVT VT) const override; |
970 | |
971 | /// If a change in streaming mode is required on entry to/return from a |
972 | /// function call it emits and returns the corresponding SMSTART or SMSTOP |
973 | /// node. \p Condition should be one of the enum values from |
974 | /// AArch64SME::ToggleCondition. |
975 | SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, |
976 | SDValue Chain, SDValue InGlue, unsigned Condition, |
977 | SDValue PStateSM = SDValue()) const; |
978 | |
979 | bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } |
980 | |
// Normally SVE is only used for fixed-length vectors that do not fit within
// a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64-bit and 128-bit vectors as well.
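// For example (illustrative, assuming SVE is available): a 256-bit fixed
// vector such as v4i64 does not fit in a NEON register and so uses SVE,
// whereas 128-bit v2i64 uses SVE only when OverrideNEON is true.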
984 | bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; |
985 | |
986 | // Follow NEON ABI rules even when using SVE for fixed length vectors. |
987 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
988 | EVT VT) const override; |
989 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
990 | CallingConv::ID CC, |
991 | EVT VT) const override; |
992 | unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, |
993 | CallingConv::ID CC, EVT VT, |
994 | EVT &IntermediateVT, |
995 | unsigned &NumIntermediates, |
996 | MVT &RegisterVT) const override; |
997 | |
/// True if stack clash protection is enabled for this function.
999 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
1000 | |
1001 | #ifndef NDEBUG |
1002 | void verifyTargetSDNode(const SDNode *N) const override; |
1003 | #endif |
1004 | |
1005 | private: |
1006 | /// Keep a pointer to the AArch64Subtarget around so that we can |
1007 | /// make the right decision when generating code for different targets. |
1008 | const AArch64Subtarget *Subtarget; |
1009 | |
1010 | llvm::BumpPtrAllocator BumpAlloc; |
1011 | llvm::StringSaver Saver{BumpAlloc}; |
1012 | |
1013 | bool isExtFreeImpl(const Instruction *Ext) const override; |
1014 | |
1015 | void addTypeForNEON(MVT VT); |
1016 | void addTypeForFixedLengthSVE(MVT VT); |
1017 | void addDRTypeForNEON(MVT VT); |
1018 | void addQRTypeForNEON(MVT VT); |
1019 | |
1020 | unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL, |
1021 | SelectionDAG &DAG) const; |
1022 | |
1023 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
1024 | bool isVarArg, |
1025 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1026 | const SDLoc &DL, SelectionDAG &DAG, |
1027 | SmallVectorImpl<SDValue> &InVals) const override; |
1028 | |
1029 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
1030 | SDNode *Node) const override; |
1031 | |
1032 | SDValue LowerCall(CallLoweringInfo & /*CLI*/, |
1033 | SmallVectorImpl<SDValue> &InVals) const override; |
1034 | |
1035 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
1036 | CallingConv::ID CallConv, bool isVarArg, |
1037 | const SmallVectorImpl<CCValAssign> &RVLocs, |
1038 | const SDLoc &DL, SelectionDAG &DAG, |
1039 | SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
1040 | SDValue ThisVal, bool RequiresSMChange) const; |
1041 | |
1042 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
1043 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
1044 | SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; |
1045 | SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; |
1046 | |
1047 | SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; |
1048 | SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; |
1049 | |
1050 | SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; |
1051 | |
1052 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1053 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1054 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
1055 | |
1056 | bool |
1057 | isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const; |
1058 | |
1059 | /// Finds the incoming stack arguments which overlap the given fixed stack |
1060 | /// object and incorporates their load into the current chain. This prevents |
1061 | /// an upcoming store from clobbering the stack argument before it's used. |
1062 | SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, |
1063 | MachineFrameInfo &MFI, int ClobberedFI) const; |
1064 | |
1065 | bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; |
1066 | |
1067 | void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, |
1068 | SDValue &Chain) const; |
1069 | |
1070 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
1071 | bool isVarArg, |
1072 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1073 | LLVMContext &Context) const override; |
1074 | |
1075 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1076 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1077 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
1078 | SelectionDAG &DAG) const override; |
1079 | |
1080 | SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1081 | unsigned Flag) const; |
1082 | SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, |
1083 | unsigned Flag) const; |
1084 | SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1085 | unsigned Flag) const; |
1086 | SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1087 | unsigned Flag) const; |
1088 | SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1089 | unsigned Flag) const; |
1090 | template <class NodeTy> |
1091 | SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1092 | template <class NodeTy> |
1093 | SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1094 | template <class NodeTy> |
1095 | SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1096 | template <class NodeTy> |
1097 | SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1098 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1099 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1100 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1101 | SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1102 | SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1103 | SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase, |
1104 | const SDLoc &DL, SelectionDAG &DAG) const; |
1105 | SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, |
1106 | SelectionDAG &DAG) const; |
1107 | SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1108 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
1109 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
1110 | SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; |
1111 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
1112 | SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; |
1113 | SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, |
1114 | SDValue TVal, SDValue FVal, const SDLoc &dl, |
1115 | SelectionDAG &DAG) const; |
1116 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
1117 | SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; |
1118 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
1119 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
1120 | SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1121 | SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1122 | SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1123 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
1124 | SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; |
1125 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
1126 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
1127 | SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; |
1128 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1129 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1130 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1131 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1133 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1134 | SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; |
1135 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
1136 | SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1137 | SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; |
1138 | SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, |
1139 | unsigned NewOp) const; |
1140 | SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; |
1141 | SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; |
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1143 | SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
1144 | SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1145 | SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1146 | SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; |
1147 | SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; |
1148 | SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; |
1149 | SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; |
1150 | SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; |
1151 | SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const; |
1152 | SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; |
1153 | SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; |
1154 | SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; |
1155 | SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; |
1156 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
1157 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
1158 | SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1159 | SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1160 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1161 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1162 | SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1163 | SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1164 | SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; |
1165 | SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; |
1166 | SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; |
1167 | SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; |
1168 | SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
1169 | SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; |
1170 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
1171 | SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; |
1172 | SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; |
1173 | SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1174 | SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1175 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1176 | |
1177 | SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; |
1178 | |
1179 | SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, |
1180 | SelectionDAG &DAG) const; |
1181 | SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, |
1182 | SelectionDAG &DAG) const; |
1183 | SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1184 | SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1185 | SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; |
1186 | SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; |
1187 | SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, |
1188 | SelectionDAG &DAG) const; |
1189 | SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; |
1190 | SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; |
1191 | SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const; |
1192 | SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op, |
1193 | SelectionDAG &DAG) const; |
1194 | SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op, |
1195 | SelectionDAG &DAG) const; |
SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1197 | SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const; |
1198 | SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const; |
1199 | SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op, |
1200 | SelectionDAG &DAG) const; |
1201 | SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const; |
1202 | SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const; |
1203 | SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const; |
1204 | SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const; |
1205 | SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op, |
1206 | SelectionDAG &DAG) const; |
1207 | |
1208 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1209 | SmallVectorImpl<SDNode *> &Created) const override; |
1210 | SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1211 | SmallVectorImpl<SDNode *> &Created) const override; |
1212 | SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
int &ExtraSteps, bool &UseOneConst,
1214 | bool Reciprocal) const override; |
1215 | SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
int &ExtraSteps) const override;
1217 | SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
1218 | const DenormalMode &Mode) const override; |
1219 | SDValue getSqrtResultForDenormInput(SDValue Operand, |
1220 | SelectionDAG &DAG) const override; |
1221 | unsigned combineRepeatedFPDivisors() const override; |
1222 | |
1223 | ConstraintType getConstraintType(StringRef Constraint) const override; |
1224 | Register getRegisterByName(const char* RegName, LLT VT, |
1225 | const MachineFunction &MF) const override; |
1226 | |
1227 | /// Examine constraint string and operand type and determine a weight value. |
1228 | /// The operand object must already have been set up with the operand type. |
1229 | ConstraintWeight |
1230 | getSingleConstraintMatchWeight(AsmOperandInfo &info, |
1231 | const char *constraint) const override; |
1232 | |
1233 | std::pair<unsigned, const TargetRegisterClass *> |
1234 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
1235 | StringRef Constraint, MVT VT) const override; |
1236 | |
1237 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
1238 | |
1239 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
1240 | std::vector<SDValue> &Ops, |
1241 | SelectionDAG &DAG) const override; |
1242 | |
1243 | InlineAsm::ConstraintCode |
1244 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
if (ConstraintCode == "Q")
1246 | return InlineAsm::ConstraintCode::Q; |
1247 | // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are |
1248 | // followed by llvm_unreachable so we'll leave them unimplemented in |
1249 | // the backend for now. |
1250 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
1251 | } |
1252 | |
1253 | /// Handle Lowering flag assembly outputs. |
1254 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
1255 | const SDLoc &DL, |
1256 | const AsmOperandInfo &Constraint, |
1257 | SelectionDAG &DAG) const override; |
1258 | |
1259 | bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override; |
1260 | bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; |
1261 | bool isVectorLoadExtDesirable(SDValue ExtVal) const override; |
1262 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
1263 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
1264 | bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1265 | SDValue &Offset, SelectionDAG &DAG) const; |
1266 | bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, |
1267 | ISD::MemIndexedMode &AM, |
1268 | SelectionDAG &DAG) const override; |
1269 | bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1270 | SDValue &Offset, ISD::MemIndexedMode &AM, |
1271 | SelectionDAG &DAG) const override; |
1272 | bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
1273 | bool IsPre, MachineRegisterInfo &MRI) const override; |
1274 | |
1275 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1276 | SelectionDAG &DAG) const override; |
1277 | void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1278 | SelectionDAG &DAG) const; |
void ReplaceExtractSubVectorResults(SDNode *N,
1280 | SmallVectorImpl<SDValue> &Results, |
1281 | SelectionDAG &DAG) const; |
1282 | |
1283 | bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; |
1284 | |
1285 | void finalizeLowering(MachineFunction &MF) const override; |
1286 | |
1287 | bool shouldLocalize(const MachineInstr &MI, |
1288 | const TargetTransformInfo *TTI) const override; |
1289 | |
1290 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
1291 | const APInt &OriginalDemandedBits, |
1292 | const APInt &OriginalDemandedElts, |
1293 | KnownBits &Known, |
1294 | TargetLoweringOpt &TLO, |
1295 | unsigned Depth) const override; |
1296 | |
1297 | bool isTargetCanonicalConstantNode(SDValue Op) const override; |
1298 | |
1299 | // With the exception of data-predicate transitions, no instructions are |
1300 | // required to cast between legal scalable vector types. However: |
1301 | // 1. Packed and unpacked types have different bit lengths, meaning BITCAST |
// is not universally usable.
1303 | // 2. Most unpacked integer types are not legal and thus integer extends |
1304 | // cannot be used to convert between unpacked and packed types. |
1305 | // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used |
1306 | // to transition between unpacked and packed types of the same element type, |
1307 | // with BITCAST used otherwise. |
1308 | // This function does not handle predicate bitcasts. |
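// As an illustrative example, casting unpacked nxv2f32 to packed nxv2i64
// could be REINTERPRET_CAST (nxv2f32 -> nxv4f32) followed by
// BITCAST (nxv4f32 -> nxv2i64).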
1309 | SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; |
1310 | |
1311 | // Returns the runtime value for PSTATE.SM by generating a call to |
1312 | // __arm_sme_state. |
1313 | SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL, |
1314 | EVT VT) const; |
1315 | |
1316 | bool preferScalarizeSplat(SDNode *N) const override; |
1317 | |
1318 | unsigned getMinimumJumpTableEntries() const override; |
1319 | |
1320 | bool softPromoteHalfType() const override { return true; } |
1321 | }; |
1322 | |
1323 | namespace AArch64 { |
1324 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1325 | const TargetLibraryInfo *libInfo); |
1326 | } // end namespace AArch64 |
1327 | |
1328 | } // end namespace llvm |
1329 | |
1330 | #endif |
1331 | |