//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"
//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  static char ID;

  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, /*AllowROR=*/false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, /*AllowROR=*/true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/true, /*BW=*/9,
                                         /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/false, /*BW=*/6,
                                         /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  // The function name was lost in extraction; SelectExtractHigh matches what
  // this predicate checks: the high 64-bit half of a 128-bit vector.
  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }

  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }

  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  bool SelectDupNegativeZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
      return Const && Const->isZero() && Const->isNegative();
    }
    }

    return false;
  }

  template<MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
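  // For example, with Scale=16 and Shift=false, a constant of 32 yields a
  // multiplier of 2, since VSCALE*32 equals RDVL #2 (RDVL counts multiples
  // of the 16-byte minimum vector length).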
  template<signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

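  // Maps an immediate to a physical register from a consecutive bank: a
  // constant C (with C <= Max) selects the register BaseReg + C.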
  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If Vecs contains a single element, it is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  // Similar to above, except the register must start at a multiple of the
  // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
  SDValue createZMulTuple(ArrayRef<SDValue> Regs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
                                       unsigned Opc_rr);
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
                             unsigned Op);

  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg addressing mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
                             uint32_t MaxImm);

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);

  bool trySelectXAR(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/true, /*BW=*/7,
                                         Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                              bool Negate);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);
};
} // end anonymous namespace

char AArch64DAGToDAGISel::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if N is a constant operand.
// If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive that value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
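/// For example, 0x123 selects (Val=0x123, Shift=LSL #0) and 0x123000 selects
/// (Val=0x123, Shift=LSL #12), while 0x123456 is rejected.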
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = N.getNode()->getAsZExtVal();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
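/// For example, an i32 immediate of -16 is negated to 16 and selected as
/// (Val=16, Shift=LSL #0), allowing e.g. an ADD to be emitted as a SUB or
/// vice versa.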
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = N.getNode()->getAsZExtVal();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
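/// For example, a (shl x, #3) whose only users are loads/stores can fold into
/// the [reg, reg, lsl #3] form without keeping a separate LSL around.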
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth folding V into an extended register
/// addressing mode.
bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If the subtarget has slow address shifts of amount 1 or 4 (i.e. Size 2 or
  // 16), folding the shift costs additional micro-ops.
  if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
    return false;

  // Check whether we're going to emit the address arithmetic anyway because
  // it's used by a non-address operation.
  if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
    return true;
  if (V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// Fold "and (shl/srl/sra x, c), mask" into "shl (srl/sra x, c1), c2" so that
/// more shifted-register operands can be selected.
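/// For example, (and (srl x, 2), 0xff000000) on i32 becomes
/// (UBFMWri x, 26, 31) with an LSL #24 shifter operand.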
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp, and
    // BitWidth != LowZBits + MaskLen doesn't match the pattern.
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp.
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA needs all the high bits.
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1.
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

/// Determine whether it is worth folding V into an extended register of an
/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
/// instruction, and the shift should be treated as worth folding even if it
/// has multiple uses.
bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the add/sub and save a cycle.
  if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&
      getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
    return true;

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
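/// For example, N = (shl x, #4) yields Reg=x and Shift=LSL #4, so an OR user
/// can be selected as "orr w0, w1, w2, lsl #4".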
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
    return true;

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFoldingALU(N, /*LSL=*/true);
  }

  return false;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
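/// For example, a GPR64 value is narrowed by extracting its sub_32
/// subregister.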
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
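/// For example, N = (shl (sext_inreg x, i8), #2) yields the low 32 bits of x
/// as Reg and SXTB #2 as Shift, matching "add w0, w1, w2, sxtb #2".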
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
    // isDef32 as a heuristic for when the operand is likely to be a 32-bit
    // def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This operand
/// form is used by instructions that take an SP operand.
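/// For example, N = (shl x1, #3) yields Reg=x1 and Shift=UXTX #3, as in
/// "add x0, sp, x1, uxtx #3".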
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
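/// Folding is profitable when every user is a load/store, since the pair then
/// collapses to e.g. "adrp x0, sym" followed by "ldr w1, [x0, :lo12:sym]".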
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// Check if the immediate offset is valid as a scaled immediate.
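/// For example, with Range=0x1000 and Size=8, the valid offsets are
/// 0, 8, ..., 32760.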
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
                                     unsigned Size) {
  if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
      Offset < (Range << Log2_32(Size)))
    return true;
  return false;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address. The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
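/// For example, a signed 7-bit immediate with Size=8 (LDP/STP of X registers)
/// accepts byte offsets in [-512, 504] that are multiples of 8, encoding them
/// as OffImm values in [-64, 63].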
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
  // selected here doesn't support labels/immediates, only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // unsigned Immediate
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
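/// For example, (add x0, #32) with Size=8 yields Base=x0 and OffImm=4,
/// matching "ldr x1, [x0, #32]".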
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
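/// For example, an offset of -8 cannot be scaled, but fits the signed 9-bit
/// range of "ldur x1, [x0, #-8]".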
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
                                       N);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
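/// For example, with Size=8, N = (shl (sext_inreg x, i32), #3) yields the low
/// 32 bits of x as Offset and SignExtend=1, matching the [xN, wM, sxtw #3]
/// form.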
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), /*IsLoadStore=*/true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFoldingAddr(N, Size);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, /*WantExtend=*/true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, /*WantExtend=*/true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, /*IsLoadStore=*/true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(LHS, Size))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, /*IsLoadStore=*/true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(RHS, Size))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or if it can be encoded in an "ADD LSL #12" and cannot be
// encoded by a single MOVZ, return true.
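// For example, 0x123000 is preferred (it fits "ADD #0x123, LSL #12" and is
// not a single MOVZ), while 0x5000 is not ("movz w0, #0x5000" is cheaper).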
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out for a wide immediate RHS: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. In that case the [BaseReg + 0] addressing mode is used, generating
  // instructions like:
  //   MOV  X0, WideImmediate
  //   ADD  X1, BaseReg, X0
  //   LDR  X2, [X1, 0]
  // For such a situation, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //   MOV  X0, WideImmediate
  //   LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip immediates that can be selected in the load/store addressing
    // mode, and immediates that can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, /*WantExtend=*/false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, /*WantExtend=*/false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}
1384 | |
1385 | SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { |
1386 | static const unsigned RegClassIDs[] = { |
1387 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
1388 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
1389 | AArch64::dsub2, AArch64::dsub3}; |
1390 | |
1391 | return createTuple(Vecs: Regs, RegClassIDs, SubRegs); |
1392 | } |
1393 | |
1394 | SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { |
1395 | static const unsigned RegClassIDs[] = { |
1396 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
1397 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
1398 | AArch64::qsub2, AArch64::qsub3}; |
1399 | |
1400 | return createTuple(Vecs: Regs, RegClassIDs, SubRegs); |
1401 | } |
1402 | |
1403 | SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { |
1404 | static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, |
1405 | AArch64::ZPR3RegClassID, |
1406 | AArch64::ZPR4RegClassID}; |
1407 | static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, |
1408 | AArch64::zsub2, AArch64::zsub3}; |
1409 | |
1410 | return createTuple(Vecs: Regs, RegClassIDs, SubRegs); |
1411 | } |
1412 | |
1413 | SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) { |
1414 | assert(Regs.size() == 2 || Regs.size() == 4); |
1415 | |
1416 | // The createTuple interface requires 3 RegClassIDs for each possible |
1417 | // tuple type even though we only have them for ZPR2 and ZPR4. |
1418 | static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0, |
1419 | AArch64::ZPR4Mul4RegClassID}; |
1420 | static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, |
1421 | AArch64::zsub2, AArch64::zsub3}; |
1422 | return createTuple(Vecs: Regs, RegClassIDs, SubRegs); |
1423 | } |
1424 | |
1425 | SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, |
1426 | const unsigned RegClassIDs[], |
1427 | const unsigned SubRegs[]) { |
1428 | // There's no special register-class for a vector-list of 1 element: it's just |
1429 | // a vector. |
1430 | if (Regs.size() == 1) |
1431 | return Regs[0]; |
1432 | |
1433 | assert(Regs.size() >= 2 && Regs.size() <= 4); |
1434 | |
1435 | SDLoc DL(Regs[0]); |
1436 | |
1437 | SmallVector<SDValue, 4> Ops; |
1438 | |
1439 | // First operand of REG_SEQUENCE is the desired RegClass. |
1440 | Ops.push_back( |
1441 | CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); |
1442 | |
1443 | // Then we get pairs of source & subregister-position for the components. |
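// For example, for Regs = {Q0, Q1} this emits:
//   REG_SEQUENCE QQRegClassID, Q0, qsub0, Q1, qsub1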
1444 | for (unsigned i = 0; i < Regs.size(); ++i) { |
1445 | Ops.push_back(Elt: Regs[i]); |
1446 | Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); |
1447 | } |
1448 | |
1449 | SDNode *N = |
1450 | CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); |
1451 | return SDValue(N, 0); |
1452 | } |
1453 | |
1454 | void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, |
1455 | bool isExt) { |
1456 | SDLoc dl(N); |
1457 | EVT VT = N->getValueType(ResNo: 0); |
1458 | |
1459 | unsigned ExtOff = isExt; |
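// For TBX (isExt), operand 1 holds the fallback values used for out-of-range
// indices, so the vector list starts one operand later than for TBL.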
1460 | |
1461 | // Form a REG_SEQUENCE to force register allocation. |
1462 | unsigned Vec0Off = ExtOff + 1; |
1463 | SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, |
1464 | N->op_begin() + Vec0Off + NumVecs); |
1465 | SDValue RegSeq = createQTuple(Regs); |
1466 | |
1467 | SmallVector<SDValue, 6> Ops; |
1468 | if (isExt) |
1469 | Ops.push_back(Elt: N->getOperand(Num: 1)); |
1470 | Ops.push_back(Elt: RegSeq); |
1471 | Ops.push_back(Elt: N->getOperand(Num: NumVecs + ExtOff + 1)); |
1472 | ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops)); |
1473 | } |
1474 | |
1475 | bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { |
1476 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
1477 | if (LD->isUnindexed()) |
1478 | return false; |
1479 | EVT VT = LD->getMemoryVT(); |
1480 | EVT DstVT = N->getValueType(ResNo: 0); |
1481 | ISD::MemIndexedMode AM = LD->getAddressingMode(); |
1482 | bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; |
1483 | |
1484 | // We're not doing validity checking here. That was done when checking |
1485 | // if we should mark the load as indexed or not. We're just selecting |
1486 | // the right instruction. |
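// For example, a pre-increment sign-extending i32 load into i64 selects
// LDRSWpre, while a post-increment zero-extending i8 load into i64 selects
// LDRBBpost plus the SUBREG_TO_REG emitted below.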
1487 | unsigned Opcode = 0; |
1488 | |
1489 | ISD::LoadExtType ExtType = LD->getExtensionType(); |
1490 | bool InsertTo64 = false; |
1491 | if (VT == MVT::i64) |
1492 | Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; |
1493 | else if (VT == MVT::i32) { |
1494 | if (ExtType == ISD::NON_EXTLOAD) |
1495 | Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
1496 | else if (ExtType == ISD::SEXTLOAD) |
1497 | Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; |
1498 | else { |
1499 | Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
1500 | InsertTo64 = true; |
1501 | // The result of the load is only i32. It's the subreg_to_reg that makes |
1502 | // it into an i64. |
1503 | DstVT = MVT::i32; |
1504 | } |
1505 | } else if (VT == MVT::i16) { |
1506 | if (ExtType == ISD::SEXTLOAD) { |
1507 | if (DstVT == MVT::i64) |
1508 | Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; |
1509 | else |
1510 | Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; |
1511 | } else { |
1512 | Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; |
1513 | InsertTo64 = DstVT == MVT::i64; |
1514 | // The result of the load is only i32. It's the subreg_to_reg that makes |
1515 | // it into an i64. |
1516 | DstVT = MVT::i32; |
1517 | } |
1518 | } else if (VT == MVT::i8) { |
1519 | if (ExtType == ISD::SEXTLOAD) { |
1520 | if (DstVT == MVT::i64) |
1521 | Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; |
1522 | else |
1523 | Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; |
1524 | } else { |
1525 | Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; |
1526 | InsertTo64 = DstVT == MVT::i64; |
1527 | // The result of the load is only i32. It's the subreg_to_reg that makes |
1528 | // it into an i64. |
1529 | DstVT = MVT::i32; |
1530 | } |
1531 | } else if (VT == MVT::f16) { |
1532 | Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
1533 | } else if (VT == MVT::bf16) { |
1534 | Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
1535 | } else if (VT == MVT::f32) { |
1536 | Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; |
1537 | } else if (VT == MVT::f64 || VT.is64BitVector()) { |
1538 | Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; |
1539 | } else if (VT.is128BitVector()) { |
1540 | Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; |
1541 | } else |
1542 | return false; |
1543 | SDValue Chain = LD->getChain(); |
1544 | SDValue Base = LD->getBasePtr(); |
1545 | ConstantSDNode *OffsetOp = cast<ConstantSDNode>(Val: LD->getOffset()); |
1546 | int OffsetVal = (int)OffsetOp->getZExtValue(); |
1547 | SDLoc dl(N); |
1548 | SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); |
1549 | SDValue Ops[] = { Base, Offset, Chain }; |
1550 | SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, |
1551 | MVT::Other, Ops); |
1552 | |
1553 | // Transfer memoperands. |
1554 | MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand(); |
1555 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Res), NewMemRefs: {MemOp}); |
1556 | |
1557 | // Either way, we're replacing the node, so tell the caller that. |
1558 | SDValue LoadedVal = SDValue(Res, 1); |
1559 | if (InsertTo64) { |
1560 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); |
1561 | LoadedVal = |
1562 | SDValue(CurDAG->getMachineNode( |
1563 | AArch64::SUBREG_TO_REG, dl, MVT::i64, |
1564 | CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, |
1565 | SubReg), |
1566 | 0); |
1567 | } |
1568 | |
1569 | ReplaceUses(F: SDValue(N, 0), T: LoadedVal); |
1570 | ReplaceUses(F: SDValue(N, 1), T: SDValue(Res, 0)); |
1571 | ReplaceUses(F: SDValue(N, 2), T: SDValue(Res, 2)); |
1572 | CurDAG->RemoveDeadNode(N); |
1573 | return true; |
1574 | } |
1575 | |
1576 | void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
1577 | unsigned SubRegIdx) { |
1578 | SDLoc dl(N); |
1579 | EVT VT = N->getValueType(ResNo: 0); |
1580 | SDValue Chain = N->getOperand(Num: 0); |
1581 | |
1582 | SDValue Ops[] = {N->getOperand(Num: 2), // Mem operand; |
1583 | Chain}; |
1584 | |
1585 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
1586 | |
1587 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
1588 | SDValue SuperReg = SDValue(Ld, 0); |
1589 | for (unsigned i = 0; i < NumVecs; ++i) |
1590 | ReplaceUses(F: SDValue(N, i), |
1591 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg)); |
1592 | |
1593 | ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1)); |
1594 | |
1595 | // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, |
1596 | // because it's too simple to have needed special treatment during lowering. |
1597 | if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: N)) { |
1598 | MachineMemOperand *MemOp = MemIntr->getMemOperand(); |
1599 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp}); |
1600 | } |
1601 | |
1602 | CurDAG->RemoveDeadNode(N); |
1603 | } |
1604 | |
1605 | void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, |
1606 | unsigned Opc, unsigned SubRegIdx) { |
1607 | SDLoc dl(N); |
1608 | EVT VT = N->getValueType(ResNo: 0); |
1609 | SDValue Chain = N->getOperand(Num: 0); |
1610 | |
1611 | SDValue Ops[] = {N->getOperand(Num: 1), // Mem operand |
1612 | N->getOperand(Num: 2), // Incremental |
1613 | Chain}; |
1614 | |
1615 | const EVT ResTys[] = {MVT::i64, // Type of the write back register |
1616 | MVT::Untyped, MVT::Other}; |
1617 | |
1618 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
1619 | |
1620 | // Update uses of write back register |
1621 | ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0)); |
1622 | |
1623 | // Update uses of vector list |
1624 | SDValue SuperReg = SDValue(Ld, 1); |
1625 | if (NumVecs == 1) |
1626 | ReplaceUses(F: SDValue(N, 0), T: SuperReg); |
1627 | else |
1628 | for (unsigned i = 0; i < NumVecs; ++i) |
1629 | ReplaceUses(F: SDValue(N, i), |
1630 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg)); |
1631 | |
1632 | // Update the chain |
1633 | ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2)); |
1634 | CurDAG->RemoveDeadNode(N); |
1635 | } |
1636 | |
/// Optimize \p OldBase and \p OldOffset by selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
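/// For example, a base of the form (Base + imm * VL) with imm in [-8, 7] may
/// select the reg+imm (Opc_ri) form, while a (Base + (index << Scale))
/// pattern may select the reg+reg (Opc_rr) form.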
1640 | std::tuple<unsigned, SDValue, SDValue> |
1641 | AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, |
1642 | unsigned Opc_ri, |
1643 | const SDValue &OldBase, |
1644 | const SDValue &OldOffset, |
1645 | unsigned Scale) { |
1646 | SDValue NewBase = OldBase; |
1647 | SDValue NewOffset = OldOffset; |
1648 | // Detect a possible Reg+Imm addressing mode. |
1649 | const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( |
1650 | Root: N, N: OldBase, Base&: NewBase, OffImm&: NewOffset); |
1651 | |
1652 | // Detect a possible reg+reg addressing mode, but only if we haven't already |
1653 | // detected a Reg+Imm one. |
1654 | const bool IsRegReg = |
1655 | !IsRegImm && SelectSVERegRegAddrMode(N: OldBase, Scale, Base&: NewBase, Offset&: NewOffset); |
1656 | |
1657 | // Select the instruction. |
1658 | return std::make_tuple(args&: IsRegReg ? Opc_rr : Opc_ri, args&: NewBase, args&: NewOffset); |
1659 | } |
1660 | |
1661 | enum class SelectTypeKind { |
1662 | Int1 = 0, |
1663 | Int = 1, |
1664 | FP = 2, |
1665 | AnyType = 3, |
1666 | }; |
1667 | |
1668 | /// This function selects an opcode from a list of opcodes, which is |
1669 | /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } |
1670 | /// element types, in this order. |
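/// For example, with Kind == SelectTypeKind::Int and VT == nxv8i16 (eight
/// 16-bit elements), this returns Opcodes[1].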
1671 | template <SelectTypeKind Kind> |
1672 | static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) { |
1673 | // Only match scalable vector VTs |
1674 | if (!VT.isScalableVector()) |
1675 | return 0; |
1676 | |
1677 | EVT EltVT = VT.getVectorElementType(); |
1678 | switch (Kind) { |
1679 | case SelectTypeKind::AnyType: |
1680 | break; |
1681 | case SelectTypeKind::Int: |
1682 | if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && |
1683 | EltVT != MVT::i64) |
1684 | return 0; |
1685 | break; |
1686 | case SelectTypeKind::Int1: |
1687 | if (EltVT != MVT::i1) |
1688 | return 0; |
1689 | break; |
1690 | case SelectTypeKind::FP: |
1691 | if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) |
1692 | return 0; |
1693 | break; |
1694 | } |
1695 | |
1696 | unsigned Offset; |
1697 | switch (VT.getVectorMinNumElements()) { |
1698 | case 16: // 8-bit |
1699 | Offset = 0; |
1700 | break; |
1701 | case 8: // 16-bit |
1702 | Offset = 1; |
1703 | break; |
1704 | case 4: // 32-bit |
1705 | Offset = 2; |
1706 | break; |
1707 | case 2: // 64-bit |
1708 | Offset = 3; |
1709 | break; |
1710 | default: |
1711 | return 0; |
1712 | } |
1713 | |
1714 | return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; |
1715 | } |
1716 | |
1717 | // This function is almost identical to SelectWhilePair, but has an |
1718 | // extra check on the range of the immediate operand. |
1719 | // TODO: Merge these two functions together at some point? |
1720 | void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) { |
1721 | // Immediate can be either 0 or 1. |
1722 | if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2))) |
1723 | if (Imm->getZExtValue() > 1) |
1724 | return; |
1725 | |
1726 | SDLoc DL(N); |
1727 | EVT VT = N->getValueType(ResNo: 0); |
1728 | SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)}; |
1729 | SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
1730 | SDValue SuperReg = SDValue(WhilePair, 0); |
1731 | |
1732 | for (unsigned I = 0; I < 2; ++I) |
1733 | ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
1734 | AArch64::psub0 + I, DL, VT, SuperReg)); |
1735 | |
1736 | CurDAG->RemoveDeadNode(N); |
1737 | } |
1738 | |
1739 | void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { |
1740 | SDLoc DL(N); |
1741 | EVT VT = N->getValueType(ResNo: 0); |
1742 | |
1743 | SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)}; |
1744 | |
1745 | SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
1746 | SDValue SuperReg = SDValue(WhilePair, 0); |
1747 | |
1748 | for (unsigned I = 0; I < 2; ++I) |
1749 | ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
1750 | AArch64::psub0 + I, DL, VT, SuperReg)); |
1751 | |
1752 | CurDAG->RemoveDeadNode(N); |
1753 | } |
1754 | |
1755 | void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, |
1756 | unsigned Opcode) { |
1757 | EVT VT = N->getValueType(ResNo: 0); |
1758 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
1759 | SDValue Ops = createZTuple(Regs); |
1760 | SDLoc DL(N); |
1761 | SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); |
1762 | SDValue SuperReg = SDValue(Intrinsic, 0); |
1763 | for (unsigned i = 0; i < NumVecs; ++i) |
1764 | ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1765 | AArch64::zsub0 + i, DL, VT, SuperReg)); |
1766 | |
1767 | CurDAG->RemoveDeadNode(N); |
1768 | } |
1769 | |
1770 | void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, |
1771 | unsigned NumVecs, |
1772 | bool IsZmMulti, |
1773 | unsigned Opcode, |
1774 | bool HasPred) { |
assert(Opcode != 0 && "Unexpected opcode");
1776 | |
1777 | SDLoc DL(N); |
1778 | EVT VT = N->getValueType(ResNo: 0); |
1779 | unsigned FirstVecIdx = HasPred ? 2 : 1; |
1780 | |
1781 | auto GetMultiVecOperand = [=](unsigned StartIdx) { |
1782 | SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx, |
1783 | N->op_begin() + StartIdx + NumVecs); |
1784 | return createZMulTuple(Regs); |
1785 | }; |
1786 | |
1787 | SDValue Zdn = GetMultiVecOperand(FirstVecIdx); |
1788 | |
1789 | SDValue Zm; |
1790 | if (IsZmMulti) |
1791 | Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); |
1792 | else |
1793 | Zm = N->getOperand(Num: NumVecs + FirstVecIdx); |
1794 | |
1795 | SDNode *Intrinsic; |
1796 | if (HasPred) |
1797 | Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, |
1798 | N->getOperand(1), Zdn, Zm); |
1799 | else |
1800 | Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); |
1801 | SDValue SuperReg = SDValue(Intrinsic, 0); |
1802 | for (unsigned i = 0; i < NumVecs; ++i) |
1803 | ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1804 | AArch64::zsub0 + i, DL, VT, SuperReg)); |
1805 | |
1806 | CurDAG->RemoveDeadNode(N); |
1807 | } |
1808 | |
1809 | void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, |
1810 | unsigned Scale, unsigned Opc_ri, |
1811 | unsigned Opc_rr, bool IsIntr) { |
assert(Scale < 5 && "Invalid scaling value.");
1813 | SDLoc DL(N); |
1814 | EVT VT = N->getValueType(ResNo: 0); |
1815 | SDValue Chain = N->getOperand(Num: 0); |
1816 | |
1817 | // Optimize addressing mode. |
1818 | SDValue Base, Offset; |
1819 | unsigned Opc; |
1820 | std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
1821 | N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), |
1822 | CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); |
1823 | |
1824 | SDValue Ops[] = {N->getOperand(Num: IsIntr ? 2 : 1), // Predicate |
1825 | Base, // Memory operand |
1826 | Offset, Chain}; |
1827 | |
1828 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
1829 | |
1830 | SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); |
1831 | SDValue SuperReg = SDValue(Load, 0); |
1832 | for (unsigned i = 0; i < NumVecs; ++i) |
1833 | ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1834 | AArch64::zsub0 + i, DL, VT, SuperReg)); |
1835 | |
1836 | // Copy chain |
1837 | unsigned ChainIdx = NumVecs; |
1838 | ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1)); |
1839 | CurDAG->RemoveDeadNode(N); |
1840 | } |
1841 | |
1842 | void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, |
1843 | unsigned NumVecs, |
1844 | unsigned Scale, |
1845 | unsigned Opc_ri, |
1846 | unsigned Opc_rr) { |
assert(Scale < 4 && "Invalid scaling value.");
1848 | SDLoc DL(N); |
1849 | EVT VT = N->getValueType(ResNo: 0); |
1850 | SDValue Chain = N->getOperand(Num: 0); |
1851 | |
1852 | SDValue PNg = N->getOperand(Num: 2); |
1853 | SDValue Base = N->getOperand(Num: 3); |
1854 | SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); |
1855 | unsigned Opc; |
1856 | std::tie(args&: Opc, args&: Base, args&: Offset) = |
1857 | findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, OldBase: Base, OldOffset: Offset, Scale); |
1858 | |
1859 | SDValue Ops[] = {PNg, // Predicate-as-counter |
1860 | Base, // Memory operand |
1861 | Offset, Chain}; |
1862 | |
1863 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
1864 | |
1865 | SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); |
1866 | SDValue SuperReg = SDValue(Load, 0); |
1867 | for (unsigned i = 0; i < NumVecs; ++i) |
1868 | ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1869 | AArch64::zsub0 + i, DL, VT, SuperReg)); |
1870 | |
1871 | // Copy chain |
1872 | unsigned ChainIdx = NumVecs; |
1873 | ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1)); |
1874 | CurDAG->RemoveDeadNode(N); |
1875 | } |
1876 | |
1877 | void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, |
1878 | unsigned Opcode) { |
1879 | if (N->getValueType(0) != MVT::nxv4f32) |
1880 | return; |
1881 | SelectUnaryMultiIntrinsic(N, NumOutVecs: NumVecs, IsTupleInput: true, Opc: Opcode); |
1882 | } |
1883 | |
1884 | void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, |
1885 | unsigned NumOutVecs, |
1886 | unsigned Opc, uint32_t MaxImm) { |
1887 | if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 4))) |
1888 | if (Imm->getZExtValue() > MaxImm) |
1889 | return; |
1890 | |
1891 | SDValue ZtValue; |
1892 | if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue)) |
1893 | return; |
1894 | SDValue Ops[] = {ZtValue, Node->getOperand(Num: 3), Node->getOperand(Num: 4)}; |
1895 | SDLoc DL(Node); |
1896 | EVT VT = Node->getValueType(ResNo: 0); |
1897 | |
1898 | SDNode *Instruction = |
1899 | CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops); |
1900 | SDValue SuperReg = SDValue(Instruction, 0); |
1901 | |
1902 | for (unsigned I = 0; I < NumOutVecs; ++I) |
1903 | ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg( |
1904 | AArch64::zsub0 + I, DL, VT, SuperReg)); |
1905 | |
1906 | // Copy chain |
1907 | unsigned ChainIdx = NumOutVecs; |
1908 | ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1)); |
1909 | CurDAG->RemoveDeadNode(N: Node); |
1910 | } |
1911 | |
1912 | void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, |
1913 | unsigned Op) { |
1914 | SDLoc DL(N); |
1915 | EVT VT = N->getValueType(ResNo: 0); |
1916 | |
1917 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
1918 | SDValue Zd = createZMulTuple(Regs); |
1919 | SDValue Zn = N->getOperand(Num: 1 + NumVecs); |
1920 | SDValue Zm = N->getOperand(Num: 2 + NumVecs); |
1921 | |
1922 | SDValue Ops[] = {Zd, Zn, Zm}; |
1923 | |
1924 | SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); |
1925 | SDValue SuperReg = SDValue(Intrinsic, 0); |
1926 | for (unsigned i = 0; i < NumVecs; ++i) |
1927 | ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1928 | AArch64::zsub0 + i, DL, VT, SuperReg)); |
1929 | |
1930 | CurDAG->RemoveDeadNode(N); |
1931 | } |
1932 | |
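// Map a (BaseReg, TileNum) pair onto the concrete SME tile register; e.g.
// ZAS0 with TileNum == 2 becomes ZAS2. Returns false if TileNum is out of
// range for the given tile's element size.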
1933 | bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) { |
1934 | switch (BaseReg) { |
1935 | default: |
1936 | return false; |
1937 | case AArch64::ZA: |
1938 | case AArch64::ZAB0: |
1939 | if (TileNum == 0) |
1940 | break; |
1941 | return false; |
1942 | case AArch64::ZAH0: |
1943 | if (TileNum <= 1) |
1944 | break; |
1945 | return false; |
1946 | case AArch64::ZAS0: |
1947 | if (TileNum <= 3) |
1948 | break; |
1949 | return false; |
1950 | case AArch64::ZAD0: |
1951 | if (TileNum <= 7) |
1952 | break; |
1953 | return false; |
1954 | } |
1955 | |
1956 | BaseReg += TileNum; |
1957 | return true; |
1958 | } |
1959 | |
1960 | template <unsigned MaxIdx, unsigned Scale> |
1961 | void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, |
1962 | unsigned BaseReg, unsigned Op) { |
1963 | unsigned TileNum = 0; |
1964 | if (BaseReg != AArch64::ZA) |
1965 | TileNum = N->getConstantOperandVal(Num: 2); |
1966 | |
1967 | if (!SelectSMETile(BaseReg, TileNum)) |
1968 | return; |
1969 | |
1970 | SDValue SliceBase, Base, Offset; |
1971 | if (BaseReg == AArch64::ZA) |
1972 | SliceBase = N->getOperand(Num: 2); |
1973 | else |
1974 | SliceBase = N->getOperand(Num: 3); |
1975 | |
1976 | if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale)) |
1977 | return; |
1978 | |
1979 | SDLoc DL(N); |
1980 | SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); |
1981 | SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(Num: 0)}; |
1982 | SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); |
1983 | |
1984 | EVT VT = N->getValueType(ResNo: 0); |
1985 | for (unsigned I = 0; I < NumVecs; ++I) |
1986 | ReplaceUses(SDValue(N, I), |
1987 | CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, |
1988 | SDValue(Mov, 0))); |
1989 | // Copy chain |
1990 | unsigned ChainIdx = NumVecs; |
1991 | ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1)); |
1992 | CurDAG->RemoveDeadNode(N); |
1993 | } |
1994 | |
1995 | void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, |
1996 | unsigned NumOutVecs, |
1997 | bool IsTupleInput, |
1998 | unsigned Opc) { |
1999 | SDLoc DL(N); |
2000 | EVT VT = N->getValueType(ResNo: 0); |
2001 | unsigned NumInVecs = N->getNumOperands() - 1; |
2002 | |
2003 | SmallVector<SDValue, 6> Ops; |
2004 | if (IsTupleInput) { |
assert((NumInVecs == 2 || NumInVecs == 4) &&
"Don't know how to handle multi-register input!");
2007 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, |
2008 | N->op_begin() + 1 + NumInVecs); |
2009 | Ops.push_back(Elt: createZMulTuple(Regs)); |
2010 | } else { |
2011 | // All intrinsic nodes have the ID as the first operand, hence the "1 + I". |
2012 | for (unsigned I = 0; I < NumInVecs; I++) |
2013 | Ops.push_back(Elt: N->getOperand(Num: 1 + I)); |
2014 | } |
2015 | |
2016 | SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
2017 | SDValue SuperReg = SDValue(Res, 0); |
2018 | |
2019 | for (unsigned I = 0; I < NumOutVecs; I++) |
2020 | ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
2021 | AArch64::zsub0 + I, DL, VT, SuperReg)); |
2022 | CurDAG->RemoveDeadNode(N); |
2023 | } |
2024 | |
2025 | void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, |
2026 | unsigned Opc) { |
2027 | SDLoc dl(N); |
2028 | EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0); |
2029 | |
2030 | // Form a REG_SEQUENCE to force register allocation. |
2031 | bool Is128Bit = VT.getSizeInBits() == 128; |
2032 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
2033 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
2034 | |
2035 | SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), N->getOperand(Num: 0)}; |
2036 | SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops); |
2037 | |
2038 | // Transfer memoperands. |
2039 | MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand(); |
2040 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp}); |
2041 | |
2042 | ReplaceNode(F: N, T: St); |
2043 | } |
2044 | |
2045 | void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, |
2046 | unsigned Scale, unsigned Opc_rr, |
2047 | unsigned Opc_ri) { |
2048 | SDLoc dl(N); |
2049 | |
2050 | // Form a REG_SEQUENCE to force register allocation. |
2051 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
2052 | SDValue RegSeq = createZTuple(Regs); |
2053 | |
2054 | // Optimize addressing mode. |
2055 | unsigned Opc; |
2056 | SDValue Offset, Base; |
2057 | std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
2058 | N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), |
2059 | CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); |
2060 | |
2061 | SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), // predicate |
2062 | Base, // address |
2063 | Offset, // offset |
2064 | N->getOperand(Num: 0)}; // chain |
2065 | SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops); |
2066 | |
2067 | ReplaceNode(F: N, T: St); |
2068 | } |
2069 | |
2070 | bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, |
2071 | SDValue &OffImm) { |
2072 | SDLoc dl(N); |
2073 | const DataLayout &DL = CurDAG->getDataLayout(); |
2074 | const TargetLowering *TLI = getTargetLowering(); |
2075 | |
2076 | // Try to match it for the frame address |
2077 | if (auto FINode = dyn_cast<FrameIndexSDNode>(Val&: N)) { |
2078 | int FI = FINode->getIndex(); |
2079 | Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL)); |
2080 | OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
2081 | return true; |
2082 | } |
2083 | |
2084 | return false; |
2085 | } |
2086 | |
2087 | void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, |
2088 | unsigned Opc) { |
2089 | SDLoc dl(N); |
2090 | EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0); |
2091 | const EVT ResTys[] = {MVT::i64, // Type of the write back register |
2092 | MVT::Other}; // Type for the Chain |
2093 | |
2094 | // Form a REG_SEQUENCE to force register allocation. |
2095 | bool Is128Bit = VT.getSizeInBits() == 128; |
2096 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
2097 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
2098 | |
2099 | SDValue Ops[] = {RegSeq, |
2100 | N->getOperand(Num: NumVecs + 1), // base register |
2101 | N->getOperand(Num: NumVecs + 2), // Incremental |
2102 | N->getOperand(Num: 0)}; // Chain |
2103 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2104 | |
2105 | ReplaceNode(F: N, T: St); |
2106 | } |
2107 | |
2108 | namespace { |
2109 | /// WidenVector - Given a value in the V64 register class, produce the |
2110 | /// equivalent value in the V128 register class. |
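/// For example, a v2f32 value becomes a v4f32 value by inserting the D
/// register into the dsub subregister of an IMPLICIT_DEF Q register.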
2111 | class WidenVector { |
2112 | SelectionDAG &DAG; |
2113 | |
2114 | public: |
2115 | WidenVector(SelectionDAG &DAG) : DAG(DAG) {} |
2116 | |
2117 | SDValue operator()(SDValue V64Reg) { |
2118 | EVT VT = V64Reg.getValueType(); |
2119 | unsigned NarrowSize = VT.getVectorNumElements(); |
2120 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
2121 | MVT WideTy = MVT::getVectorVT(VT: EltTy, NumElements: 2 * NarrowSize); |
2122 | SDLoc DL(V64Reg); |
2123 | |
2124 | SDValue Undef = |
2125 | SDValue(DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: WideTy), 0); |
2126 | return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); |
2127 | } |
2128 | }; |
2129 | } // namespace |
2130 | |
2131 | /// NarrowVector - Given a value in the V128 register class, produce the |
2132 | /// equivalent value in the V64 register class. |
2133 | static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { |
2134 | EVT VT = V128Reg.getValueType(); |
2135 | unsigned WideSize = VT.getVectorNumElements(); |
2136 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
2137 | MVT NarrowTy = MVT::getVectorVT(VT: EltTy, NumElements: WideSize / 2); |
2138 | |
2139 | return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, |
2140 | V128Reg); |
2141 | } |
2142 | |
2143 | void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, |
2144 | unsigned Opc) { |
2145 | SDLoc dl(N); |
2146 | EVT VT = N->getValueType(ResNo: 0); |
2147 | bool Narrow = VT.getSizeInBits() == 64; |
2148 | |
2149 | // Form a REG_SEQUENCE to force register allocation. |
2150 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
2151 | |
2152 | if (Narrow) |
2153 | transform(Range&: Regs, d_first: Regs.begin(), |
2154 | F: WidenVector(*CurDAG)); |
2155 | |
2156 | SDValue RegSeq = createQTuple(Regs); |
2157 | |
2158 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
2159 | |
2160 | unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2); |
2161 | |
2162 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
2163 | N->getOperand(NumVecs + 3), N->getOperand(0)}; |
2164 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2165 | SDValue SuperReg = SDValue(Ld, 0); |
2166 | |
2167 | EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0); |
2168 | static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
2169 | AArch64::qsub2, AArch64::qsub3 }; |
2170 | for (unsigned i = 0; i < NumVecs; ++i) { |
2171 | SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT, Operand: SuperReg); |
2172 | if (Narrow) |
2173 | NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG); |
2174 | ReplaceUses(F: SDValue(N, i), T: NV); |
2175 | } |
2176 | |
2177 | ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1)); |
2178 | CurDAG->RemoveDeadNode(N); |
2179 | } |
2180 | |
2181 | void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, |
2182 | unsigned Opc) { |
2183 | SDLoc dl(N); |
2184 | EVT VT = N->getValueType(ResNo: 0); |
2185 | bool Narrow = VT.getSizeInBits() == 64; |
2186 | |
2187 | // Form a REG_SEQUENCE to force register allocation. |
2188 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
2189 | |
2190 | if (Narrow) |
2191 | transform(Range&: Regs, d_first: Regs.begin(), |
2192 | F: WidenVector(*CurDAG)); |
2193 | |
2194 | SDValue RegSeq = createQTuple(Regs); |
2195 | |
2196 | const EVT ResTys[] = {MVT::i64, // Type of the write back register |
2197 | RegSeq->getValueType(0), MVT::Other}; |
2198 | |
2199 | unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1); |
2200 | |
2201 | SDValue Ops[] = {RegSeq, |
2202 | CurDAG->getTargetConstant(LaneNo, dl, |
2203 | MVT::i64), // Lane Number |
2204 | N->getOperand(NumVecs + 2), // Base register |
2205 | N->getOperand(NumVecs + 3), // Incremental |
2206 | N->getOperand(0)}; |
2207 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2208 | |
2209 | // Update uses of the write back register |
2210 | ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0)); |
2211 | |
2212 | // Update uses of the vector list |
2213 | SDValue SuperReg = SDValue(Ld, 1); |
2214 | if (NumVecs == 1) { |
2215 | ReplaceUses(F: SDValue(N, 0), |
2216 | T: Narrow ? NarrowVector(V128Reg: SuperReg, DAG&: *CurDAG) : SuperReg); |
2217 | } else { |
2218 | EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0); |
2219 | static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
2220 | AArch64::qsub2, AArch64::qsub3 }; |
2221 | for (unsigned i = 0; i < NumVecs; ++i) { |
2222 | SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT, |
2223 | Operand: SuperReg); |
2224 | if (Narrow) |
2225 | NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG); |
2226 | ReplaceUses(F: SDValue(N, i), T: NV); |
2227 | } |
2228 | } |
2229 | |
2230 | // Update the Chain |
2231 | ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2)); |
2232 | CurDAG->RemoveDeadNode(N); |
2233 | } |
2234 | |
2235 | void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, |
2236 | unsigned Opc) { |
2237 | SDLoc dl(N); |
2238 | EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0); |
2239 | bool Narrow = VT.getSizeInBits() == 64; |
2240 | |
2241 | // Form a REG_SEQUENCE to force register allocation. |
2242 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
2243 | |
2244 | if (Narrow) |
2245 | transform(Range&: Regs, d_first: Regs.begin(), |
2246 | F: WidenVector(*CurDAG)); |
2247 | |
2248 | SDValue RegSeq = createQTuple(Regs); |
2249 | |
2250 | unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2); |
2251 | |
2252 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
2253 | N->getOperand(NumVecs + 3), N->getOperand(0)}; |
2254 | SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
2255 | |
2256 | // Transfer memoperands. |
2257 | MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand(); |
2258 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp}); |
2259 | |
2260 | ReplaceNode(F: N, T: St); |
2261 | } |
2262 | |
2263 | void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, |
2264 | unsigned Opc) { |
2265 | SDLoc dl(N); |
2266 | EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0); |
2267 | bool Narrow = VT.getSizeInBits() == 64; |
2268 | |
2269 | // Form a REG_SEQUENCE to force register allocation. |
2270 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
2271 | |
2272 | if (Narrow) |
2273 | transform(Range&: Regs, d_first: Regs.begin(), |
2274 | F: WidenVector(*CurDAG)); |
2275 | |
2276 | SDValue RegSeq = createQTuple(Regs); |
2277 | |
2278 | const EVT ResTys[] = {MVT::i64, // Type of the write back register |
2279 | MVT::Other}; |
2280 | |
2281 | unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1); |
2282 | |
2283 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
2284 | N->getOperand(NumVecs + 2), // Base Register |
2285 | N->getOperand(NumVecs + 3), // Incremental |
2286 | N->getOperand(0)}; |
2287 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2288 | |
2289 | // Transfer memoperands. |
2290 | MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand(); |
2291 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp}); |
2292 | |
2293 | ReplaceNode(F: N, T: St); |
2294 | } |
2295 | |
2296 | static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, |
2297 | unsigned &Opc, SDValue &Opd0, |
2298 | unsigned &LSB, unsigned &MSB, |
2299 | unsigned NumberOfIgnoredLowBits, |
2300 | bool BiggerPattern) { |
assert(N->getOpcode() == ISD::AND &&
"N must be an AND operation to call this function");
2303 | |
2304 | EVT VT = N->getValueType(ResNo: 0); |
2305 | |
// We could test the type of VT and return false when it does not match, but
// since that check is done prior to this call in the current context, we
// turned it into an assert to avoid redundant code.
assert((VT == MVT::i32 || VT == MVT::i64) &&
"Type checking must have been done before calling this function");
2311 | |
2312 | // FIXME: simplify-demanded-bits in DAGCombine will probably have |
2313 | // changed the AND node to a 32-bit mask operation. We'll have to |
2314 | // undo that as part of the transform here if we want to catch all |
2315 | // the opportunities. |
2316 | // Currently the NumberOfIgnoredLowBits argument helps to recover |
2317 | // from these situations when matching bigger pattern (bitfield insert). |
2318 | |
2319 | // For unsigned extracts, check for a shift right and mask |
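// For example, with an i32 type, (srl x, 3) & 0x1f is selected as
// UBFMWri x, 3, 7, i.e. LSB = 3 and MSB = 3 + 5 - 1 = 7.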
2320 | uint64_t AndImm = 0; |
2321 | if (!isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: AndImm)) |
2322 | return false; |
2323 | |
2324 | const SDNode *Op0 = N->getOperand(Num: 0).getNode(); |
2325 | |
2326 | // Because of simplify-demanded-bits in DAGCombine, the mask may have been |
2327 | // simplified. Try to undo that |
2328 | AndImm |= maskTrailingOnes<uint64_t>(N: NumberOfIgnoredLowBits); |
2329 | |
2330 | // The immediate is a mask of the low bits iff imm & (imm+1) == 0 |
2331 | if (AndImm & (AndImm + 1)) |
2332 | return false; |
2333 | |
2334 | bool ClampMSB = false; |
2335 | uint64_t SrlImm = 0; |
2336 | // Handle the SRL + ANY_EXTEND case. |
2337 | if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && |
2338 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { |
2339 | // Extend the incoming operand of the SRL to 64-bit. |
2340 | Opd0 = Widen(CurDAG, N: Op0->getOperand(Num: 0).getOperand(i: 0)); |
2341 | // Make sure to clamp the MSB so that we preserve the semantics of the |
2342 | // original operations. |
2343 | ClampMSB = true; |
2344 | } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && |
2345 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, |
2346 | SrlImm)) { |
2347 | // If the shift result was truncated, we can still combine them. |
2348 | Opd0 = Op0->getOperand(Num: 0).getOperand(i: 0); |
2349 | |
2350 | // Use the type of SRL node. |
2351 | VT = Opd0->getValueType(ResNo: 0); |
2352 | } else if (isOpcWithIntImmediate(N: Op0, Opc: ISD::SRL, Imm&: SrlImm)) { |
2353 | Opd0 = Op0->getOperand(Num: 0); |
2354 | ClampMSB = (VT == MVT::i32); |
2355 | } else if (BiggerPattern) { |
// Let's pretend a 0 shift right has been performed.
// The resulting code will be at least as good as the original one,
// and it may expose more opportunities for the bitfield insert pattern.
2359 | // FIXME: Currently we limit this to the bigger pattern, because |
2360 | // some optimizations expect AND and not UBFM. |
2361 | Opd0 = N->getOperand(Num: 0); |
2362 | } else |
2363 | return false; |
2364 | |
2365 | // Bail out on large immediates. This happens when no proper |
2366 | // combining/constant folding was performed. |
2367 | if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { |
LLVM_DEBUG(
(dbgs() << N
<< ": Found large shift immediate, this should not happen\n"));
2371 | return false; |
2372 | } |
2373 | |
2374 | LSB = SrlImm; |
2375 | MSB = SrlImm + |
2376 | (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm) |
2377 | : llvm::countr_one<uint64_t>(AndImm)) - |
2378 | 1; |
2379 | if (ClampMSB) |
2380 | // Since we're moving the extend before the right shift operation, we need |
2381 | // to clamp the MSB to make sure we don't shift in undefined bits instead of |
2382 | // the zeros which would get shifted in with the original right shift |
2383 | // operation. |
2384 | MSB = MSB > 31 ? 31 : MSB; |
2385 | |
2386 | Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
2387 | return true; |
2388 | } |
2389 | |
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
SDValue &Opd0, unsigned &Immr,
unsigned &Imms) {
2393 | assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); |
2394 | |
2395 | EVT VT = N->getValueType(ResNo: 0); |
2396 | unsigned BitWidth = VT.getSizeInBits(); |
assert((VT == MVT::i32 || VT == MVT::i64) &&
"Type checking must have been done before calling this function");
2399 | |
2400 | SDValue Op = N->getOperand(Num: 0); |
2401 | if (Op->getOpcode() == ISD::TRUNCATE) { |
2402 | Op = Op->getOperand(Num: 0); |
2403 | VT = Op->getValueType(ResNo: 0); |
2404 | BitWidth = VT.getSizeInBits(); |
2405 | } |
2406 | |
2407 | uint64_t ShiftImm; |
2408 | if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRL, Imm&: ShiftImm) && |
2409 | !isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm)) |
2410 | return false; |
2411 | |
2412 | unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits(); |
2413 | if (ShiftImm + Width > BitWidth) |
2414 | return false; |
2415 | |
2416 | Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; |
2417 | Opd0 = Op.getOperand(i: 0); |
2418 | Immr = ShiftImm; |
2419 | Imms = ShiftImm + Width - 1; |
2420 | return true; |
2421 | } |
2422 | |
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
SDValue &Opd0, unsigned &LSB,
unsigned &MSB) {
// We are looking for the following pattern, which basically extracts several
// contiguous bits from the source value and places them at the LSB of the
// destination value; all other bits of the destination value are set to zero:
//
// Value2 = AND Value, MaskImm
// SRL Value2, ShiftImm
//
// where MaskImm >> ShiftImm must be a contiguous mask, giving the width of
// the extracted bit field.
2434 | // |
2435 | // This gets selected into a single UBFM: |
2436 | // |
2437 | // UBFM Value, ShiftImm, Log2_64(MaskImm) |
2438 | // |
2439 | |
2440 | if (N->getOpcode() != ISD::SRL) |
2441 | return false; |
2442 | |
2443 | uint64_t AndMask = 0; |
2444 | if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: AndMask)) |
2445 | return false; |
2446 | |
2447 | Opd0 = N->getOperand(Num: 0).getOperand(i: 0); |
2448 | |
2449 | uint64_t SrlImm = 0; |
2450 | if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm)) |
2451 | return false; |
2452 | |
2453 | // Check whether we really have several bits extract here. |
2454 | if (!isMask_64(Value: AndMask >> SrlImm)) |
2455 | return false; |
2456 | |
2457 | Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
2458 | LSB = SrlImm; |
2459 | MSB = llvm::Log2_64(Value: AndMask); |
2460 | return true; |
2461 | } |
2462 | |
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
unsigned &Immr, unsigned &Imms,
bool BiggerPattern) {
assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
"N must be a SHR/SRA operation to call this function");
2468 | |
2469 | EVT VT = N->getValueType(ResNo: 0); |
2470 | |
// We could test the type of VT and return false when it does not match, but
// since that check is done prior to this call in the current context, we
// turned it into an assert to avoid redundant code.
assert((VT == MVT::i32 || VT == MVT::i64) &&
"Type checking must have been done before calling this function");
2476 | |
2477 | // Check for AND + SRL doing several bits extract. |
2478 | if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB&: Immr, MSB&: Imms)) |
2479 | return true; |
2480 | |
2481 | // We're looking for a shift of a shift. |
2482 | uint64_t ShlImm = 0; |
2483 | uint64_t TruncBits = 0; |
2484 | if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: ShlImm)) { |
2485 | Opd0 = N->getOperand(Num: 0).getOperand(i: 0); |
2486 | } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && |
2487 | N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { |
// We are looking for a shift of a truncate. A truncate from i64 to i32 can
// be considered as setting the high 32 bits to zero. Our strategy here is to
// always generate a 64-bit UBFM. This consistency will help the CSE pass
// later find more redundancy.
2492 | Opd0 = N->getOperand(Num: 0).getOperand(i: 0); |
2493 | TruncBits = Opd0->getValueType(ResNo: 0).getSizeInBits() - VT.getSizeInBits(); |
2494 | VT = Opd0.getValueType(); |
assert(VT == MVT::i64 && "the promoted type should be i64");
2496 | } else if (BiggerPattern) { |
2497 | // Let's pretend a 0 shift left has been performed. |
2498 | // FIXME: Currently we limit this to the bigger pattern case, |
2499 | // because some optimizations expect AND and not UBFM |
2500 | Opd0 = N->getOperand(Num: 0); |
2501 | } else |
2502 | return false; |
2503 | |
2504 | // Missing combines/constant folding may have left us with strange |
2505 | // constants. |
2506 | if (ShlImm >= VT.getSizeInBits()) { |
LLVM_DEBUG(
(dbgs() << N
<< ": Found large shift immediate, this should not happen\n"));
2510 | return false; |
2511 | } |
2512 | |
2513 | uint64_t SrlImm = 0; |
2514 | if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm)) |
2515 | return false; |
2516 | |
assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
"bad amount in shift node!");
2519 | int immr = SrlImm - ShlImm; |
2520 | Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; |
2521 | Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; |
2522 | // SRA requires a signed extraction |
2523 | if (VT == MVT::i32) |
2524 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; |
2525 | else |
2526 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; |
2527 | return true; |
2528 | } |
2529 | |
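// For example, the i64 sign_extend of (sra x_i32, 5) becomes SBFMXri on the
// widened operand, with Immr = 5 and Imms = 31.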
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2531 | assert(N->getOpcode() == ISD::SIGN_EXTEND); |
2532 | |
2533 | EVT VT = N->getValueType(ResNo: 0); |
2534 | EVT NarrowVT = N->getOperand(Num: 0)->getValueType(ResNo: 0); |
2535 | if (VT != MVT::i64 || NarrowVT != MVT::i32) |
2536 | return false; |
2537 | |
2538 | uint64_t ShiftImm; |
2539 | SDValue Op = N->getOperand(Num: 0); |
2540 | if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm)) |
2541 | return false; |
2542 | |
2543 | SDLoc dl(N); |
2544 | // Extend the incoming operand of the shift to 64-bits. |
2545 | SDValue Opd0 = Widen(CurDAG, N: Op.getOperand(i: 0)); |
2546 | unsigned Immr = ShiftImm; |
2547 | unsigned Imms = NarrowVT.getSizeInBits() - 1; |
2548 | SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT), |
2549 | CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)}; |
2550 | CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); |
2551 | return true; |
2552 | } |
2553 | |
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
SDValue &Opd0, unsigned &Immr, unsigned &Imms,
unsigned NumberOfIgnoredLowBits = 0,
bool BiggerPattern = false) {
2558 | if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) |
2559 | return false; |
2560 | |
2561 | switch (N->getOpcode()) { |
2562 | default: |
2563 | if (!N->isMachineOpcode()) |
2564 | return false; |
2565 | break; |
2566 | case ISD::AND: |
2567 | return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB&: Immr, MSB&: Imms, |
2568 | NumberOfIgnoredLowBits, BiggerPattern); |
2569 | case ISD::SRL: |
2570 | case ISD::SRA: |
2571 | return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); |
2572 | |
2573 | case ISD::SIGN_EXTEND_INREG: |
2574 | return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); |
2575 | } |
2576 | |
2577 | unsigned NOpc = N->getMachineOpcode(); |
2578 | switch (NOpc) { |
2579 | default: |
2580 | return false; |
2581 | case AArch64::SBFMWri: |
2582 | case AArch64::UBFMWri: |
2583 | case AArch64::SBFMXri: |
2584 | case AArch64::UBFMXri: |
2585 | Opc = NOpc; |
2586 | Opd0 = N->getOperand(Num: 0); |
2587 | Immr = N->getConstantOperandVal(Num: 1); |
2588 | Imms = N->getConstantOperandVal(Num: 2); |
2589 | return true; |
2590 | } |
2591 | // Unreachable |
2592 | return false; |
2593 | } |
2594 | |
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2596 | unsigned Opc, Immr, Imms; |
2597 | SDValue Opd0; |
2598 | if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) |
2599 | return false; |
2600 | |
2601 | EVT VT = N->getValueType(ResNo: 0); |
2602 | SDLoc dl(N); |
2603 | |
2604 | // If the bit extract operation is 64bit but the original type is 32bit, we |
2605 | // need to add one EXTRACT_SUBREG. |
2606 | if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { |
2607 | SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), |
2608 | CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; |
2609 | |
2610 | SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); |
2611 | SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, |
2612 | MVT::i32, SDValue(BFM, 0)); |
2613 | ReplaceNode(F: N, T: Inner.getNode()); |
2614 | return true; |
2615 | } |
2616 | |
2617 | SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT), |
2618 | CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)}; |
2619 | CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops); |
2620 | return true; |
2621 | } |
2622 | |
2623 | /// Does DstMask form a complementary pair with the mask provided by |
2624 | /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, |
2625 | /// this asks whether DstMask zeroes precisely those bits that will be set by |
2626 | /// the other half. |
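/// For example, with a 32-bit type, DstMask == 0xffff0000 pairs with
/// inserted bits 0x0000ffff: their AND is zero and their OR is all ones.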
2627 | static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, |
2628 | unsigned NumberOfIgnoredHighBits, EVT VT) { |
assert((VT == MVT::i32 || VT == MVT::i64) &&
"i32 or i64 mask type expected!");
2631 | unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; |
2632 | |
2633 | APInt SignificantDstMask = APInt(BitWidth, DstMask); |
2634 | APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(width: BitWidth); |
2635 | |
2636 | return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && |
2637 | (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); |
2638 | } |
2639 | |
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped, provided it was
// never used before being dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; those useful bits live through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used in an unpredictable instruction, then all its bits
// are useful.
2651 | // E.g. |
2652 | // 1. y = x & 0x7 |
2653 | // 2. z = y >> 2 |
2654 | // 3. str x, [@x] |
2655 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); |
2656 | |
2657 | static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, |
2658 | unsigned Depth) { |
2659 | uint64_t Imm = |
2660 | cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue(); |
2661 | Imm = AArch64_AM::decodeLogicalImmediate(val: Imm, regSize: UsefulBits.getBitWidth()); |
2662 | UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); |
2663 | getUsefulBits(Op, UsefulBits, Depth: Depth + 1); |
2664 | } |
2665 | |
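// Illustration: for a bitfield move with Imm (immr) == 4 and MSB (imms) == 11
// (an 8-bit extract), the operand bits feeding result bits [7:0] are the
// operand's bits [11:4], so the useful-bits mask is shifted accordingly.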
2666 | static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, |
2667 | uint64_t Imm, uint64_t MSB, |
2668 | unsigned Depth) { |
2669 | // inherit the bitwidth value |
2670 | APInt OpUsefulBits(UsefulBits); |
2671 | OpUsefulBits = 1; |
2672 | |
2673 | if (MSB >= Imm) { |
2674 | OpUsefulBits <<= MSB - Imm + 1; |
2675 | --OpUsefulBits; |
2676 | // The interesting part will be in the lower part of the result |
2677 | getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1); |
2678 | // The interesting part was starting at Imm in the argument |
2679 | OpUsefulBits <<= Imm; |
2680 | } else { |
2681 | OpUsefulBits <<= MSB + 1; |
2682 | --OpUsefulBits; |
2683 | // The interesting part will be shifted in the result |
2684 | OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; |
2685 | getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1); |
2686 | // The interesting part was at zero in the argument |
2687 | OpUsefulBits.lshrInPlace(ShiftAmt: OpUsefulBits.getBitWidth() - Imm); |
2688 | } |
2689 | |
2690 | UsefulBits &= OpUsefulBits; |
2691 | } |
2692 | |
2693 | static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, |
2694 | unsigned Depth) { |
2695 | uint64_t Imm = |
2696 | cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue(); |
2697 | uint64_t MSB = |
2698 | cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue(); |
2699 | |
2700 | getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); |
2701 | } |
2702 | |
2703 | static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, |
2704 | unsigned Depth) { |
2705 | uint64_t ShiftTypeAndValue = |
2706 | cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue(); |
2707 | APInt Mask(UsefulBits); |
2708 | Mask.clearAllBits(); |
2709 | Mask.flipAllBits(); |
2710 | |
2711 | if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSL) { |
2712 | // Shift Left |
2713 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue); |
2714 | Mask <<= ShiftAmt; |
2715 | getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1); |
2716 | Mask.lshrInPlace(ShiftAmt); |
2717 | } else if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSR) { |
2718 | // Shift Right |
2719 | // We do not handle AArch64_AM::ASR, because the sign will change the |
2720 | // number of useful bits |
2721 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue); |
2722 | Mask.lshrInPlace(ShiftAmt); |
2723 | getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1); |
2724 | Mask <<= ShiftAmt; |
2725 | } else |
2726 | return; |
2727 | |
2728 | UsefulBits &= Mask; |
2729 | } |
2730 | |
2731 | static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, |
2732 | unsigned Depth) { |
2733 | uint64_t Imm = |
2734 | cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue(); |
2735 | uint64_t MSB = |
2736 | cast<const ConstantSDNode>(Val: Op.getOperand(i: 3).getNode())->getZExtValue(); |
2737 | |
2738 | APInt OpUsefulBits(UsefulBits); |
2739 | OpUsefulBits = 1; |
2740 | |
2741 | APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); |
2742 | ResultUsefulBits.flipAllBits(); |
2743 | APInt Mask(UsefulBits.getBitWidth(), 0); |
2744 | |
  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2746 | |
2747 | if (MSB >= Imm) { |
2748 | // The instruction is a BFXIL. |
2749 | uint64_t Width = MSB - Imm + 1; |
2750 | uint64_t LSB = Imm; |
2751 | |
2752 | OpUsefulBits <<= Width; |
2753 | --OpUsefulBits; |
2754 | |
    if (Op.getOperand(1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(0) == Orig)
2762 | // Bits starting from LSB in the input contribute to the result. |
2763 | Mask |= (ResultUsefulBits & ~OpUsefulBits); |
2764 | } else { |
2765 | // The instruction is a BFI. |
2766 | uint64_t Width = MSB + 1; |
2767 | uint64_t LSB = UsefulBits.getBitWidth() - Imm; |
2768 | |
2769 | OpUsefulBits <<= Width; |
2770 | --OpUsefulBits; |
2771 | OpUsefulBits <<= LSB; |
2772 | |
    if (Op.getOperand(1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(LSB);
    }

    if (Op.getOperand(0) == Orig)
2780 | Mask |= (ResultUsefulBits & ~OpUsefulBits); |
2781 | } |
2782 | |
2783 | UsefulBits &= Mask; |
2784 | } |
2785 | |
2786 | static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, |
2787 | SDValue Orig, unsigned Depth) { |
2788 | |
2789 | // Users of this node should have already been instruction selected |
2790 | // FIXME: Can we turn that into an assert? |
2791 | if (!UserNode->isMachineOpcode()) |
2792 | return; |
2793 | |
2794 | switch (UserNode->getMachineOpcode()) { |
2795 | default: |
2796 | return; |
2797 | case AArch64::ANDSWri: |
2798 | case AArch64::ANDSXri: |
2799 | case AArch64::ANDWri: |
2800 | case AArch64::ANDXri: |
    // Depth is only incremented when these helpers recurse into getUsefulBits.
    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
                                             Depth);
2804 | case AArch64::UBFMWri: |
2805 | case AArch64::UBFMXri: |
    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2807 | |
2808 | case AArch64::ORRWrs: |
2809 | case AArch64::ORRXrs: |
    if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
      getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
                                        Depth);
2813 | return; |
2814 | case AArch64::BFMWri: |
2815 | case AArch64::BFMXri: |
    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2817 | |
2818 | case AArch64::STRBBui: |
2819 | case AArch64::STURBBi: |
    if (UserNode->getOperand(0) != Orig)
2821 | return; |
2822 | UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); |
2823 | return; |
2824 | |
2825 | case AArch64::STRHHui: |
2826 | case AArch64::STURHHi: |
    if (UserNode->getOperand(0) != Orig)
2828 | return; |
2829 | UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); |
2830 | return; |
2831 | } |
2832 | } |
2833 | |
2834 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { |
2835 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
2836 | return; |
2837 | // Initialize UsefulBits |
2838 | if (!Depth) { |
2839 | unsigned Bitwidth = Op.getScalarValueSizeInBits(); |
    // At the beginning, assume every produced bit is useful
2841 | UsefulBits = APInt(Bitwidth, 0); |
2842 | UsefulBits.flipAllBits(); |
2843 | } |
2844 | APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); |
2845 | |
2846 | for (SDNode *Node : Op.getNode()->uses()) { |
2847 | // A use cannot produce useful bits |
2848 | APInt UsefulBitsForUse = APInt(UsefulBits); |
    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2850 | UsersUsefulBits |= UsefulBitsForUse; |
2851 | } |
2852 | // UsefulBits contains the produced bits that are meaningful for the |
2853 | // current definition, thus a user cannot make a bit meaningful at |
2854 | // this point |
2855 | UsefulBits &= UsersUsefulBits; |
2856 | } |
2857 | |
2858 | /// Create a machine node performing a notional SHL of Op by ShlAmount. If |
2859 | /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is |
2860 | /// 0, return Op unchanged. |
2861 | static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { |
2862 | if (ShlAmount == 0) |
2863 | return Op; |
2864 | |
2865 | EVT VT = Op.getValueType(); |
2866 | SDLoc dl(Op); |
2867 | unsigned BitWidth = VT.getSizeInBits(); |
2868 | unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
2869 | |
2870 | SDNode *ShiftNode; |
2871 | if (ShlAmount > 0) { |
2872 | // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt |
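    // e.g., with BitWidth == 64: LSL xD, xN, #8 == UBFM xD, xN, #56, #55.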
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op,
        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2877 | } else { |
2878 | // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 |
    assert(ShlAmount < 0 && "expected right shift");
2880 | int ShrAmount = -ShlAmount; |
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2884 | } |
2885 | |
2886 | return SDValue(ShiftNode, 0); |
2887 | } |
2888 | |
2889 | // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". |
2890 | static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, |
2891 | bool BiggerPattern, |
2892 | const uint64_t NonZeroBits, |
2893 | SDValue &Src, int &DstLSB, |
2894 | int &Width); |
2895 | |
// For bit-field-positioning pattern "(shl VAL, N)".
2897 | static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, |
2898 | bool BiggerPattern, |
2899 | const uint64_t NonZeroBits, |
2900 | SDValue &Src, int &DstLSB, |
2901 | int &Width); |
2902 | |
2903 | /// Does this tree qualify as an attempt to move a bitfield into position, |
/// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
2905 | static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, |
2906 | bool BiggerPattern, SDValue &Src, |
2907 | int &DstLSB, int &Width) { |
2908 | EVT VT = Op.getValueType(); |
2909 | unsigned BitWidth = VT.getSizeInBits(); |
2910 | (void)BitWidth; |
2911 | assert(BitWidth == 32 || BitWidth == 64); |
2912 | |
2913 | KnownBits Known = CurDAG->computeKnownBits(Op); |
2914 | |
2915 | // Non-zero in the sense that they're not provably zero, which is the key |
2916 | // point if we want to use this value |
2917 | const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); |
  if (!isShiftedMask_64(NonZeroBits))
2919 | return false; |
2920 | |
2921 | switch (Op.getOpcode()) { |
2922 | default: |
2923 | break; |
2924 | case ISD::AND: |
2925 | return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, |
2926 | NonZeroBits, Src, DstLSB, Width); |
2927 | case ISD::SHL: |
2928 | return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, |
2929 | NonZeroBits, Src, DstLSB, Width); |
2930 | } |
2931 | |
2932 | return false; |
2933 | } |
2934 | |
2935 | static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, |
2936 | bool BiggerPattern, |
2937 | const uint64_t NonZeroBits, |
2938 | SDValue &Src, int &DstLSB, |
2939 | int &Width) { |
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2941 | |
2942 | EVT VT = Op.getValueType(); |
2943 | assert((VT == MVT::i32 || VT == MVT::i64) && |
2944 | "Caller guarantees VT is one of i32 or i64" ); |
2945 | (void)VT; |
2946 | |
2947 | uint64_t AndImm; |
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2949 | return false; |
2950 | |
  // If (~AndImm & NonZeroBits) is not zero at POS, we know that
  //   1) (AndImm & (1 << POS)) == 0
  //   2) the result of AND is not zero at POS bit (according to NonZeroBits)
2954 | // |
2955 | // 1) and 2) don't agree so something must be wrong (e.g., in |
2956 | // 'SelectionDAG::computeKnownBits') |
2957 | assert((~AndImm & NonZeroBits) == 0 && |
2958 | "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)" ); |
2959 | |
  SDValue AndOp0 = Op.getOperand(0);
2961 | |
2962 | uint64_t ShlImm; |
2963 | SDValue ShlOp0; |
  if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
    ShlOp0 = AndOp0.getOperand(0);
2967 | } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && |
2968 | isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, |
2969 | ShlImm)) { |
2970 | // For pattern "and(any_extend(shl(val, N)), shifted-mask)" |
2971 | |
2972 | // ShlVal == shl(val, N), which is a left shift on a smaller type. |
    SDValue ShlVal = AndOp0.getOperand(0);
2974 | |
    // Since this is after type legalization and ShlVal is extended to
    // MVT::i64, expect ShlVal's type to be MVT::i32.
    assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
2978 | |
    // Widen 'val' to MVT::i64 as the source of bit field positioning.
    ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2981 | } else |
2982 | return false; |
2983 | |
2984 | // For !BiggerPattern, bail out if the AndOp0 has more than one use, since |
2985 | // then we'll end up generating AndOp0+UBFIZ instead of just keeping |
2986 | // AndOp0+AND. |
2987 | if (!BiggerPattern && !AndOp0.hasOneUse()) |
2988 | return false; |
2989 | |
  DstLSB = llvm::countr_zero(NonZeroBits);
  Width = llvm::countr_one(NonZeroBits >> DstLSB);
2992 | |
2993 | // Bail out on large Width. This happens when no proper combining / constant |
2994 | // folding was performed. |
2995 | if (Width >= (int)VT.getSizeInBits()) { |
    // If VT is i64, Width > 64 is not sensible since NonZeroBits is uint64_t,
    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
    // to "val".
2999 | // If VT is i32, what Width >= 32 means: |
    //   - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //     demands at least 'Width' bits (after dag-combiner). This together
    //     with the `any_extend` Op (undefined higher bits) indicates a missed
    //     combination when lowering the 'and' IR instruction to a machine IR
    //     instruction.
3004 | LLVM_DEBUG( |
3005 | dbgs() |
3006 | << "Found large Width in bit-field-positioning -- this indicates no " |
3007 | "proper combining / constant folding was performed\n" ); |
3008 | return false; |
3009 | } |
3010 | |
3011 | // BFI encompasses sufficiently many nodes that it's worth inserting an extra |
3012 | // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL |
3013 | // amount. BiggerPattern is true when this pattern is being matched for BFI, |
3014 | // BiggerPattern is false when this pattern is being matched for UBFIZ, in |
3015 | // which case it is not profitable to insert an extra shift. |
3016 | if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) |
3017 | return false; |
3018 | |
  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3020 | return true; |
3021 | } |
3022 | |
// For node (shl (and val, mask), N), returns true if the node is equivalent to
// UBFIZ.
3025 | static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, |
3026 | SDValue &Src, int &DstLSB, |
3027 | int &Width) { |
  // The caller should have verified that Op is a left shift by the constant
  // amount ShlImm; the asserts check that.
  assert(Op.getOpcode() == ISD::SHL &&
         "Op.getNode() should be a SHL node to call this function");
  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
         "Op.getNode() should shift ShlImm to call this function");
3034 | |
3035 | uint64_t AndImm = 0; |
  SDValue Op0 = Op.getOperand(0);
  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3038 | return false; |
3039 | |
3040 | const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm); |
  if (isMask_64(ShiftedAndImm)) {
3042 | // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm |
3043 | // should end with Mask, and could be prefixed with random bits if those |
3044 | // bits are shifted out. |
3045 | // |
3046 | // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3; |
3047 | // the AND result corresponding to those bits are shifted out, so it's fine |
3048 | // to not extract them. |
    Width = llvm::countr_one(ShiftedAndImm);
    DstLSB = ShlImm;
    Src = Op0.getOperand(0);
3052 | return true; |
3053 | } |
3054 | return false; |
3055 | } |
3056 | |
3057 | static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, |
3058 | bool BiggerPattern, |
3059 | const uint64_t NonZeroBits, |
3060 | SDValue &Src, int &DstLSB, |
3061 | int &Width) { |
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3063 | |
3064 | EVT VT = Op.getValueType(); |
3065 | assert((VT == MVT::i32 || VT == MVT::i64) && |
3066 | "Caller guarantees that type is i32 or i64" ); |
3067 | (void)VT; |
3068 | |
3069 | uint64_t ShlImm; |
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3071 | return false; |
3072 | |
3073 | if (!BiggerPattern && !Op.hasOneUse()) |
3074 | return false; |
3075 | |
3076 | if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) |
3077 | return true; |
3078 | |
  DstLSB = llvm::countr_zero(NonZeroBits);
  Width = llvm::countr_one(NonZeroBits >> DstLSB);
3081 | |
3082 | if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) |
3083 | return false; |
3084 | |
  Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3086 | return true; |
3087 | } |
3088 | |
3089 | static bool isShiftedMask(uint64_t Mask, EVT VT) { |
3090 | assert(VT == MVT::i32 || VT == MVT::i64); |
3091 | if (VT == MVT::i32) |
    return isShiftedMask_32(Mask);
  return isShiftedMask_64(Mask);
3094 | } |
3095 | |
3096 | // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being |
3097 | // inserted only sets known zero bits. |
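// For example, with VT == i32 and no extra known bits on X,
//   (or (and X, 0xFFFFFF00), 0x5A)
// becomes
//   MOV   w8, #0x5A
//   BFXIL wX, w8, #0, #8
// since 0x5A only sets bits the AND has proven to be zero, and 0x5A is not
// encodable as a logical immediate (so the ORR-immediate bailout below does
// not fire).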
3098 | static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { |
  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3100 | |
  EVT VT = N->getValueType(0);
3102 | if (VT != MVT::i32 && VT != MVT::i64) |
3103 | return false; |
3104 | |
3105 | unsigned BitWidth = VT.getSizeInBits(); |
3106 | |
3107 | uint64_t OrImm; |
  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3109 | return false; |
3110 | |
  // Skip this transformation if the OR immediate can itself be encoded as an
  // ORR immediate. Otherwise, we'd trade an AND+ORR for ORR+BFI/BFXIL, which
  // is most likely performance neutral.
  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3115 | return false; |
3116 | |
3117 | uint64_t MaskImm; |
  SDValue And = N->getOperand(0);
3119 | // Must be a single use AND with an immediate operand. |
  if (!And.hasOneUse() ||
      !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3122 | return false; |
3123 | |
3124 | // Compute the Known Zero for the AND as this allows us to catch more general |
3125 | // cases than just looking for AND with imm. |
  KnownBits Known = CurDAG->computeKnownBits(And);
3127 | |
3128 | // Non-zero in the sense that they're not provably zero, which is the key |
3129 | // point if we want to use this value. |
3130 | uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); |
3131 | |
3132 | // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). |
  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3134 | return false; |
3135 | |
3136 | // The bits being inserted must only set those bits that are known to be zero. |
3137 | if ((OrImm & NotKnownZero) != 0) { |
3138 | // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't |
3139 | // currently handle this case. |
3140 | return false; |
3141 | } |
3142 | |
3143 | // BFI/BFXIL dst, src, #lsb, #width. |
  int LSB = llvm::countr_one(NotKnownZero);
3145 | int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount(); |
3146 | |
3147 | // BFI/BFXIL is an alias of BFM, so translate to BFM operands. |
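  // BFI Wd, Wn, #lsb, #width == BFM Wd, Wn, #((BitWidth - lsb) % BitWidth),
  // #(width - 1); with lsb == 0 this degenerates to BFXIL Wd, Wn, #0, #width.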
3148 | unsigned ImmR = (BitWidth - LSB) % BitWidth; |
3149 | unsigned ImmS = Width - 1; |
3150 | |
3151 | // If we're creating a BFI instruction avoid cases where we need more |
3152 | // instructions to materialize the BFI constant as compared to the original |
3153 | // ORR. A BFXIL will use the same constant as the original ORR, so the code |
3154 | // should be no worse in this case. |
3155 | bool IsBFI = LSB != 0; |
3156 | uint64_t BFIImm = OrImm >> LSB; |
  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3158 | // We have a BFI instruction and we know the constant can't be materialized |
3159 | // with a ORR-immediate with the zero register. |
3160 | unsigned OrChunks = 0, BFIChunks = 0; |
3161 | for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { |
3162 | if (((OrImm >> Shift) & 0xFFFF) != 0) |
3163 | ++OrChunks; |
3164 | if (((BFIImm >> Shift) & 0xFFFF) != 0) |
3165 | ++BFIChunks; |
3166 | } |
3167 | if (BFIChunks > OrChunks) |
3168 | return false; |
3169 | } |
3170 | |
3171 | // Materialize the constant to be inserted. |
3172 | SDLoc DL(N); |
3173 | unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; |
  SDNode *MOVI = CurDAG->getMachineNode(
      MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3176 | |
3177 | // Create the BFI/BFXIL instruction. |
  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
                   CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
3181 | unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; |
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3183 | return true; |
3184 | } |
3185 | |
3186 | static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, |
3187 | SDValue &ShiftedOperand, |
3188 | uint64_t &EncodedShiftImm) { |
3189 | // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. |
3190 | if (!Dst.hasOneUse()) |
3191 | return false; |
3192 | |
3193 | EVT VT = Dst.getValueType(); |
3194 | assert((VT == MVT::i32 || VT == MVT::i64) && |
3195 | "Caller should guarantee that VT is one of i32 or i64" ); |
3196 | const unsigned SizeInBits = VT.getSizeInBits(); |
3197 | |
3198 | SDLoc DL(Dst.getNode()); |
3199 | uint64_t AndImm, ShlImm; |
  if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
      isShiftedMask_64(AndImm)) {
3202 | // Avoid transforming 'DstOp0' if it has other uses than the AND node. |
    SDValue DstOp0 = Dst.getOperand(0);
3204 | if (!DstOp0.hasOneUse()) |
3205 | return false; |
3206 | |
    // An example to illustrate the transformation
    // From:
    //   lsr   x8, x1, #1
    //   and   x8, x8, #0x3f80
    //   bfxil x8, x1, #0, #7
    // To:
    //   and   x8, x1, #0x7f
    //   ubfx  x9, x1, #8, #7
    //   orr   x8, x8, x9, lsl #7
3216 | // |
3217 | // The number of instructions remains the same, but ORR is faster than BFXIL |
3218 | // on many AArch64 processors (or as good as BFXIL if not faster). Besides, |
3219 | // the dependency chain is improved after the transformation. |
    uint64_t SrlImm;
    if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
      uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
      if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
        unsigned MaskWidth =
            llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
        unsigned UBFMOpc =
            (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
        SDNode *UBFMNode = CurDAG->getMachineNode(
            UBFMOpc, DL, VT, DstOp0.getOperand(0),
            CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask,
                                      DL, VT),
            CurDAG->getTargetConstant(
                SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
        ShiftedOperand = SDValue(UBFMNode, 0);
        EncodedShiftImm = AArch64_AM::getShifterImm(
            AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3237 | return true; |
3238 | } |
3239 | } |
3240 | return false; |
3241 | } |
3242 | |
  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
    ShiftedOperand = Dst.getOperand(0);
    EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3246 | return true; |
3247 | } |
3248 | |
3249 | uint64_t SrlImm; |
  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
    ShiftedOperand = Dst.getOperand(0);
    EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3253 | return true; |
3254 | } |
3255 | return false; |
3256 | } |
3257 | |
3258 | // Given an 'ISD::OR' node that is going to be selected as BFM, analyze |
3259 | // the operands and select it to AArch64::ORR with shifted registers if |
3260 | // that's more efficient. Returns true iff selection to AArch64::ORR happens. |
3261 | static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, |
3262 | SDValue Src, SDValue Dst, SelectionDAG *CurDAG, |
3263 | const bool BiggerPattern) { |
  EVT VT = N->getValueType(0);
  assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
  assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
          (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
         "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Expect result type to be i32 or i64 since N is combinable to BFM");
3271 | SDLoc DL(N); |
3272 | |
3273 | // Bail out if BFM simplifies away one node in BFM Dst. |
3274 | if (OrOpd1 != Dst) |
3275 | return false; |
3276 | |
3277 | const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; |
3278 | // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer |
3279 | // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. |
3280 | if (BiggerPattern) { |
3281 | uint64_t SrcAndImm; |
    if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
        isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3284 | // OrOpd0 = AND Src, #Mask |
3285 | // So BFM simplifies away one AND node from Src and doesn't simplify away |
3286 | // nodes from Dst. If ORR with left-shifted operand also simplifies away |
3287 | // one node (from Rd), ORR is better since it has higher throughput and |
3288 | // smaller latency than BFM on many AArch64 processors (and for the rest |
3289 | // ORR is at least as good as BFM). |
3290 | SDValue ShiftedOperand; |
3291 | uint64_t EncodedShiftImm; |
3292 | if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, |
3293 | EncodedShiftImm)) { |
        SDValue Ops[] = {OrOpd0, ShiftedOperand,
                         CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
        CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3297 | return true; |
3298 | } |
3299 | } |
3300 | return false; |
3301 | } |
3302 | |
  assert((!BiggerPattern) && "BiggerPattern should be handled above");
3304 | |
3305 | uint64_t ShlImm; |
  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
    if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
      SDValue Ops[] = {
          Dst, Src,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3313 | return true; |
3314 | } |
3315 | |
3316 | // Select the following pattern to left-shifted operand rather than BFI. |
3317 | // %val1 = op .. |
3318 | // %val2 = shl %val1, #imm |
3319 | // %res = or %val1, %val2 |
3320 | // |
    // If N is selected to be BFI, we know that
    //   1) OrOpd0 would be the operand from which bits are extracted (i.e.,
    //      folded into BFI)
    //   2) OrOpd1 would be the destination operand (i.e., preserved)
3324 | // |
3325 | // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. |
    if (OrOpd0.getOperand(0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3332 | return true; |
3333 | } |
3334 | } |
3335 | |
3336 | uint64_t SrlImm; |
  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3338 | // Select the following pattern to right-shifted operand rather than BFXIL. |
3339 | // %val1 = op .. |
3340 | // %val2 = lshr %val1, #imm |
3341 | // %res = or %val1, %val2 |
3342 | // |
    // If N is selected to be BFXIL, we know that
    //   1) OrOpd0 would be the operand from which bits are extracted (i.e.,
    //      folded into BFXIL)
    //   2) OrOpd1 would be the destination operand (i.e., preserved)
3346 | // |
3347 | // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. |
    if (OrOpd0.getOperand(0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3354 | return true; |
3355 | } |
3356 | } |
3357 | |
3358 | return false; |
3359 | } |
3360 | |
3361 | static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, |
3362 | SelectionDAG *CurDAG) { |
  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3364 | |
  EVT VT = N->getValueType(0);
3366 | if (VT != MVT::i32 && VT != MVT::i64) |
3367 | return false; |
3368 | |
3369 | unsigned BitWidth = VT.getSizeInBits(); |
3370 | |
3371 | // Because of simplify-demanded-bits in DAGCombine, involved masks may not |
3372 | // have the expected shape. Try to undo that. |
3373 | |
3374 | unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero(); |
3375 | unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero(); |
3376 | |
3377 | // Given a OR operation, check if we have the following pattern |
3378 | // ubfm c, b, imm, imm2 (or something that does the same jobs, see |
3379 | // isBitfieldExtractOp) |
  //   d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                   countTrailingZeros(mask2) == imm2 - imm + 1
3382 | // f = d | c |
3383 | // if yes, replace the OR instruction with: |
3384 | // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 |
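  //
  // Concretely (i32): with c = UBFM b, 4, 11 (bits [11:4] of b at the bottom)
  // and d = e & 0xffffff00, f = d | c selects to BFM e, b, 4, 11, i.e.
  // BFXIL f, b, #4, #8.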
3385 | |
3386 | // OR is commutative, check all combinations of operand order and values of |
3387 | // BiggerPattern, i.e. |
3388 | // Opd0, Opd1, BiggerPattern=false |
3389 | // Opd1, Opd0, BiggerPattern=false |
3390 | // Opd0, Opd1, BiggerPattern=true |
3391 | // Opd1, Opd0, BiggerPattern=true |
3392 | // Several of these combinations may match, so check with BiggerPattern=false |
3393 | // first since that will produce better results by matching more instructions |
3394 | // and/or inserting fewer extra instructions. |
3395 | for (int I = 0; I < 4; ++I) { |
3396 | |
3397 | SDValue Dst, Src; |
3398 | unsigned ImmR, ImmS; |
3399 | bool BiggerPattern = I / 2; |
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();
3404 | |
3405 | unsigned BFXOpc; |
3406 | int DstLSB, Width; |
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
3409 | // Check that the returned opcode is compatible with the pattern, |
3410 | // i.e., same type and zero extended (U and not S) |
3411 | if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || |
3412 | (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) |
3413 | continue; |
3414 | |
3415 | // Compute the width of the bitfield insertion |
3416 | DstLSB = 0; |
3417 | Width = ImmS - ImmR + 1; |
      // FIXME: This constraint is to catch bitfield insertion; we may
      // want to widen the pattern if we want to grab the general bitfield
      // move case
3421 | if (Width <= 0) |
3422 | continue; |
3423 | |
3424 | // If the mask on the insertee is correct, we have a BFXIL operation. We |
3425 | // can share the ImmR and ImmS values from the already-computed UBFM. |
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, BiggerPattern, Src,
                                       DstLSB, Width)) {
3429 | ImmR = (BitWidth - DstLSB) % BitWidth; |
3430 | ImmS = Width - 1; |
3431 | } else |
3432 | continue; |
3433 | |
3434 | // Check the second part of the pattern |
3435 | EVT VT = OrOpd1Val.getValueType(); |
3436 | assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand" ); |
3437 | |
    // Compute the Known Zero for the candidate of the first operand.
    // This allows us to catch more general cases than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3443 | |
3444 | // Check if there is enough room for the second operand to appear |
3445 | // in the first one |
    APInt BitsToBeInserted =
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3448 | |
3449 | if ((BitsToBeInserted & ~Known.Zero) != 0) |
3450 | continue; |
3451 | |
3452 | // Set the first operand |
3453 | uint64_t Imm; |
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(0);
3458 | else |
3459 | // Maybe the AND has been removed by simplify-demanded-bits |
3460 | // or is useful because it discards more bits |
3461 | Dst = OrOpd1Val; |
3462 | |
3463 | // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR |
3464 | // with shifted operand is more efficient. |
    if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
3467 | return true; |
3468 | |
3469 | // both parts match |
3470 | SDLoc DL(N); |
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3475 | return true; |
3476 | } |
3477 | |
3478 | // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff |
3479 | // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted |
3480 | // mask (e.g., 0x000ffff0). |
3481 | uint64_t Mask0Imm, Mask1Imm; |
  SDValue And0 = N->getOperand(0);
  SDValue And1 = N->getOperand(1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3489 | |
3490 | // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), |
3491 | // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the |
3492 | // bits to be inserted. |
    if (isShiftedMask(Mask0Imm, VT)) {
      std::swap(And0, And1);
      std::swap(Mask0Imm, Mask1Imm);
3496 | } |
3497 | |
    SDValue Src = And1->getOperand(0);
    SDValue Dst = And0->getOperand(0);
    unsigned LSB = llvm::countr_zero(Mask1Imm);
3501 | int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount(); |
3502 | |
3503 | // The BFXIL inserts the low-order bits from a source register, so right |
3504 | // shift the needed bits into place. |
3505 | SDLoc DL(N); |
3506 | unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
3507 | uint64_t LsrImm = LSB; |
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(0);
3512 | LsrImm += LSB; |
3513 | } |
3514 | |
    SDNode *LSR = CurDAG->getMachineNode(
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3518 | |
3519 | // BFXIL is an alias of BFM, so translate to BFM operands. |
3520 | unsigned ImmR = (BitWidth - LSB) % BitWidth; |
3521 | unsigned ImmS = Width - 1; |
3522 | |
3523 | // Create the BFXIL instruction. |
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3529 | return true; |
3530 | } |
3531 | |
3532 | return false; |
3533 | } |
3534 | |
3535 | bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { |
3536 | if (N->getOpcode() != ISD::OR) |
3537 | return false; |
3538 | |
3539 | APInt NUsefulBits; |
  getUsefulBits(SDValue(N, 0), NUsefulBits);
3541 | |
  // If none of the bits are useful, just replace the node with UNDEF.
  if (!NUsefulBits) {
    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3545 | return true; |
3546 | } |
3547 | |
  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3549 | return true; |
3550 | |
3551 | return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); |
3552 | } |
3553 | |
/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3555 | /// equivalent of a left shift by a constant amount followed by an and masking |
3556 | /// out a contiguous set of bits. |
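/// For example, on i32, (and (shl X, 4), 0xff0) becomes UBFM wD, wX, #28, #7,
/// i.e. UBFIZ wD, wX, #4, #8.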
3557 | bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { |
3558 | if (N->getOpcode() != ISD::AND) |
3559 | return false; |
3560 | |
  EVT VT = N->getValueType(0);
3562 | if (VT != MVT::i32 && VT != MVT::i64) |
3563 | return false; |
3564 | |
3565 | SDValue Op0; |
3566 | int DstLSB, Width; |
  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
                               Op0, DstLSB, Width))
3569 | return false; |
3570 | |
3571 | // ImmR is the rotate right amount. |
3572 | unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); |
3573 | // ImmS is the most significant bit of the source to be moved. |
3574 | unsigned ImmS = Width - 1; |
3575 | |
3576 | SDLoc DL(N); |
  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
3579 | unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3581 | return true; |
3582 | } |
3583 | |
3584 | /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in |
3585 | /// variable shift/rotate instructions. |
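/// For example, (srl x, (and amt, 63)) on i64 selects to LSRV x, amt, since
/// LSRV only reads the low 6 bits of the shift amount anyway.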
3586 | bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { |
  EVT VT = N->getValueType(0);
3588 | |
3589 | unsigned Opc; |
3590 | switch (N->getOpcode()) { |
3591 | case ISD::ROTR: |
3592 | Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; |
3593 | break; |
3594 | case ISD::SHL: |
3595 | Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; |
3596 | break; |
3597 | case ISD::SRL: |
3598 | Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; |
3599 | break; |
3600 | case ISD::SRA: |
3601 | Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; |
3602 | break; |
3603 | default: |
3604 | return false; |
3605 | } |
3606 | |
3607 | uint64_t Size; |
3608 | uint64_t Bits; |
3609 | if (VT == MVT::i32) { |
3610 | Bits = 5; |
3611 | Size = 32; |
3612 | } else if (VT == MVT::i64) { |
3613 | Bits = 6; |
3614 | Size = 64; |
3615 | } else |
3616 | return false; |
3617 | |
  SDValue ShiftAmt = N->getOperand(1);
3619 | SDLoc DL(N); |
3620 | SDValue NewShiftAmt; |
3621 | |
3622 | // Skip over an extend of the shift amount. |
3623 | if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || |
3624 | ShiftAmt->getOpcode() == ISD::ANY_EXTEND) |
    ShiftAmt = ShiftAmt->getOperand(0);
3626 | |
3627 | if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { |
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3633 | // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X |
3634 | // to avoid the ADD/SUB. |
3635 | NewShiftAmt = Add0; |
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
3639 | // If we are shifting by N-X where N == 0 mod Size, then just shift by -X |
3640 | // to generate a NEG instead of a SUB from a constant. |
3641 | unsigned NegOpc; |
3642 | unsigned ZeroReg; |
      EVT SubVT = ShiftAmt->getValueType(0);
3644 | if (SubVT == MVT::i32) { |
3645 | NegOpc = AArch64::SUBWrr; |
3646 | ZeroReg = AArch64::WZR; |
3647 | } else { |
3648 | assert(SubVT == MVT::i64); |
3649 | NegOpc = AArch64::SUBXrr; |
3650 | ZeroReg = AArch64::XZR; |
3651 | } |
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3656 | NewShiftAmt = SDValue(Neg, 0); |
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3659 | // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X |
3660 | // to generate a NOT instead of a SUB from a constant. |
3661 | unsigned NotOpc; |
3662 | unsigned ZeroReg; |
      EVT SubVT = ShiftAmt->getValueType(0);
3664 | if (SubVT == MVT::i32) { |
3665 | NotOpc = AArch64::ORNWrr; |
3666 | ZeroReg = AArch64::WZR; |
3667 | } else { |
3668 | assert(SubVT == MVT::i64); |
3669 | NotOpc = AArch64::ORNXrr; |
3670 | ZeroReg = AArch64::XZR; |
3671 | } |
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Not =
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3676 | NewShiftAmt = SDValue(Not, 0); |
3677 | } else |
3678 | return false; |
3679 | } else { |
3680 | // If the shift amount is masked with an AND, check that the mask covers the |
3681 | // bits that are implicitly ANDed off by the above opcodes and if so, skip |
3682 | // the AND. |
3683 | uint64_t MaskImm; |
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3686 | return false; |
3687 | |
    if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3689 | return false; |
3690 | |
    NewShiftAmt = ShiftAmt->getOperand(0);
3692 | } |
3693 | |
3694 | // Narrow/widen the shift amount to match the size of the shift operation. |
3695 | if (VT == MVT::i32) |
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3697 | else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { |
3698 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); |
3699 | MachineSDNode *Ext = CurDAG->getMachineNode( |
3700 | AArch64::SUBREG_TO_REG, DL, VT, |
3701 | CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); |
3702 | NewShiftAmt = SDValue(Ext, 0); |
3703 | } |
3704 | |
  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3707 | return true; |
3708 | } |
3709 | |
3710 | static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, |
3711 | SDValue &FixedPos, |
3712 | unsigned RegWidth, |
3713 | bool isReciprocal) { |
3714 | APFloat FVal(0.0); |
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3716 | FVal = CN->getValueAPF(); |
  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3718 | // Some otherwise illegal constants are allowed in this case. |
    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3721 | return false; |
3722 | |
    ConstantPoolSDNode *CN =
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3726 | } else |
3727 | return false; |
3728 | |
3729 | // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits |
3730 | // is between 1 and 32 for a destination w-register, or 1 and 64 for an |
3731 | // x-register. |
3732 | // |
3733 | // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we |
3734 | // want THIS_NODE to be 2^fbits. This is much easier to deal with using |
3735 | // integers. |
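  //
  // For example, (fp_to_sint (fmul Val, 16.0)) selects to an FCVTZS with
  // fbits == 4, computing convertToInt(Val * 2^4).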
3736 | bool IsExact; |
3737 | |
3738 | if (isReciprocal) |
    if (!FVal.getExactInverse(&FVal))
3740 | return false; |
3741 | |
3742 | // fbits is between 1 and 64 in the worst-case, which means the fmul |
3743 | // could have 2^64 as an actual operand. Need 65 bits of precision. |
3744 | APSInt IntVal(65, true); |
  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3746 | |
3747 | // N.b. isPowerOf2 also checks for > 0. |
3748 | if (!IsExact || !IntVal.isPowerOf2()) |
3749 | return false; |
3750 | unsigned FBits = IntVal.logBase2(); |
3751 | |
3752 | // Checks above should have guaranteed that we haven't lost information in |
3753 | // finding FBits, but it must still be in range. |
3754 | if (FBits == 0 || FBits > RegWidth) return false; |
3755 | |
3756 | FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); |
3757 | return true; |
3758 | } |
3759 | |
3760 | bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, |
3761 | unsigned RegWidth) { |
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            /*isReciprocal=*/false);
3764 | } |
3765 | |
3766 | bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N, |
3767 | SDValue &FixedPos, |
3768 | unsigned RegWidth) { |
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            /*isReciprocal=*/true);
3771 | } |
3772 | |
// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
// integer value of each field, and combines them into the single immediate
// operand used in the MRS/MSR instruction.
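// For example, "3:3:4:2:1" (o0=3, op1=3, CRn=4, CRm=2, op2=1) encodes to
// (3 << 14) | (3 << 11) | (4 << 7) | (2 << 3) | 1.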
3776 | static int getIntOperandFromRegisterString(StringRef RegString) { |
3777 | SmallVector<StringRef, 5> Fields; |
  RegString.split(Fields, ':');
3779 | |
3780 | if (Fields.size() == 1) |
3781 | return -1; |
3782 | |
  assert(Fields.size() == 5 &&
         "Invalid number of fields in read register string");
3785 | |
3786 | SmallVector<int, 5> Ops; |
3787 | bool AllIntFields = true; |
3788 | |
3789 | for (StringRef Field : Fields) { |
3790 | unsigned IntField; |
    AllIntFields &= !Field.getAsInteger(10, IntField);
    Ops.push_back(IntField);
3793 | } |
3794 | |
  assert(AllIntFields &&
         "Unexpected non-integer value in special register string.");
3797 | (void)AllIntFields; |
3798 | |
3799 | // Need to combine the integer fields of the string into a single value |
3800 | // based on the bit encoding of MRS/MSR instruction. |
3801 | return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | |
3802 | (Ops[3] << 3) | (Ops[4]); |
3803 | } |
3804 | |
// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
3809 | bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { |
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3812 | SDLoc DL(N); |
3813 | |
3814 | bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS; |
3815 | |
3816 | unsigned Opcode64Bit = AArch64::MRS; |
  int Imm = getIntOperandFromRegisterString(RegString->getString());
3818 | if (Imm == -1) { |
    // No match; use the sysreg mapper to map the remaining possible strings to
    // the value for the register to be used for the instruction operand.
3821 | const auto *TheReg = |
3822 | AArch64SysReg::lookupSysRegByName(RegString->getString()); |
3823 | if (TheReg && TheReg->Readable && |
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
3825 | Imm = TheReg->Encoding; |
3826 | else |
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3828 | |
3829 | if (Imm == -1) { |
3830 | // Still no match, see if this is "pc" or give up. |
3831 | if (!ReadIs128Bit && RegString->getString() == "pc" ) { |
3832 | Opcode64Bit = AArch64::ADR; |
3833 | Imm = 0; |
3834 | } else { |
3835 | return false; |
3836 | } |
3837 | } |
3838 | } |
3839 | |
  SDValue InChain = N->getOperand(0);
3841 | SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32); |
3842 | if (!ReadIs128Bit) { |
3843 | CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */, |
3844 | {SysRegImm, InChain}); |
3845 | } else { |
3846 | SDNode *MRRS = CurDAG->getMachineNode( |
3847 | AArch64::MRRS, DL, |
3848 | {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */}, |
3849 | {SysRegImm, InChain}); |
3850 | |
3851 | // Sysregs are not endian. The even register always contains the low half |
3852 | // of the register. |
3853 | SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64, |
3854 | SDValue(MRRS, 0)); |
3855 | SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64, |
3856 | SDValue(MRRS, 0)); |
3857 | SDValue OutChain = SDValue(MRRS, 1); |
3858 | |
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  }
3863 | return true; |
3864 | } |
3865 | |
// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
3869 | // known by the MSR SysReg mapper. |
3870 | bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { |
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3873 | SDLoc DL(N); |
3874 | |
3875 | bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR; |
3876 | |
3877 | if (!WriteIs128Bit) { |
3878 | // Check if the register was one of those allowed as the pstatefield value |
3879 | // in the MSR (immediate) instruction. To accept the values allowed in the |
3880 | // pstatefield for the MSR (immediate) instruction, we also require that an |
3881 | // immediate value has been provided as an argument, we know that this is |
3882 | // the case as it has been ensured by semantic checking. |
3883 | auto trySelectPState = [&](auto PMapper, unsigned State) { |
3884 | if (PMapper) { |
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
3887 | unsigned Reg = PMapper->Encoding; |
        uint64_t Immed = N->getConstantOperandVal(2);
3889 | CurDAG->SelectNodeTo( |
3890 | N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), |
3891 | CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); |
3892 | return true; |
3893 | } |
3894 | return false; |
3895 | }; |
3896 | |
3897 | if (trySelectPState( |
3898 | AArch64PState::lookupPStateImm0_15ByName(RegString->getString()), |
3899 | AArch64::MSRpstateImm4)) |
3900 | return true; |
3901 | if (trySelectPState( |
3902 | AArch64PState::lookupPStateImm0_1ByName(RegString->getString()), |
3903 | AArch64::MSRpstateImm1)) |
3904 | return true; |
3905 | } |
3906 | |
  int Imm = getIntOperandFromRegisterString(RegString->getString());
3908 | if (Imm == -1) { |
3909 | // Use the sysreg mapper to attempt to map the remaining possible strings |
3910 | // to the value for the register to be used for the MSR (register) |
3911 | // instruction operand. |
3912 | auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); |
3913 | if (TheReg && TheReg->Writeable && |
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
3915 | Imm = TheReg->Encoding; |
3916 | else |
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3918 | |
3919 | if (Imm == -1) |
3920 | return false; |
3921 | } |
3922 | |
  SDValue InChain = N->getOperand(0);
3924 | if (!WriteIs128Bit) { |
3925 | CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other, |
3926 | CurDAG->getTargetConstant(Imm, DL, MVT::i32), |
3927 | N->getOperand(2), InChain); |
3928 | } else { |
    // No endian swap. The lower half always goes into the even subreg, and the
    // higher half always into the odd subreg.
3931 | SDNode *Pair = CurDAG->getMachineNode( |
3932 | TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */, |
3933 | {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL, |
3934 | MVT::i32), |
3935 | N->getOperand(2), |
3936 | CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32), |
3937 | N->getOperand(3), |
3938 | CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)}); |
3939 | |
3940 | CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other, |
3941 | CurDAG->getTargetConstant(Imm, DL, MVT::i32), |
3942 | SDValue(Pair, 0), InChain); |
3943 | } |
3944 | |
3945 | return true; |
3946 | } |
3947 | |
3948 | /// We've got special pseudo-instructions for these |
3949 | bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { |
3950 | unsigned Opcode; |
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3952 | |
3953 | // Leave IR for LSE if subtarget supports it. |
3954 | if (Subtarget->hasLSE()) return false; |
3955 | |
3956 | if (MemTy == MVT::i8) |
3957 | Opcode = AArch64::CMP_SWAP_8; |
3958 | else if (MemTy == MVT::i16) |
3959 | Opcode = AArch64::CMP_SWAP_16; |
3960 | else if (MemTy == MVT::i32) |
3961 | Opcode = AArch64::CMP_SWAP_32; |
3962 | else if (MemTy == MVT::i64) |
3963 | Opcode = AArch64::CMP_SWAP_64; |
3964 | else |
3965 | llvm_unreachable("Unknown AtomicCmpSwap type" ); |
3966 | |
3967 | MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; |
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
3970 | SDNode *CmpSwap = CurDAG->getMachineNode( |
3971 | Opcode, SDLoc(N), |
3972 | CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); |
3973 | |
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3976 | |
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3979 | CurDAG->RemoveDeadNode(N); |
3980 | |
3981 | return true; |
3982 | } |
3983 | |
3984 | bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, |
3985 | SDValue &Shift) { |
  if (!isa<ConstantSDNode>(N))
3987 | return false; |
3988 | |
3989 | SDLoc DL(N); |
  uint64_t Val = cast<ConstantSDNode>(N)
                     ->getAPIntValue()
                     .trunc(VT.getFixedSizeInBits())
                     .getZExtValue();
3994 | |
3995 | switch (VT.SimpleTy) { |
3996 | case MVT::i8: |
3997 | // All immediates are supported. |
3998 | Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
3999 | Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
4000 | return true; |
4001 | case MVT::i16: |
4002 | case MVT::i32: |
4003 | case MVT::i64: |
4004 | // Support 8bit unsigned immediates. |
4005 | if (Val <= 255) { |
4006 | Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
4007 | Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
4008 | return true; |
4009 | } |
4010 | // Support 16bit unsigned immediates that are a multiple of 256. |
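    // e.g., 0x1200 is encoded as immediate 0x12 with shift LSL #8.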
4011 | if (Val <= 65280 && Val % 256 == 0) { |
4012 | Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); |
4013 | Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); |
4014 | return true; |
4015 | } |
4016 | break; |
4017 | default: |
4018 | break; |
4019 | } |
4020 | |
4021 | return false; |
4022 | } |
4023 | |
4024 | bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT, |
4025 | SDValue &Imm, SDValue &Shift, |
4026 | bool Negate) { |
  if (!isa<ConstantSDNode>(N))
4028 | return false; |
4029 | |
4030 | SDLoc DL(N); |
  int64_t Val = cast<ConstantSDNode>(N)
                    ->getAPIntValue()
                    .trunc(VT.getFixedSizeInBits())
                    .getSExtValue();
4035 | |
4036 | if (Negate) |
4037 | Val = -Val; |
4038 | |
4039 | // Signed saturating instructions treat their immediate operand as unsigned, |
4040 | // whereas the related intrinsics define their operands to be signed. This |
4041 | // means we can only use the immediate form when the operand is non-negative. |
4042 | if (Val < 0) |
4043 | return false; |
4044 | |
4045 | switch (VT.SimpleTy) { |
4046 | case MVT::i8: |
4047 | // All positive immediates are supported. |
4048 | Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
4049 | Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
4050 | return true; |
4051 | case MVT::i16: |
4052 | case MVT::i32: |
4053 | case MVT::i64: |
4054 | // Support 8bit positive immediates. |
4055 | if (Val <= 255) { |
4056 | Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
4057 | Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
4058 | return true; |
4059 | } |
4060 | // Support 16bit positive immediates that are a multiple of 256. |
4061 | if (Val <= 65280 && Val % 256 == 0) { |
4062 | Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); |
4063 | Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); |
4064 | return true; |
4065 | } |
4066 | break; |
4067 | default: |
4068 | break; |
4069 | } |
4070 | |
4071 | return false; |
4072 | } |
4073 | |
4074 | bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, |
4075 | SDValue &Shift) { |
  if (!isa<ConstantSDNode>(N))
4077 | return false; |
4078 | |
4079 | SDLoc DL(N); |
  int64_t Val = cast<ConstantSDNode>(N)
                    ->getAPIntValue()
                    .trunc(VT.getFixedSizeInBits())
                    .getSExtValue();
4084 | |
4085 | switch (VT.SimpleTy) { |
4086 | case MVT::i8: |
4087 | // All immediates are supported. |
4088 | Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
4089 | Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); |
4090 | return true; |
4091 | case MVT::i16: |
4092 | case MVT::i32: |
4093 | case MVT::i64: |
4094 | // Support 8bit signed immediates. |
4095 | if (Val >= -128 && Val <= 127) { |
4096 | Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
4097 | Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); |
4098 | return true; |
4099 | } |
4100 | // Support 16bit signed immediates that are a multiple of 256. |
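    // For example (illustrative): -1024 is in range and a multiple of 256, so
    // it is selected as Imm = 0xFC (-4 as a signed byte) with Shift = 8.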
    if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}

bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = CNode->getSExtValue();
    SDLoc DL(N);
    if (ImmVal >= -128 && ImmVal < 128) {
      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
      return true;
    }
  }
  return false;
}

bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();

    switch (VT.SimpleTy) {
    case MVT::i8:
      ImmVal &= 0xFF;
      break;
    case MVT::i16:
      ImmVal &= 0xFFFF;
      break;
    case MVT::i32:
      ImmVal &= 0xFFFFFFFF;
      break;
    case MVT::i64:
      break;
    default:
      llvm_unreachable("Unexpected type");
    }
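    // Illustrative note: after masking to the element width, only values that
    // fit in the low 8 bits are accepted below; e.g. 0x3FF passes for MVT::i8
    // (masked to 0xFF) but is rejected for the wider element types.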

    if (ImmVal < 256) {
      Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
      return true;
    }
  }
  return false;
}

bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
                                              bool Invert) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();
    SDLoc DL(N);

    if (Invert)
      ImmVal = ~ImmVal;

    // Shift mask depending on type size.
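    // Illustrative example: an i16 immediate 0x00FF is replicated below to
    // 0x00FF00FF00FF00FF so it can be validated as a 64-bit logical immediate.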
    switch (VT.SimpleTy) {
    case MVT::i8:
      ImmVal &= 0xFF;
      ImmVal |= ImmVal << 8;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i16:
      ImmVal &= 0xFFFF;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i32:
      ImmVal &= 0xFFFFFFFF;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i64:
      break;
    default:
      llvm_unreachable("Unexpected type");
    }

    uint64_t encoding;
    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
      return true;
    }
  }
  return false;
}

// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
// Rather than attempt to normalise everything we can sometimes saturate the
// shift amount during selection. This function also allows for consistent
// isel patterns by ensuring the resulting "Imm" node is of the i32 type
// required by the instructions.
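// For example (illustrative): with Low = 1, High = 16 and AllowSaturation set,
// a requested shift of 20 on a 16-bit element is clamped to 16 rather than
// being rejected.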
bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
                                            uint64_t High, bool AllowSaturation,
                                            SDValue &Imm) {
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();

    // Reject shift amounts that are too small.
    if (ImmVal < Low)
      return false;

    // Reject or saturate shift amounts that are too big.
    if (ImmVal > High) {
      if (!AllowSaturation)
        return false;
      ImmVal = High;
    }

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  // tagp(FrameIndex, IRGstack, tag_offset):
  // since the offset between FrameIndex and IRGstack is a compile-time
  // constant, this can be lowered to a single ADDG instruction.
  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
    return false;
  }

  SDValue IRG_SP = N->getOperand(2);
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
    return false;
  }

  const TargetLowering *TLI = getTargetLowering();
  SDLoc DL(N);
  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  int TagOffset = N->getConstantOperandVal(3);

  SDNode *Out = CurDAG->getMachineNode(
      AArch64::TAGPstack, DL, MVT::i64,
      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, Out);
  return true;
}

void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: above applies in any case when offset between Op1 and Op2 is a
  // compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = N->getConstantOperandVal(3);
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
                                      {N->getOperand(1), N->getOperand(2)});
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
                                      {SDValue(N1, 0), N->getOperand(2)});
  SDNode *N3 = CurDAG->getMachineNode(
      AArch64::ADDG, DL, MVT::i64,
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, N3);
}

bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
  assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like insert_subvector.
  if (N->getConstantOperandVal(2) != 0)
    return false;
  if (!N->getOperand(0).isUndef())
    return false;

  // Bail when normal isel should do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(1).getValueType();
  if (VT.isFixedLengthVector() || InVT.isScalableVector())
    return false;
  if (InVT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(1), RC));
  return true;
}

bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like extract_subvector.
  if (N->getConstantOperandVal(1) != 0)
    return false;

  // Bail when normal isel can do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(0).getValueType();
  if (VT.isScalableVector() || InVT.isFixedLengthVector())
    return false;
  if (VT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(0), RC));
  return true;
}

bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
  // Rotate by a constant is a funnel shift in IR, which is expanded to
  // an OR with shifted operands.
  // We do the following transform:
  //   OR N0, N1 -> xar (x, y, imm)
  // Where:
  //   N1 = SRL_PRED true, V, splat(imm)      --> rotr amount
  //   N0 = SHL_PRED true, V, splat(bits-imm)
  //   V = (xor x, y)
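  // Worked example (illustrative): for nxv2i64, rotr(xor(x, y), 8) arrives
  // here as N1 = SRL_PRED by splat(8) and N0 = SHL_PRED by splat(56); since
  // 8 + 56 == 64, the pair is folded into XAR x, y, #8.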
  if (VT.isScalableVector() && Subtarget->hasSVE2orSME()) {
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(N0, N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
    if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
        !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
      return false;

    SDValue XOR = N0.getOperand(1);
    if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
      return false;

    APInt ShlAmt, ShrAmt;
    if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
      return false;

    if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    SDLoc DL(N);
    SDValue Imm =
        CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);

    SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
            VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D})) {
      CurDAG->SelectNodeTo(N, Opc, VT, Ops);
      return true;
    }
    return false;
  }

  if (!Subtarget->hasSHA3())
    return false;

  if (N0->getOpcode() != AArch64ISD::VSHL ||
      N1->getOpcode() != AArch64ISD::VLSHR)
    return false;

  if (N0->getOperand(0) != N1->getOperand(0) ||
      N1->getOperand(0)->getOpcode() != ISD::XOR)
    return false;

  SDValue XOR = N0.getOperand(0);
  SDValue R1 = XOR.getOperand(0);
  SDValue R2 = XOR.getOperand(1);

  unsigned HsAmt = N0.getConstantOperandVal(1);
  unsigned ShAmt = N1.getConstantOperandVal(1);

  SDLoc DL = SDLoc(N0.getOperand(1));
  SDValue Imm = CurDAG->getTargetConstant(
      ShAmt, DL, N0.getOperand(1).getValueType(), false);

  if (ShAmt + HsAmt != 64)
    return false;

  SDValue Ops[] = {R1, R2, Imm};
  CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);

  return true;
}

void AArch64DAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // A few cases call for custom selection.
  EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
  default:
    break;

  case ISD::ATOMIC_CMP_SWAP:
    if (SelectCMP_SWAP(Node))
      return;
    break;

  case ISD::READ_REGISTER:
  case AArch64ISD::MRRS:
    if (tryReadRegister(Node))
      return;
    break;

  case ISD::WRITE_REGISTER:
  case AArch64ISD::MSRR:
    if (tryWriteRegister(Node))
      return;
    break;

  case ISD::LOAD: {
    // Try to select as an indexed load. Fall through to normal processing
    // if we can't.
    if (tryIndexedLoad(Node))
      return;
    break;
  }

  case ISD::SRL:
  case ISD::AND:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (tryBitfieldExtractOp(Node))
      return;
    if (tryBitfieldInsertInZeroOp(Node))
      return;
    [[fallthrough]];
  case ISD::ROTR:
  case ISD::SHL:
    if (tryShiftAmountMod(Node))
      return;
    break;

  case ISD::SIGN_EXTEND:
    if (tryBitfieldExtractOpFromSExt(Node))
      return;
    break;

  case ISD::OR:
    if (tryBitfieldInsertOp(Node))
      return;
    if (trySelectXAR(Node))
      return;
    break;

  case ISD::EXTRACT_SUBVECTOR: {
    if (trySelectCastScalableToFixedLengthVector(Node))
      return;
    break;
  }

  case ISD::INSERT_SUBVECTOR: {
    if (trySelectCastFixedLengthToScalableVector(Node))
      return;
    break;
  }

  case ISD::Constant: {
    // Materialize zero constants as copies from WZR/XZR. This allows
    // the coalescer to propagate these into other instructions.
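    // For example (illustrative): once coalesced, a user such as an ADD or
    // ORR can take WZR/XZR directly as an operand, so no materializing move
    // remains in the final code.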
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      if (VT == MVT::i32) {
        SDValue New = CurDAG->getCopyFromReg(
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
        ReplaceNode(Node, New.getNode());
        return;
      } else if (VT == MVT::i64) {
        SDValue New = CurDAG->getCopyFromReg(
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
        ReplaceNode(Node, New.getNode());
        return;
      }
    }
    break;
  }

  case ISD::FrameIndex: {
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
    const TargetLowering *TLI = getTargetLowering();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    SDLoc DL(Node);
    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
                      CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
    CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
    return;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_ldaxp:
    case Intrinsic::aarch64_ldxp: {
      unsigned Op =
          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
      SDValue MemAddr = Node->getOperand(2);
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);

      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
                                          MVT::Other, MemAddr, Chain);

      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
      ReplaceNode(Node, Ld);
      return;
    }
    case Intrinsic::aarch64_stlxp:
    case Intrinsic::aarch64_stxp: {
      unsigned Op =
          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue ValLo = Node->getOperand(2);
      SDValue ValHi = Node->getOperand(3);
      SDValue MemAddr = Node->getOperand(4);

      // Place arguments in the right order.
      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};

      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(Node, St);
      return;
    }
    case Intrinsic::aarch64_neon_ld1x2:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 2, AArch64::LD2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 2, AArch64::LD2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 2, AArch64::LD2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 2, AArch64::LD2i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 3, AArch64::LD3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 3, AArch64::LD3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 3, AArch64::LD3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 3, AArch64::LD3i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 4, AArch64::LD4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 4, AArch64::LD4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 4, AArch64::LD4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 4, AArch64::LD4i64);
        return;
      }
      break;
    case Intrinsic::aarch64_ld64b:
      SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
      return;
    case Intrinsic::aarch64_sve_ld2q_sret: {
      SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld3q_sret: {
      SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld4q_sret: {
      SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld2_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
                             true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
                                          AArch64::LD1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
                                          AArch64::LD1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
                                          AArch64::LD1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
                                          AArch64::LD1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
                                          AArch64::LD1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
                                          AArch64::LD1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
                                          AArch64::LD1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
                                          AArch64::LD1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 0,
                                          AArch64::LDNT1B_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
                                          AArch64::LDNT1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 1,
                                          AArch64::LDNT1H_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
                                          AArch64::LDNT1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 2,
                                          AArch64::LDNT1W_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
                                          AArch64::LDNT1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 3,
                                          AArch64::LDNT1D_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
                                          AArch64::LDNT1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 0,
                                          AArch64::LDNT1B_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
                                          AArch64::LDNT1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 1,
                                          AArch64::LDNT1H_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
                                          AArch64::LDNT1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 2,
                                          AArch64::LDNT1W_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
                                          AArch64::LDNT1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 3,
                                          AArch64::LDNT1D_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
                                          AArch64::LDNT1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld3_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
                             true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld4_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
                             true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_hor_vg2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
                                     AArch64::MOVA_2ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
                                     AArch64::MOVA_2ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_hor_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_vg1x2: {
      SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
                                  AArch64::MOVA_VG2_2ZMXI);
      return;
    }
    case Intrinsic::aarch64_sme_read_vg1x4: {
      SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
                                  AArch64::MOVA_VG4_4ZMXI);
      return;
    }
    case Intrinsic::swift_async_context_addr: {
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
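      // Background note (not from this file): the Swift concurrency ABI keeps
      // the async context pointer at FP - 8 in the extended frame record,
      // which is what the SUBXri below computes.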
5239 | SDValue Res = SDValue( |
5240 | CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, |
5241 | CurDAG->getTargetConstant(8, DL, MVT::i32), |
5242 | CurDAG->getTargetConstant(0, DL, MVT::i32)), |
5243 | 0); |
5244 | ReplaceUses(F: SDValue(Node, 0), T: Res); |
5245 | ReplaceUses(F: SDValue(Node, 1), T: CopyFP.getValue(R: 1)); |
5246 | CurDAG->RemoveDeadNode(N: Node); |
5247 | |
5248 | auto &MF = CurDAG->getMachineFunction(); |
5249 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
5250 | MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
5251 | return; |
5252 | } |
5253 | case Intrinsic::aarch64_sme_luti2_lane_zt_x4: { |
5254 | if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( |
5255 | Node->getValueType(0), |
5256 | {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H, |
5257 | AArch64::LUTI2_4ZTZI_S})) |
5258 | // Second Immediate must be <= 3: |
5259 | SelectMultiVectorLuti(Node, NumOutVecs: 4, Opc: Opc, MaxImm: 3); |
5260 | return; |
5261 | } |
5262 | case Intrinsic::aarch64_sme_luti4_lane_zt_x4: { |
5263 | if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( |
5264 | Node->getValueType(0), |
5265 | {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S})) |
5266 | // Second Immediate must be <= 1: |
5267 | SelectMultiVectorLuti(Node, NumOutVecs: 4, Opc: Opc, MaxImm: 1); |
5268 | return; |
5269 | } |
5270 | case Intrinsic::aarch64_sme_luti2_lane_zt_x2: { |
5271 | if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( |
5272 | Node->getValueType(0), |
5273 | {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H, |
5274 | AArch64::LUTI2_2ZTZI_S})) |
5275 | // Second Immediate must be <= 7: |
5276 | SelectMultiVectorLuti(Node, NumOutVecs: 2, Opc: Opc, MaxImm: 7); |
5277 | return; |
5278 | } |
5279 | case Intrinsic::aarch64_sme_luti4_lane_zt_x2: { |
5280 | if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( |
5281 | Node->getValueType(0), |
5282 | {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H, |
5283 | AArch64::LUTI4_2ZTZI_S})) |
5284 | // Second Immediate must be <= 3: |
5285 | SelectMultiVectorLuti(Node, NumOutVecs: 2, Opc: Opc, MaxImm: 3); |
5286 | return; |
5287 | } |
5288 | } |
5289 | } break; |
5290 | case ISD::INTRINSIC_WO_CHAIN: { |
5291 | unsigned IntNo = Node->getConstantOperandVal(Num: 0); |
5292 | switch (IntNo) { |
5293 | default: |
5294 | break; |
5295 | case Intrinsic::aarch64_tagp: |
5296 | SelectTagP(N: Node); |
5297 | return; |
5298 | case Intrinsic::aarch64_neon_tbl2: |
5299 | SelectTable(Node, 2, |
5300 | VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, |
5301 | false); |
5302 | return; |
5303 | case Intrinsic::aarch64_neon_tbl3: |
5304 | SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three |
5305 | : AArch64::TBLv16i8Three, |
5306 | false); |
5307 | return; |
5308 | case Intrinsic::aarch64_neon_tbl4: |
5309 | SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four |
5310 | : AArch64::TBLv16i8Four, |
5311 | false); |
5312 | return; |
5313 | case Intrinsic::aarch64_neon_tbx2: |
5314 | SelectTable(Node, 2, |
5315 | VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, |
5316 | true); |
5317 | return; |
5318 | case Intrinsic::aarch64_neon_tbx3: |
5319 | SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three |
5320 | : AArch64::TBXv16i8Three, |
5321 | true); |
5322 | return; |
5323 | case Intrinsic::aarch64_neon_tbx4: |
5324 | SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four |
5325 | : AArch64::TBXv16i8Four, |
5326 | true); |
5327 | return; |
5328 | case Intrinsic::aarch64_sve_srshl_single_x2: |
5329 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5330 | Node->getValueType(0), |
5331 | {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H, |
5332 | AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D})) |
5333 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op); |
5334 | return; |
5335 | case Intrinsic::aarch64_sve_srshl_single_x4: |
5336 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5337 | Node->getValueType(0), |
5338 | {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H, |
5339 | AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) |
5340 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op); |
5341 | return; |
5342 | case Intrinsic::aarch64_sve_urshl_single_x2: |
5343 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5344 | Node->getValueType(0), |
5345 | {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H, |
5346 | AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D})) |
5347 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op); |
5348 | return; |
5349 | case Intrinsic::aarch64_sve_urshl_single_x4: |
5350 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5351 | Node->getValueType(0), |
5352 | {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H, |
5353 | AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D})) |
5354 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op); |
5355 | return; |
5356 | case Intrinsic::aarch64_sve_srshl_x2: |
5357 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5358 | Node->getValueType(0), |
5359 | {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H, |
5360 | AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D})) |
5361 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op); |
5362 | return; |
5363 | case Intrinsic::aarch64_sve_srshl_x4: |
5364 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5365 | Node->getValueType(0), |
5366 | {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H, |
5367 | AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D})) |
5368 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op); |
5369 | return; |
5370 | case Intrinsic::aarch64_sve_urshl_x2: |
5371 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5372 | Node->getValueType(0), |
5373 | {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H, |
5374 | AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D})) |
5375 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op); |
5376 | return; |
5377 | case Intrinsic::aarch64_sve_urshl_x4: |
5378 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5379 | Node->getValueType(0), |
5380 | {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H, |
5381 | AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D})) |
5382 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op); |
5383 | return; |
5384 | case Intrinsic::aarch64_sve_sqdmulh_single_vgx2: |
5385 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5386 | Node->getValueType(0), |
5387 | {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H, |
5388 | AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D})) |
5389 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op); |
5390 | return; |
5391 | case Intrinsic::aarch64_sve_sqdmulh_single_vgx4: |
5392 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5393 | Node->getValueType(0), |
5394 | {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H, |
5395 | AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D})) |
5396 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op); |
5397 | return; |
5398 | case Intrinsic::aarch64_sve_sqdmulh_vgx2: |
5399 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5400 | Node->getValueType(0), |
5401 | {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H, |
5402 | AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D})) |
5403 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op); |
5404 | return; |
5405 | case Intrinsic::aarch64_sve_sqdmulh_vgx4: |
5406 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( |
5407 | Node->getValueType(0), |
5408 | {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H, |
5409 | AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) |
5410 | SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op); |
5411 | return; |
5412 | case Intrinsic::aarch64_sve_whilege_x2: |
5413 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5414 | Node->getValueType(0), |
5415 | {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, |
5416 | AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) |
5417 | SelectWhilePair(N: Node, Opc: Op); |
5418 | return; |
5419 | case Intrinsic::aarch64_sve_whilegt_x2: |
5420 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5421 | Node->getValueType(0), |
5422 | {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, |
5423 | AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) |
5424 | SelectWhilePair(N: Node, Opc: Op); |
5425 | return; |
5426 | case Intrinsic::aarch64_sve_whilehi_x2: |
5427 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5428 | Node->getValueType(0), |
5429 | {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, |
5430 | AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) |
5431 | SelectWhilePair(N: Node, Opc: Op); |
5432 | return; |
5433 | case Intrinsic::aarch64_sve_whilehs_x2: |
5434 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5435 | Node->getValueType(0), |
5436 | {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, |
5437 | AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) |
5438 | SelectWhilePair(N: Node, Opc: Op); |
5439 | return; |
5440 | case Intrinsic::aarch64_sve_whilele_x2: |
5441 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5442 | Node->getValueType(0), |
5443 | {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, |
5444 | AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) |
5445 | SelectWhilePair(N: Node, Opc: Op); |
5446 | return; |
5447 | case Intrinsic::aarch64_sve_whilelo_x2: |
5448 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5449 | Node->getValueType(0), |
5450 | {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, |
5451 | AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) |
5452 | SelectWhilePair(N: Node, Opc: Op); |
5453 | return; |
5454 | case Intrinsic::aarch64_sve_whilels_x2: |
5455 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5456 | Node->getValueType(0), |
5457 | {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, |
5458 | AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) |
5459 | SelectWhilePair(N: Node, Opc: Op); |
5460 | return; |
5461 | case Intrinsic::aarch64_sve_whilelt_x2: |
5462 | if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( |
5463 | Node->getValueType(0), |
5464 | {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, |
5465 | AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) |
5466 | SelectWhilePair(N: Node, Opc: Op); |
5467 | return; |
    case Intrinsic::aarch64_sve_smax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
               AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_umax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
               AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S,
               AArch64::FMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_smax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
               AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_umax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
               AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S,
               AArch64::FMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_smin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
               AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_umin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
               AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S,
               AArch64::FMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_smin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
               AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_umin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
               AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S,
               AArch64::FMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_smax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
               AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_umax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
               AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S,
               AArch64::FMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
               AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
               AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S,
               AArch64::FMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
               AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
               AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S,
               AArch64::FMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
               AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
               AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S,
               AArch64::FMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S,
               AArch64::FMAXNM_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S,
               AArch64::FMAXNM_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S,
               AArch64::FMINNM_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S,
               AArch64::FMINNM_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S,
               AArch64::FMAXNM_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S,
               AArch64::FMAXNM_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S,
               AArch64::FMINNM_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S,
               AArch64::FMINNM_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
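    // The multi-vector FP<->integer conversions only exist for .S elements
    // (note the StoS opcode suffix), so no per-type opcode table is needed.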
    case Intrinsic::aarch64_sve_fcvtzs_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_scvtf_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzu_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_ucvtf_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzs_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_scvtf_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzu_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_ucvtf_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
      return;
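    // SME2 multi-vector clamp: each result element is constrained to lie
    // between the corresponding elements of the two source vectors.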
    case Intrinsic::aarch64_sve_sclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
               AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_uclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
               AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_fclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
               AArch64::FCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_sclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
               AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_uclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
               AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_fclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
               AArch64::FCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_add_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
               AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_add_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
               AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
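    // Multi-vector permutes. The .Q-element variants (zipq/uzpq) have a
    // single 128-bit-element opcode, so they bypass the per-type table.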
    case Intrinsic::aarch64_sve_zip_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
               AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_zipq_x2:
      SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
                                AArch64::ZIP_VG2_2ZZZ_Q);
      return;
    case Intrinsic::aarch64_sve_zip_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
               AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_zipq_x4:
      SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
                                AArch64::ZIP_VG4_4Z4Z_Q);
      return;
    case Intrinsic::aarch64_sve_uzp_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
               AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_uzpq_x2:
      SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
                                AArch64::UZP_VG2_2ZZZ_Q);
      return;
    case Intrinsic::aarch64_sve_uzp_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
               AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_uzpq_x4:
      SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
                                AArch64::UZP_VG4_4Z4Z_Q);
      return;
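    // Multi-vector SEL takes a predicate operand in addition to the two
    // source tuples, hence /*HasPred=*/true below.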
    case Intrinsic::aarch64_sve_sel_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
               AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
      return;
    case Intrinsic::aarch64_sve_sel_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
               AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
      return;
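    // Multi-vector FRINT is only defined for .S elements.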
    case Intrinsic::aarch64_sve_frinta_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frinta_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintm_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintm_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintn_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintn_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintp_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintp_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
      return;
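    // Unpacks widen their source elements, so there is no byte-element
    // destination; the first entry of each opcode table is left as 0.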
    case Intrinsic::aarch64_sve_sunpk_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
               AArch64::SUNPK_VG2_2ZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_uunpk_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
               AArch64::UUNPK_VG2_2ZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_sunpk_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
               AArch64::SUNPK_VG4_4Z2Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_uunpk_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
               AArch64::UUNPK_VG4_4Z2Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
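    // PEXT expands a predicate-as-counter operand into a pair of predicates.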
    case Intrinsic::aarch64_sve_pext_x2: {
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
               AArch64::PEXT_2PCI_D}))
        SelectPExtPair(Node, Op);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    if (Node->getNumOperands() >= 3)
      VT = Node->getOperand(2)->getValueType(0);
    switch (IntNo) {
    default:
      break;
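    // NEON structured stores: dispatch on the vector type of the stored
    // data, captured in VT from operand 2 above.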
    case Intrinsic::aarch64_neon_st1x2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST1Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST1Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST1Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST1Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST1Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST1Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST1Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST1Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST1Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST1Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST2Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST2Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST2Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST2Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST2Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST3Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST3Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST3Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST3Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST3Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST4Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 2, AArch64::ST2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 2, AArch64::ST2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 2, AArch64::ST2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 2, AArch64::ST2i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 3, AArch64::ST3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 3, AArch64::ST3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 3, AArch64::ST3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 3, AArch64::ST3i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 4, AArch64::ST4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 4, AArch64::ST4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 4, AArch64::ST4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 4, AArch64::ST4i64);
        return;
      }
      break;
    }
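    // SVE structured stores: the third argument is log2 of the element size
    // in bytes (0 = B, 1 = H, 2 = W, 3 = D, 4 = Q), and each element size
    // has a reg+imm and a reg+reg addressing form.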
    case Intrinsic::aarch64_sve_st2q: {
      SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st3q: {
      SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st4q: {
      SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st2: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st3: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st4: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
        return;
      }
      break;
    }
    }
    break;
  }
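  // Post-increment NEON structured loads. The result tuple lives in a
  // consecutive D- or Q-register sequence, so the first sub-register index
  // (dsub0 or qsub0) is passed alongside the opcode.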
  case AArch64ISD::LD2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
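  // Post-increment NEON structured stores take the stored value type from
  // operand 1 rather than from a result type.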
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
6793 | case AArch64ISD::SVE_LD2_MERGE_ZERO: { |
6794 | if (VT == MVT::nxv16i8) { |
6795 | SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); |
6796 | return; |
6797 | } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
6798 | VT == MVT::nxv8bf16) { |
6799 | SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); |
6800 | return; |
6801 | } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
6802 | SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); |
6803 | return; |
6804 | } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
6805 | SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); |
6806 | return; |
6807 | } |
6808 | break; |
6809 | } |
6810 | case AArch64ISD::SVE_LD3_MERGE_ZERO: { |
6811 | if (VT == MVT::nxv16i8) { |
6812 | SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); |
6813 | return; |
6814 | } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
6815 | VT == MVT::nxv8bf16) { |
6816 | SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); |
6817 | return; |
6818 | } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
6819 | SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W); |
6820 | return; |
6821 | } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
6822 | SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D); |
6823 | return; |
6824 | } |
6825 | break; |
6826 | } |
6827 | case AArch64ISD::SVE_LD4_MERGE_ZERO: { |
6828 | if (VT == MVT::nxv16i8) { |
6829 | SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B); |
6830 | return; |
6831 | } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
6832 | VT == MVT::nxv8bf16) { |
6833 | SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H); |
6834 | return; |
6835 | } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
6836 | SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W); |
6837 | return; |
6838 | } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
6839 | SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D); |
6840 | return; |
6841 | } |
6842 | break; |
6843 | } |
6844 | } |
6845 | |
6846 | // Select the default instruction |
6847 | SelectCode(Node); |
6848 | } |

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
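/// For example (an illustrative sketch, not an exhaustive list):
///   nxv16i1, NumVec = 1  -->  nxv16i8   (16 x 8  = 128)
///   nxv4i1,  NumVec = 1  -->  nxv4i32   (4 x 32  = 128)
///   nxv4i1,  NumVec = 3  -->  nxv12i32  (three structured registers)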
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (Root->getConstantOperandVal(1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
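    // SME LDR/STR transfer one SVE vector's worth of ZA bytes, modelled
    // here as nxv16i8.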
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1dq:
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1wq:
    return EVT(MVT::nxv1i32);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
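/// For example (an illustrative sketch): with MemVT = nxv16i8, so one
/// vector occupies 16 bytes per vector-length unit, the address
/// (add x, (vscale * 32)) selects as Base = x, OffImm = #2, provided 2
/// lies within [Min, Max].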
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
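/// For example (an illustrative sketch): with Scale = 2 (32-bit elements),
/// (add x0, (shl x1, 2)) selects as Base = x0, Offset = x1, matching the
/// [x0, x1, lsl #2] form.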
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

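    // The scaled immediate cannot be encoded directly in the reg+reg form,
    // so materialize it into a GPR with MOVi64imm and use that register as
    // the offset.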
    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

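/// Returns true if \p N is known to be an all-active SVE predicate (for
/// example, one produced by ptrue with the "all" pattern).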
bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

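/// Returns true if \p N has a scalable predicate type (MVT::nx<M>xi1),
/// regardless of the predicate's contents.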
bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Try to untangle an ADD node into a 'reg + offset'.
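  // For example (an illustrative sketch): (add x0, #8) with Scale = 4 and
  // MaxSize >= 8 yields Base = x0, Offset = #2.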
  if (N.getOpcode() == ISD::ADD)
    if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t ImmOff = C->getSExtValue();
      if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)) {
        Base = N.getOperand(0);
        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
        return true;
      }
    }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}
