//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  static char ID;

  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(ID, tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (like those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }
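
  // Illustrative examples (not exhaustive): an ARM so_imm is an 8-bit value
  // rotated right by an even amount, so 0x000000FF, 0x0000FF00 and 0xFF000000
  // all satisfy is_so_imm, while 0x00000101 does not. is_so_imm_not instead
  // checks whether the bitwise complement is encodable, which is what patterns
  // selecting MVN/BIC-style forms want.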

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

char ARMDAGToDAGISel::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = N->getAsZExtVal();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
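
// For example (illustrative values only): a constant of 20 with Scale == 4
// yields ScaledConstant == 5, which is in range for RangeMin == 0 and
// RangeMax == 32; a constant of 21 fails the divisibility check and is
// rejected regardless of the range.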

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).
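    //
    // Concretely (values for illustration): with c2 = 1020 = 0b1111111100 we
    // have tz = 2 and c2>>tz = 255, so the rewrite turns
    //   (add X1, (and (srl X2, c1), 1020))
    // into
    //   (add X1, (shl (and (srl X2, c1), 255), 2))
    // where the 'shl' folds into the add's shifter operand and the remaining
    // (and (srl ...)) matches UBFX.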

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free; e.g. on
      // Swift a left shifter operand of 1 or 2 is free but other shift
      // amounts are not. e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    // vs.
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
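
// In other words, on Cortex-A9-like or Swift cores a multi-use shift is only
// folded into a shifter operand for the "free" cases above (lsl #2, and
// lsl #1 on Swift); elsewhere a separate shift instruction can be cheaper
// when the shifted value has other uses. (Illustrative; actual costs depend
// on the microarchitecture.)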

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
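
// For example (illustrative constants): X * 510 can be rewritten as
// (X * 255) << 1. 255 is a valid ARM modified-immediate while 510 is not, so
// on most subtargets the new constant is cheaper to materialize and the shl
// can later fold into a shifter operand; X * 24 is left alone when 24 and 3
// cost the same.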

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
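
// e.g. when N is (shl r1, 3), this returns BaseReg = r1 with Opc encoding
// "lsl #3", letting a parent pattern such as an add emit
// "add r0, r2, r1, lsl #3" as a single instruction. (Illustrative registers.)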

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}


bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
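
// e.g. (load (add r1, 1023)) is matched as Base = r1, OffImm = 1023 and can
// be emitted as "ldr r0, [r1, #1023]"; an out-of-range offset such as 4096
// falls through to the base-only case, leaving the add to be selected
// separately. (Illustrative registers and offsets.)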


bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
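
// e.g. (load (add r1, (shl r2, 2))) becomes Base = r1, Offset = r2 with an
// AM2 opcode encoding "lsl #2", i.e. roughly "ldr r0, [r1, r2, lsl #2]"; and
// the multiply case above handles X * 5 as X + (X << 2) with Base and Offset
// both set to X. (Illustrative registers.)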

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
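
// e.g. with FP16 == false (Scale == 4), (load (add r0, -1020)) scales to
// RHSC == -255, which fits the imm8 field and is roughly
// "vldr d0, [r0, #-1020]"; an offset that is not a multiple of 4 (or of 2
// for FP16) falls back to the base-only form. (Illustrative registers and
// offsets.)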

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
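
// e.g. in Thumb1 the imm5 load/store offset is unsigned, so
// (load (add r0, -8)) cannot become "ldr r1, [r0, #-8]". This helper returns
// true for that shape, so the add is selected on its own (becoming a
// subtract) and the load uses a zero offset. (Illustrative registers.)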

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    if (!isNullConstant(N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
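
// e.g. with Shift == 2, a byte offset of 48 is a multiple of 4 whose scaled
// value 12 fits in a signed imm7, so it is folded (OffImm keeps the byte
// offset, 48); an offset of 50 is not a multiple of 4 and falls back to the
// base-only form. (Illustrative values.)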
1275
1276
1277//===----------------------------------------------------------------------===//
1278// Thumb 2 Addressing Modes
1279//===----------------------------------------------------------------------===//
1280
1281
1282bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1283 SDValue &Base, SDValue &OffImm) {
1284 // Match simple R + imm12 operands.
1285
1286 // Base only.
1287 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1288 !CurDAG->isBaseWithConstantOffset(Op: N)) {
1289 if (N.getOpcode() == ISD::FrameIndex) {
1290 // Match frame index.
1291 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1292 Base = CurDAG->getTargetFrameIndex(
1293 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1294 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1295 return true;
1296 }
1297
1298 if (N.getOpcode() == ARMISD::Wrapper &&
1299 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
1300 N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
1301 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1302 Base = N.getOperand(i: 0);
1303 if (Base.getOpcode() == ISD::TargetConstantPool)
1304 return false; // We want to select t2LDRpci instead.
1305 } else
1306 Base = N;
1307 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1308 return true;
1309 }
1310
1311 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1312 if (SelectT2AddrModeImm8(N, Base, OffImm))
1313 // Let t2LDRi8 handle (R - imm8).
1314 return false;
1315
1316 int RHSC = (int)RHS->getZExtValue();
1317 if (N.getOpcode() == ISD::SUB)
1318 RHSC = -RHSC;
1319
1320 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1321 Base = N.getOperand(i: 0);
1322 if (Base.getOpcode() == ISD::FrameIndex) {
1323 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1324 Base = CurDAG->getTargetFrameIndex(
1325 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1326 }
1327 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1328 return true;
1329 }
1330 }
1331
1332 // Base only.
1333 Base = N;
1334 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1335 return true;
1336}
1337
1338template <unsigned Shift>
1339bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1340 SDValue &OffImm) {
1341 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
1342 int RHSC;
1343 if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -255, RangeMax: 256, ScaledConstant&: RHSC)) {
1344 Base = N.getOperand(i: 0);
1345 if (Base.getOpcode() == ISD::FrameIndex) {
1346 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1347 Base = CurDAG->getTargetFrameIndex(
1348 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1349 }
1350
1351 if (N.getOpcode() == ISD::SUB)
1352 RHSC = -RHSC;
1353 OffImm =
1354 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1355 return true;
1356 }
1357 }
1358
1359 // Base only.
1360 Base = N;
1361 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1362 return true;
1363}
1364
1365bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1366 SDValue &Base, SDValue &OffImm) {
1367 // Match simple R - imm8 operands.
1368 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1369 !CurDAG->isBaseWithConstantOffset(Op: N))
1370 return false;
1371
1372 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1373 int RHSC = (int)RHS->getSExtValue();
1374 if (N.getOpcode() == ISD::SUB)
1375 RHSC = -RHSC;
1376
1377 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1378 Base = N.getOperand(i: 0);
1379 if (Base.getOpcode() == ISD::FrameIndex) {
1380 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1381 Base = CurDAG->getTargetFrameIndex(
1382 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1383 }
1384 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1385 return true;
1386 }
1387 }
1388
1389 return false;
1390}
1391
1392bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1393 SDValue &OffImm){
1394 unsigned Opcode = Op->getOpcode();
1395 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1396 ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
1397 : cast<StoreSDNode>(Val: Op)->getAddressingMode();
1398 int RHSC;
1399 if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x100, ScaledConstant&: RHSC)) { // 8 bits.
1400 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1401 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1402 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1403 return true;
1404 }
1405
1406 return false;
1407}
1408
1409template <unsigned Shift>
1410bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1411 SDValue &OffImm) {
1412 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
1413 int RHSC;
1414 if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -0x7f, RangeMax: 0x80,
1415 ScaledConstant&: RHSC)) {
1416 Base = N.getOperand(i: 0);
1417 if (Base.getOpcode() == ISD::FrameIndex) {
1418 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1419 Base = CurDAG->getTargetFrameIndex(
1420 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1421 }
1422
1423 if (N.getOpcode() == ISD::SUB)
1424 RHSC = -RHSC;
1425 OffImm =
1426 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1427 return true;
1428 }
1429 }
1430
1431 // Base only.
1432 Base = N;
1433 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1434 return true;
1435}
1436
1437template <unsigned Shift>
1438bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1439 SDValue &OffImm) {
1440 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1441}
1442
1443bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1444 SDValue &OffImm,
1445 unsigned Shift) {
1446 unsigned Opcode = Op->getOpcode();
1447 ISD::MemIndexedMode AM;
1448 switch (Opcode) {
1449 case ISD::LOAD:
1450 AM = cast<LoadSDNode>(Val: Op)->getAddressingMode();
1451 break;
1452 case ISD::STORE:
1453 AM = cast<StoreSDNode>(Val: Op)->getAddressingMode();
1454 break;
1455 case ISD::MLOAD:
1456 AM = cast<MaskedLoadSDNode>(Val: Op)->getAddressingMode();
1457 break;
1458 case ISD::MSTORE:
1459 AM = cast<MaskedStoreSDNode>(Val: Op)->getAddressingMode();
1460 break;
1461 default:
1462 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1463 }
1464
1465 int RHSC;
1466 // 7 bit constant, shifted by Shift.
1467 if (isScaledConstantInRange(Node: N, Scale: 1 << Shift, RangeMin: 0, RangeMax: 0x80, ScaledConstant&: RHSC)) {
1468 OffImm =
1469 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1470 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1471 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1472 MVT::i32);
1473 return true;
1474 }
1475 return false;
1476}
1477
1478template <int Min, int Max>
1479bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1480 int Val;
1481 if (isScaledConstantInRange(Node: N, Scale: 1, RangeMin: Min, RangeMax: Max, ScaledConstant&: Val)) {
1482 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1483 return true;
1484 }
1485 return false;
1486}
1487
1488bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1489 SDValue &Base,
1490 SDValue &OffReg, SDValue &ShImm) {
1491 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1492 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N))
1493 return false;
1494
1495 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1496 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1497 int RHSC = (int)RHS->getZExtValue();
1498 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1499 return false;
1500 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1501 return false;
1502 }
1503
1504 // Look for (R + R) or (R + (R << [1,2,3])).
1505 unsigned ShAmt = 0;
1506 Base = N.getOperand(i: 0);
1507 OffReg = N.getOperand(i: 1);
1508
1509 // Swap if it is ((R << c) + R).
1510 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: OffReg.getOpcode());
1511 if (ShOpcVal != ARM_AM::lsl) {
1512 ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: Base.getOpcode());
1513 if (ShOpcVal == ARM_AM::lsl)
1514 std::swap(a&: Base, b&: OffReg);
1515 }
1516
1517 if (ShOpcVal == ARM_AM::lsl) {
1518 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1519 // it.
1520 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: OffReg.getOperand(i: 1))) {
1521 ShAmt = Sh->getZExtValue();
1522 if (ShAmt < 4 && isShifterOpProfitable(Shift: OffReg, ShOpcVal, ShAmt))
1523 OffReg = OffReg.getOperand(i: 0);
1524 else {
1525 ShAmt = 0;
1526 }
1527 }
1528 }
1529
1530 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1531 // and use it in a shifted operand do so.
1532 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1533 unsigned PowerOfTwo = 0;
1534 SDValue NewMulConst;
1535 if (canExtractShiftFromMul(N: OffReg, MaxShift: 3, PowerOfTwo, NewMulConst)) {
1536 HandleSDNode Handle(OffReg);
1537 replaceDAGValue(N: OffReg.getOperand(i: 1), M: NewMulConst);
1538 OffReg = Handle.getValue();
1539 ShAmt = PowerOfTwo;
1540 }
1541 }
1542
1543 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1544
1545 return true;
1546}
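
// For illustration: the (Base, OffReg, ShImm) triple produced above
// corresponds to the Thumb-2 register-offset form [Rn, Rm, lsl #imm2] with
// imm2 in [0,3], e.g.
//   ldr.w r0, [r1, r2, lsl #2]
// The mul-to-shift rewrite lets an index computed as "i * 4" reuse the free
// shifter rather than keeping a separate multiply.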
1547
1548bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1549 SDValue &OffImm) {
1550 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1551 // instructions.
1552 Base = N;
1553 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1554
1555 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(Op: N))
1556 return true;
1557
1558 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1559 if (!RHS)
1560 return true;
1561
  uint32_t RHSC = (uint32_t)RHS->getZExtValue();
1563 if (RHSC > 1020 || RHSC % 4 != 0)
1564 return true;
1565
1566 Base = N.getOperand(i: 0);
1567 if (Base.getOpcode() == ISD::FrameIndex) {
1568 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1569 Base = CurDAG->getTargetFrameIndex(
1570 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1571 }
1572
1573 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1574 return true;
1575}
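
// Example of the fold above (a sketch): Thumb-2 ldrex/strex encode the
// offset as an imm8 scaled by 4, so
//   ldrex r0, [r1, #1020]   ; RHSC == 1020 becomes OffImm == 255
// is representable, while a misaligned (#1022) or out-of-range (#1024)
// offset leaves the whole address in Base with OffImm == 0.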
1576
1577//===--------------------------------------------------------------------===//
1578
/// getAL - Returns an ARMCC::AL immediate node.
1580static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1581 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1582}
1583
1584void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1585 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1586 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
1587}
1588
1589bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1590 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1591 ISD::MemIndexedMode AM = LD->getAddressingMode();
1592 if (AM == ISD::UNINDEXED)
1593 return false;
1594
1595 EVT LoadedVT = LD->getMemoryVT();
1596 SDValue Offset, AMOpc;
1597 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1598 unsigned Opcode = 0;
1599 bool Match = false;
1600 if (LoadedVT == MVT::i32 && isPre &&
1601 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1602 Opcode = ARM::LDR_PRE_IMM;
1603 Match = true;
1604 } else if (LoadedVT == MVT::i32 && !isPre &&
1605 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1606 Opcode = ARM::LDR_POST_IMM;
1607 Match = true;
1608 } else if (LoadedVT == MVT::i32 &&
1609 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1610 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1611 Match = true;
1612
1613 } else if (LoadedVT == MVT::i16 &&
1614 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1615 Match = true;
1616 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1617 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1618 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1619 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1620 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1621 if (SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1622 Match = true;
1623 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1624 }
1625 } else {
1626 if (isPre &&
1627 SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1628 Match = true;
1629 Opcode = ARM::LDRB_PRE_IMM;
1630 } else if (!isPre &&
1631 SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1632 Match = true;
1633 Opcode = ARM::LDRB_POST_IMM;
1634 } else if (SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1635 Match = true;
1636 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1637 }
1638 }
1639 }
1640
1641 if (Match) {
1642 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1643 SDValue Chain = LD->getChain();
1644 SDValue Base = LD->getBasePtr();
1645 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1646 CurDAG->getRegister(0, MVT::i32), Chain };
1647 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1648 MVT::Other, Ops);
1649 transferMemOperands(N, Result: New);
1650 ReplaceNode(F: N, T: New);
1651 return true;
1652 } else {
1653 SDValue Chain = LD->getChain();
1654 SDValue Base = LD->getBasePtr();
1655 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1656 CurDAG->getRegister(0, MVT::i32), Chain };
1657 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1658 MVT::Other, Ops);
1659 transferMemOperands(N, Result: New);
1660 ReplaceNode(F: N, T: New);
1661 return true;
1662 }
1663 }
1664
1665 return false;
1666}
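
// For reference, the two indexed forms distinguished above (assembly sketch):
//   ldr r0, [r1, #4]!   ; pre-indexed:  load from r1+4, then r1 = r1+4
//   ldr r0, [r1], #4    ; post-indexed: load from r1,   then r1 = r1+4
// Both produce two values, the loaded data and the updated base, which is
// why the machine nodes built here carry the (i32, i32, Other) result list.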
1667
1668bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1669 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1670 EVT LoadedVT = LD->getMemoryVT();
1671 ISD::MemIndexedMode AM = LD->getAddressingMode();
1672 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1673 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1674 return false;
1675
1676 auto *COffs = dyn_cast<ConstantSDNode>(Val: LD->getOffset());
1677 if (!COffs || COffs->getZExtValue() != 4)
1678 return false;
1679
1680 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // However, the encoding of LDM is not how the rest of ISel expects a
  // post-inc load to look, so we use a pseudo here and switch it to a
  // tLDMIA_UPD after ISel.
1684 SDValue Chain = LD->getChain();
1685 SDValue Base = LD->getBasePtr();
1686 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1687 CurDAG->getRegister(0, MVT::i32), Chain };
1688 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1689 MVT::i32, MVT::Other, Ops);
1690 transferMemOperands(N, Result: New);
1691 ReplaceNode(F: N, T: New);
1692 return true;
1693}
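
// E.g. the pseudo selected above is later rewritten to "ldm r0!, {r1}",
// which is, in effect, the Thumb-1 spelling of a post-incremented word load.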
1694
1695bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1696 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1697 ISD::MemIndexedMode AM = LD->getAddressingMode();
1698 if (AM == ISD::UNINDEXED)
1699 return false;
1700
1701 EVT LoadedVT = LD->getMemoryVT();
1702 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1703 SDValue Offset;
1704 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1705 unsigned Opcode = 0;
1706 bool Match = false;
1707 if (SelectT2AddrModeImm8Offset(Op: N, N: LD->getOffset(), OffImm&: Offset)) {
1708 switch (LoadedVT.getSimpleVT().SimpleTy) {
1709 case MVT::i32:
1710 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1711 break;
1712 case MVT::i16:
1713 if (isSExtLd)
1714 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1715 else
1716 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1717 break;
1718 case MVT::i8:
1719 case MVT::i1:
1720 if (isSExtLd)
1721 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1722 else
1723 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1724 break;
1725 default:
1726 return false;
1727 }
1728 Match = true;
1729 }
1730
1731 if (Match) {
1732 SDValue Chain = LD->getChain();
1733 SDValue Base = LD->getBasePtr();
1734 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1735 CurDAG->getRegister(0, MVT::i32), Chain };
1736 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1737 MVT::Other, Ops);
1738 transferMemOperands(N, Result: New);
1739 ReplaceNode(F: N, T: New);
1740 return true;
1741 }
1742
1743 return false;
1744}
1745
1746bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1747 EVT LoadedVT;
1748 unsigned Opcode = 0;
1749 bool isSExtLd, isPre;
1750 Align Alignment;
1751 ARMVCC::VPTCodes Pred;
1752 SDValue PredReg;
1753 SDValue Chain, Base, Offset;
1754
1755 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
1756 ISD::MemIndexedMode AM = LD->getAddressingMode();
1757 if (AM == ISD::UNINDEXED)
1758 return false;
1759 LoadedVT = LD->getMemoryVT();
1760 if (!LoadedVT.isVector())
1761 return false;
1762
1763 Chain = LD->getChain();
1764 Base = LD->getBasePtr();
1765 Offset = LD->getOffset();
1766 Alignment = LD->getAlign();
1767 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1768 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1769 Pred = ARMVCC::None;
1770 PredReg = CurDAG->getRegister(0, MVT::i32);
1771 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
1772 ISD::MemIndexedMode AM = LD->getAddressingMode();
1773 if (AM == ISD::UNINDEXED)
1774 return false;
1775 LoadedVT = LD->getMemoryVT();
1776 if (!LoadedVT.isVector())
1777 return false;
1778
1779 Chain = LD->getChain();
1780 Base = LD->getBasePtr();
1781 Offset = LD->getOffset();
1782 Alignment = LD->getAlign();
1783 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1784 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1785 Pred = ARMVCC::Then;
1786 PredReg = LD->getMask();
1787 } else
1788 llvm_unreachable("Expected a Load or a Masked Load!");
1789
1790 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1791 // as opposed to a vldrw.32). This can allow extra addressing modes or
1792 // alignments for what is otherwise an equivalent instruction.
1793 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(Val: N);
1794
1795 SDValue NewOffset;
1796 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1797 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1798 if (isSExtLd)
1799 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1800 else
1801 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1802 } else if (LoadedVT == MVT::v8i8 &&
1803 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1804 if (isSExtLd)
1805 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1806 else
1807 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1808 } else if (LoadedVT == MVT::v4i8 &&
1809 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1810 if (isSExtLd)
1811 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1812 else
1813 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1814 } else if (Alignment >= Align(4) &&
1815 (CanChangeType || LoadedVT == MVT::v4i32 ||
1816 LoadedVT == MVT::v4f32) &&
1817 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1818 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1819 else if (Alignment >= Align(2) &&
1820 (CanChangeType || LoadedVT == MVT::v8i16 ||
1821 LoadedVT == MVT::v8f16) &&
1822 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1823 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1824 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1825 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1826 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1827 else
1828 return false;
1829
1830 SDValue Ops[] = {Base,
1831 NewOffset,
1832 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1833 PredReg,
1834 CurDAG->getRegister(0, MVT::i32), // tp_reg
1835 Chain};
1836 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1837 N->getValueType(0), MVT::Other, Ops);
1838 transferMemOperands(N, Result: New);
1839 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
1840 ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
1841 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
1842 CurDAG->RemoveDeadNode(N);
1843 return true;
1844}
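
// A sketch of the result (assumed syntax): a post-increment v16i8 load is
// selected as
//   vldrb.u8 q0, [r0], #16
// and, because LE non-masked loads may change element size, an aligned
// v4i32 load whose increment only fits the byte-scaled imm7 range can still
// fall back to the vldrb.8 form.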
1845
1846/// Form a GPRPair pseudo register from a pair of GPR regs.
1847SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1848 SDLoc dl(V0.getNode());
1849 SDValue RegClass =
1850 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1851 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1852 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1853 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1854 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1855}
1856
1857/// Form a D register from a pair of S registers.
1858SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1859 SDLoc dl(V0.getNode());
1860 SDValue RegClass =
1861 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1862 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1863 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1864 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1865 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1866}
1867
1868/// Form a quad register from a pair of D registers.
1869SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1870 SDLoc dl(V0.getNode());
1871 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1872 MVT::i32);
1873 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1874 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1875 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1876 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1877}
1878
1879/// Form 4 consecutive D registers from a pair of Q registers.
1880SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1881 SDLoc dl(V0.getNode());
1882 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1883 MVT::i32);
1884 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1885 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1886 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1887 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1888}
1889
1890/// Form 4 consecutive S registers.
1891SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1892 SDValue V2, SDValue V3) {
1893 SDLoc dl(V0.getNode());
1894 SDValue RegClass =
1895 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1896 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1897 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1898 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1899 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1900 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1901 V2, SubReg2, V3, SubReg3 };
1902 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1903}
1904
1905/// Form 4 consecutive D registers.
1906SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1907 SDValue V2, SDValue V3) {
1908 SDLoc dl(V0.getNode());
1909 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1910 MVT::i32);
1911 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1912 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1913 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1914 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1915 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1916 V2, SubReg2, V3, SubReg3 };
1917 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1918}
1919
1920/// Form 4 consecutive Q registers.
1921SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1922 SDValue V2, SDValue V3) {
1923 SDLoc dl(V0.getNode());
1924 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1925 MVT::i32);
1926 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1927 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1928 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1929 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1930 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1931 V2, SubReg2, V3, SubReg3 };
1932 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1933}
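
// All of the builders above emit the target-independent REG_SEQUENCE node;
// e.g. createDRegPairNode(MVT::v2i64, D0, D1) is morally
//   %q = REG_SEQUENCE QPR, %d0, dsub_0, %d1, dsub_1
// which lets the register allocator place the inputs in the adjacent
// registers that NEON multi-register instructions require.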
1934
1935/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1936/// of a NEON VLD or VST instruction. The supported values depend on the
1937/// number of registers being loaded.
1938SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1939 unsigned NumVecs, bool is64BitVector) {
1940 unsigned NumRegs = NumVecs;
1941 if (!is64BitVector && NumVecs < 3)
1942 NumRegs *= 2;
1943
1944 unsigned Alignment = Align->getAsZExtVal();
1945 if (Alignment >= 32 && NumRegs == 4)
1946 Alignment = 32;
1947 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1948 Alignment = 16;
1949 else if (Alignment >= 8)
1950 Alignment = 8;
1951 else
1952 Alignment = 0;
1953
1954 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1955}
1956
static bool isVLDfixed(unsigned Opc) {
1959 switch (Opc) {
1960 default: return false;
1961 case ARM::VLD1d8wb_fixed : return true;
1962 case ARM::VLD1d16wb_fixed : return true;
1963 case ARM::VLD1d64Qwb_fixed : return true;
1964 case ARM::VLD1d32wb_fixed : return true;
1965 case ARM::VLD1d64wb_fixed : return true;
1966 case ARM::VLD1d8TPseudoWB_fixed : return true;
1967 case ARM::VLD1d16TPseudoWB_fixed : return true;
1968 case ARM::VLD1d32TPseudoWB_fixed : return true;
1969 case ARM::VLD1d64TPseudoWB_fixed : return true;
1970 case ARM::VLD1d8QPseudoWB_fixed : return true;
1971 case ARM::VLD1d16QPseudoWB_fixed : return true;
1972 case ARM::VLD1d32QPseudoWB_fixed : return true;
1973 case ARM::VLD1d64QPseudoWB_fixed : return true;
1974 case ARM::VLD1q8wb_fixed : return true;
1975 case ARM::VLD1q16wb_fixed : return true;
1976 case ARM::VLD1q32wb_fixed : return true;
1977 case ARM::VLD1q64wb_fixed : return true;
1978 case ARM::VLD1DUPd8wb_fixed : return true;
1979 case ARM::VLD1DUPd16wb_fixed : return true;
1980 case ARM::VLD1DUPd32wb_fixed : return true;
1981 case ARM::VLD1DUPq8wb_fixed : return true;
1982 case ARM::VLD1DUPq16wb_fixed : return true;
1983 case ARM::VLD1DUPq32wb_fixed : return true;
1984 case ARM::VLD2d8wb_fixed : return true;
1985 case ARM::VLD2d16wb_fixed : return true;
1986 case ARM::VLD2d32wb_fixed : return true;
1987 case ARM::VLD2q8PseudoWB_fixed : return true;
1988 case ARM::VLD2q16PseudoWB_fixed : return true;
1989 case ARM::VLD2q32PseudoWB_fixed : return true;
1990 case ARM::VLD2DUPd8wb_fixed : return true;
1991 case ARM::VLD2DUPd16wb_fixed : return true;
1992 case ARM::VLD2DUPd32wb_fixed : return true;
1993 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1994 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1995 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1996 }
1997}
1998
static bool isVSTfixed(unsigned Opc) {
2001 switch (Opc) {
2002 default: return false;
2003 case ARM::VST1d8wb_fixed : return true;
2004 case ARM::VST1d16wb_fixed : return true;
2005 case ARM::VST1d32wb_fixed : return true;
2006 case ARM::VST1d64wb_fixed : return true;
2007 case ARM::VST1q8wb_fixed : return true;
2008 case ARM::VST1q16wb_fixed : return true;
2009 case ARM::VST1q32wb_fixed : return true;
2010 case ARM::VST1q64wb_fixed : return true;
2011 case ARM::VST1d8TPseudoWB_fixed : return true;
2012 case ARM::VST1d16TPseudoWB_fixed : return true;
2013 case ARM::VST1d32TPseudoWB_fixed : return true;
2014 case ARM::VST1d64TPseudoWB_fixed : return true;
2015 case ARM::VST1d8QPseudoWB_fixed : return true;
2016 case ARM::VST1d16QPseudoWB_fixed : return true;
2017 case ARM::VST1d32QPseudoWB_fixed : return true;
2018 case ARM::VST1d64QPseudoWB_fixed : return true;
2019 case ARM::VST2d8wb_fixed : return true;
2020 case ARM::VST2d16wb_fixed : return true;
2021 case ARM::VST2d32wb_fixed : return true;
2022 case ARM::VST2q8PseudoWB_fixed : return true;
2023 case ARM::VST2q16PseudoWB_fixed : return true;
2024 case ARM::VST2q32PseudoWB_fixed : return true;
2025 }
2026}
2027
2028// Get the register stride update opcode of a VLD/VST instruction that
2029// is otherwise equivalent to the given fixed stride updating instruction.
2030static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2031 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2032 && "Incorrect fixed stride updating instruction.");
2033 switch (Opc) {
2034 default: break;
2035 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2036 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2037 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2038 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2039 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2040 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2041 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2042 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2043 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2044 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2045 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2046 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2047 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2048 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2049 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2050 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2051 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2052 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2053 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2054 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2055 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2056 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2057 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2058 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2059 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2060 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2061 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2062
2063 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2064 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2065 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2066 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2067 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2068 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2069 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2070 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2071 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2072 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2073 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2074 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2075 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2076 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2077 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2078 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2079
2080 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2081 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2082 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2083 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2084 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2085 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2086
2087 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2088 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2089 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2090 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2091 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2092 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2093
2094 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2095 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2096 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2097 }
2098 return Opc; // If not one we handle, return it unchanged.
2099}
2100
2101/// Returns true if the given increment is a Constant known to be equal to the
2102/// access size performed by a NEON load/store. This means the "[rN]!" form can
2103/// be used.
2104static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2105 auto C = dyn_cast<ConstantSDNode>(Val&: Inc);
2106 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2107}
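
// Worked example: for a vld2.32 of two v4i32 vectors the access size is
// 16 bytes/vector * 2 vectors == 32 bytes, so only a constant increment of
// exactly 32 selects the "[rN]!" writeback form; anything else must use the
// "[rN], rM" register-update opcode from getVLDSTRegisterUpdateOpcode.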
2108
2109void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2110 const uint16_t *DOpcodes,
2111 const uint16_t *QOpcodes0,
2112 const uint16_t *QOpcodes1) {
2113 assert(Subtarget->hasNEON());
2114 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2115 SDLoc dl(N);
2116
2117 SDValue MemAddr, Align;
2118 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2119 // nodes are not intrinsics.
2120 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2121 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2122 return;
2123
2124 SDValue Chain = N->getOperand(Num: 0);
2125 EVT VT = N->getValueType(ResNo: 0);
2126 bool is64BitVector = VT.is64BitVector();
2127 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2128
2129 unsigned OpcodeIndex;
2130 switch (VT.getSimpleVT().SimpleTy) {
2131 default: llvm_unreachable("unhandled vld type");
2132 // Double-register operations:
2133 case MVT::v8i8: OpcodeIndex = 0; break;
2134 case MVT::v4f16:
2135 case MVT::v4bf16:
2136 case MVT::v4i16: OpcodeIndex = 1; break;
2137 case MVT::v2f32:
2138 case MVT::v2i32: OpcodeIndex = 2; break;
2139 case MVT::v1i64: OpcodeIndex = 3; break;
2140 // Quad-register operations:
2141 case MVT::v16i8: OpcodeIndex = 0; break;
2142 case MVT::v8f16:
2143 case MVT::v8bf16:
2144 case MVT::v8i16: OpcodeIndex = 1; break;
2145 case MVT::v4f32:
2146 case MVT::v4i32: OpcodeIndex = 2; break;
2147 case MVT::v2f64:
2148 case MVT::v2i64: OpcodeIndex = 3; break;
2149 }
2150
2151 EVT ResTy;
2152 if (NumVecs == 1)
2153 ResTy = VT;
2154 else {
2155 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2156 if (!is64BitVector)
2157 ResTyElts *= 2;
2158 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2159 }
2160 std::vector<EVT> ResTys;
2161 ResTys.push_back(x: ResTy);
2162 if (isUpdating)
2163 ResTys.push_back(MVT::i32);
2164 ResTys.push_back(MVT::Other);
2165
2166 SDValue Pred = getAL(CurDAG, dl);
2167 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2168 SDNode *VLd;
2169 SmallVector<SDValue, 7> Ops;
2170
2171 // Double registers and VLD1/VLD2 quad registers are directly supported.
2172 if (is64BitVector || NumVecs <= 2) {
2173 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2174 QOpcodes0[OpcodeIndex]);
2175 Ops.push_back(Elt: MemAddr);
2176 Ops.push_back(Elt: Align);
2177 if (isUpdating) {
2178 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2179 bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2180 if (!IsImmUpdate) {
2181 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2182 // check for the opcode rather than the number of vector elements.
2183 if (isVLDfixed(Opc))
2184 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2185 Ops.push_back(Elt: Inc);
        // VLD1/VLD2 fixed increment does not need Reg0, so only include it
        // in the operands for other opcodes.
2188 } else if (!isVLDfixed(Opc))
2189 Ops.push_back(Elt: Reg0);
2190 }
2191 Ops.push_back(Elt: Pred);
2192 Ops.push_back(Elt: Reg0);
2193 Ops.push_back(Elt: Chain);
2194 VLd = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2195
2196 } else {
2197 // Otherwise, quad registers are loaded with two separate instructions,
2198 // where one loads the even registers and the other loads the odd registers.
2199 EVT AddrTy = MemAddr.getValueType();
2200
2201 // Load the even subregs. This is always an updating load, so that it
2202 // provides the address to the second load for the odd subregs.
2203 SDValue ImplDef =
2204 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
2205 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2206 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2207 ResTy, AddrTy, MVT::Other, OpsA);
2208 Chain = SDValue(VLdA, 2);
2209
2210 // Load the odd subregs.
2211 Ops.push_back(Elt: SDValue(VLdA, 1));
2212 Ops.push_back(Elt: Align);
2213 if (isUpdating) {
2214 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2215 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2216 "only constant post-increment update allowed for VLD3/4");
2217 (void)Inc;
2218 Ops.push_back(Elt: Reg0);
2219 }
2220 Ops.push_back(Elt: SDValue(VLdA, 0));
2221 Ops.push_back(Elt: Pred);
2222 Ops.push_back(Elt: Reg0);
2223 Ops.push_back(Elt: Chain);
2224 VLd = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys, Ops);
2225 }
2226
2227 // Transfer memoperands.
2228 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2229 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLd), NewMemRefs: {MemOp});
2230
2231 if (NumVecs == 1) {
2232 ReplaceNode(F: N, T: VLd);
2233 return;
2234 }
2235
2236 // Extract out the subregisters.
2237 SDValue SuperReg = SDValue(VLd, 0);
2238 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2239 ARM::qsub_3 == ARM::qsub_0 + 3,
2240 "Unexpected subreg numbering");
2241 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2242 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2243 ReplaceUses(F: SDValue(N, Vec),
2244 T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2245 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLd, 1));
2246 if (isUpdating)
2247 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLd, 2));
2248 CurDAG->RemoveDeadNode(N);
2249}
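
// Shape of the two-instruction split above, as a sketch: a vld3.32 of Q
// registers becomes
//   vld3.32 {d0, d2, d4}, [r0]!   ; even D halves, writes the new address
//   vld3.32 {d1, d3, d5}, [r0]    ; odd D halves at the advanced address
// after which the super-register is carved back up with dsub_*/qsub_*
// extracts to satisfy the original node's results.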
2250
2251void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2252 const uint16_t *DOpcodes,
2253 const uint16_t *QOpcodes0,
2254 const uint16_t *QOpcodes1) {
2255 assert(Subtarget->hasNEON());
2256 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2257 SDLoc dl(N);
2258
2259 SDValue MemAddr, Align;
2260 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2261 // nodes are not intrinsics.
2262 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2263 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2264 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2265 return;
2266
2267 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2268
2269 SDValue Chain = N->getOperand(Num: 0);
2270 EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2271 bool is64BitVector = VT.is64BitVector();
2272 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2273
2274 unsigned OpcodeIndex;
2275 switch (VT.getSimpleVT().SimpleTy) {
2276 default: llvm_unreachable("unhandled vst type");
2277 // Double-register operations:
2278 case MVT::v8i8: OpcodeIndex = 0; break;
2279 case MVT::v4f16:
2280 case MVT::v4bf16:
2281 case MVT::v4i16: OpcodeIndex = 1; break;
2282 case MVT::v2f32:
2283 case MVT::v2i32: OpcodeIndex = 2; break;
2284 case MVT::v1i64: OpcodeIndex = 3; break;
2285 // Quad-register operations:
2286 case MVT::v16i8: OpcodeIndex = 0; break;
2287 case MVT::v8f16:
2288 case MVT::v8bf16:
2289 case MVT::v8i16: OpcodeIndex = 1; break;
2290 case MVT::v4f32:
2291 case MVT::v4i32: OpcodeIndex = 2; break;
2292 case MVT::v2f64:
2293 case MVT::v2i64: OpcodeIndex = 3; break;
2294 }
2295
2296 std::vector<EVT> ResTys;
2297 if (isUpdating)
2298 ResTys.push_back(MVT::i32);
2299 ResTys.push_back(MVT::Other);
2300
2301 SDValue Pred = getAL(CurDAG, dl);
2302 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2303 SmallVector<SDValue, 7> Ops;
2304
2305 // Double registers and VST1/VST2 quad registers are directly supported.
2306 if (is64BitVector || NumVecs <= 2) {
2307 SDValue SrcReg;
2308 if (NumVecs == 1) {
2309 SrcReg = N->getOperand(Num: Vec0Idx);
2310 } else if (is64BitVector) {
2311 // Form a REG_SEQUENCE to force register allocation.
2312 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2313 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2314 if (NumVecs == 2)
2315 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2316 else {
2317 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2318 // If it's a vst3, form a quad D-register and leave the last part as
2319 // an undef.
2320 SDValue V3 = (NumVecs == 3)
2321 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2322 : N->getOperand(Num: Vec0Idx + 3);
2323 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2324 }
2325 } else {
2326 // Form a QQ register.
2327 SDValue Q0 = N->getOperand(Num: Vec0Idx);
2328 SDValue Q1 = N->getOperand(Num: Vec0Idx + 1);
2329 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2330 }
2331
2332 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2333 QOpcodes0[OpcodeIndex]);
2334 Ops.push_back(Elt: MemAddr);
2335 Ops.push_back(Elt: Align);
2336 if (isUpdating) {
2337 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2338 bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2339 if (!IsImmUpdate) {
2340 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2341 // check for the opcode rather than the number of vector elements.
2342 if (isVSTfixed(Opc))
2343 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2344 Ops.push_back(Elt: Inc);
2345 }
      // VST1/VST2 fixed increment does not need Reg0, so only include it in
      // the operands for other opcodes.
2348 else if (!isVSTfixed(Opc))
2349 Ops.push_back(Elt: Reg0);
2350 }
2351 Ops.push_back(Elt: SrcReg);
2352 Ops.push_back(Elt: Pred);
2353 Ops.push_back(Elt: Reg0);
2354 Ops.push_back(Elt: Chain);
2355 SDNode *VSt = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2356
2357 // Transfer memoperands.
2358 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VSt), NewMemRefs: {MemOp});
2359
2360 ReplaceNode(F: N, T: VSt);
2361 return;
2362 }
2363
2364 // Otherwise, quad registers are stored with two separate instructions,
2365 // where one stores the even registers and the other stores the odd registers.
2366
2367 // Form the QQQQ REG_SEQUENCE.
2368 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2369 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2370 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2371 SDValue V3 = (NumVecs == 3)
2372 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2373 : N->getOperand(Num: Vec0Idx + 3);
2374 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2375
2376 // Store the even D registers. This is always an updating store, so that it
2377 // provides the address to the second store for the odd subregs.
2378 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2379 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2380 MemAddr.getValueType(),
2381 MVT::Other, OpsA);
2382 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStA), NewMemRefs: {MemOp});
2383 Chain = SDValue(VStA, 1);
2384
2385 // Store the odd D registers.
2386 Ops.push_back(Elt: SDValue(VStA, 0));
2387 Ops.push_back(Elt: Align);
2388 if (isUpdating) {
2389 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2390 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2391 "only constant post-increment update allowed for VST3/4");
2392 (void)Inc;
2393 Ops.push_back(Elt: Reg0);
2394 }
2395 Ops.push_back(Elt: RegSeq);
2396 Ops.push_back(Elt: Pred);
2397 Ops.push_back(Elt: Reg0);
2398 Ops.push_back(Elt: Chain);
2399 SDNode *VStB = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys,
2400 Ops);
2401 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStB), NewMemRefs: {MemOp});
2402 ReplaceNode(F: N, T: VStB);
2403}
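
// The store side mirrors the load split: e.g. a vst3.32 of Q registers is
// emitted as a writeback "vst3.32 {d0, d2, d4}, [r0]!" followed by
// "vst3.32 {d1, d3, d5}, [r0]", both fed from the same QQQQ REG_SEQUENCE.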
2404
2405void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2406 unsigned NumVecs,
2407 const uint16_t *DOpcodes,
2408 const uint16_t *QOpcodes) {
2409 assert(Subtarget->hasNEON());
2410 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2411 SDLoc dl(N);
2412
2413 SDValue MemAddr, Align;
2414 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2415 // nodes are not intrinsics.
2416 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2417 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2418 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2419 return;
2420
2421 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2422
2423 SDValue Chain = N->getOperand(Num: 0);
2424 unsigned Lane = N->getConstantOperandVal(Num: Vec0Idx + NumVecs);
2425 EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2426 bool is64BitVector = VT.is64BitVector();
2427
2428 unsigned Alignment = 0;
2429 if (NumVecs != 3) {
2430 Alignment = Align->getAsZExtVal();
2431 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2432 if (Alignment > NumBytes)
2433 Alignment = NumBytes;
2434 if (Alignment < 8 && Alignment < NumBytes)
2435 Alignment = 0;
2436 // Alignment must be a power of two; make sure of that.
2437 Alignment = (Alignment & -Alignment);
2438 if (Alignment == 1)
2439 Alignment = 0;
2440 }
2441 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2442
2443 unsigned OpcodeIndex;
2444 switch (VT.getSimpleVT().SimpleTy) {
2445 default: llvm_unreachable("unhandled vld/vst lane type");
2446 // Double-register operations:
2447 case MVT::v8i8: OpcodeIndex = 0; break;
2448 case MVT::v4f16:
2449 case MVT::v4bf16:
2450 case MVT::v4i16: OpcodeIndex = 1; break;
2451 case MVT::v2f32:
2452 case MVT::v2i32: OpcodeIndex = 2; break;
2453 // Quad-register operations:
2454 case MVT::v8f16:
2455 case MVT::v8bf16:
2456 case MVT::v8i16: OpcodeIndex = 0; break;
2457 case MVT::v4f32:
2458 case MVT::v4i32: OpcodeIndex = 1; break;
2459 }
2460
2461 std::vector<EVT> ResTys;
2462 if (IsLoad) {
2463 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2464 if (!is64BitVector)
2465 ResTyElts *= 2;
2466 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2467 MVT::i64, ResTyElts));
2468 }
2469 if (isUpdating)
2470 ResTys.push_back(MVT::i32);
2471 ResTys.push_back(MVT::Other);
2472
2473 SDValue Pred = getAL(CurDAG, dl);
2474 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2475
2476 SmallVector<SDValue, 8> Ops;
2477 Ops.push_back(Elt: MemAddr);
2478 Ops.push_back(Elt: Align);
2479 if (isUpdating) {
2480 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2481 bool IsImmUpdate =
2482 isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
2483 Ops.push_back(Elt: IsImmUpdate ? Reg0 : Inc);
2484 }
2485
2486 SDValue SuperReg;
2487 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2488 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2489 if (NumVecs == 2) {
2490 if (is64BitVector)
2491 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2492 else
2493 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2494 } else {
2495 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2496 SDValue V3 = (NumVecs == 3)
2497 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2498 : N->getOperand(Num: Vec0Idx + 3);
2499 if (is64BitVector)
2500 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2501 else
2502 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2503 }
2504 Ops.push_back(Elt: SuperReg);
2505 Ops.push_back(Elt: getI32Imm(Imm: Lane, dl));
2506 Ops.push_back(Elt: Pred);
2507 Ops.push_back(Elt: Reg0);
2508 Ops.push_back(Elt: Chain);
2509
2510 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2511 QOpcodes[OpcodeIndex]);
2512 SDNode *VLdLn = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2513 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdLn), NewMemRefs: {MemOp});
2514 if (!IsLoad) {
2515 ReplaceNode(F: N, T: VLdLn);
2516 return;
2517 }
2518
2519 // Extract the subregisters.
2520 SuperReg = SDValue(VLdLn, 0);
2521 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2522 ARM::qsub_3 == ARM::qsub_0 + 3,
2523 "Unexpected subreg numbering");
2524 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2525 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2526 ReplaceUses(F: SDValue(N, Vec),
2527 T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2528 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdLn, 1));
2529 if (isUpdating)
2530 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdLn, 2));
2531 CurDAG->RemoveDeadNode(N);
2532}
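
// E.g. a two-vector lane load "vld2.16 {d0[2], d1[2]}, [r0]" fills lane 2
// of each destination and leaves every other lane of the super-register
// (formed above from the incoming vectors) unchanged, which is why the
// input vectors are threaded through as operands.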
2533
2534template <typename SDValueVector>
2535void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2536 SDValue PredicateMask) {
2537 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2538 Ops.push_back(PredicateMask);
2539 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2540}
2541
2542template <typename SDValueVector>
2543void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2544 SDValue PredicateMask,
2545 SDValue Inactive) {
2546 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2547 Ops.push_back(PredicateMask);
2548 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2549 Ops.push_back(Inactive);
2550}
2551
2552template <typename SDValueVector>
2553void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2554 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2555 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2556 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2557}
2558
2559template <typename SDValueVector>
2560void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2561 EVT InactiveTy) {
2562 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2563 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2564 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2565 Ops.push_back(SDValue(
2566 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: InactiveTy), 0));
2567}
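
// These helpers append the operand triple that predicated MVE instructions
// expect: a vector-predication condition (ARMVCC::Then or ARMVCC::None),
// the VPR mask register (or the zero register when unpredicated), and a
// tail-predication register slot, optionally followed by a vector giving
// the values of inactive lanes.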
2568
2569void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2570 bool Predicated) {
2571 SDLoc Loc(N);
2572 SmallVector<SDValue, 8> Ops;
2573
2574 uint16_t Opcode;
2575 switch (N->getValueType(ResNo: 1).getVectorElementType().getSizeInBits()) {
2576 case 32:
2577 Opcode = Opcodes[0];
2578 break;
2579 case 64:
2580 Opcode = Opcodes[1];
2581 break;
2582 default:
2583 llvm_unreachable("bad vector element size in SelectMVE_WB");
2584 }
2585
2586 Ops.push_back(Elt: N->getOperand(Num: 2)); // vector of base addresses
2587
2588 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2589 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate offset
2590
2591 if (Predicated)
2592 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2593 else
2594 AddEmptyMVEPredicateToOps(Ops, Loc);
2595
2596 Ops.push_back(Elt: N->getOperand(Num: 0)); // chain
2597
2598 SmallVector<EVT, 8> VTs;
2599 VTs.push_back(Elt: N->getValueType(ResNo: 1));
2600 VTs.push_back(Elt: N->getValueType(ResNo: 0));
2601 VTs.push_back(Elt: N->getValueType(ResNo: 2));
2602
2603 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), ResultTys: VTs, Ops);
2604 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
2605 ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
2606 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
2607 transferMemOperands(N, Result: New);
2608 CurDAG->RemoveDeadNode(N);
2609}
2610
2611void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2612 bool Immediate,
2613 bool HasSaturationOperand) {
2614 SDLoc Loc(N);
2615 SmallVector<SDValue, 8> Ops;
2616
2617 // Two 32-bit halves of the value to be shifted
2618 Ops.push_back(Elt: N->getOperand(Num: 1));
2619 Ops.push_back(Elt: N->getOperand(Num: 2));
2620
2621 // The shift count
2622 if (Immediate) {
2623 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2624 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2625 } else {
2626 Ops.push_back(Elt: N->getOperand(Num: 3));
2627 }
2628
2629 // The immediate saturation operand, if any
2630 if (HasSaturationOperand) {
2631 int32_t SatOp = N->getConstantOperandVal(Num: 4);
2632 int SatBit = (SatOp == 64 ? 0 : 1);
2633 Ops.push_back(Elt: getI32Imm(Imm: SatBit, dl: Loc));
2634 }
2635
2636 // MVE scalar shifts are IT-predicable, so include the standard
2637 // predicate arguments.
2638 Ops.push_back(Elt: getAL(CurDAG, dl: Loc));
2639 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2640
2641 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2642}
2643
2644void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2645 uint16_t OpcodeWithNoCarry,
2646 bool Add, bool Predicated) {
2647 SDLoc Loc(N);
2648 SmallVector<SDValue, 8> Ops;
2649 uint16_t Opcode;
2650
2651 unsigned FirstInputOp = Predicated ? 2 : 1;
2652
2653 // Two input vectors and the input carry flag
2654 Ops.push_back(Elt: N->getOperand(Num: FirstInputOp));
2655 Ops.push_back(Elt: N->getOperand(Num: FirstInputOp + 1));
2656 SDValue CarryIn = N->getOperand(Num: FirstInputOp + 2);
2657 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(Val&: CarryIn);
2658 uint32_t CarryMask = 1 << 29;
2659 uint32_t CarryExpected = Add ? 0 : CarryMask;
2660 if (CarryInConstant &&
2661 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2662 Opcode = OpcodeWithNoCarry;
2663 } else {
2664 Ops.push_back(Elt: CarryIn);
2665 Opcode = OpcodeWithCarry;
2666 }
2667
2668 if (Predicated)
2669 AddMVEPredicateToOps(Ops, Loc,
2670 PredicateMask: N->getOperand(Num: FirstInputOp + 3), // predicate
2671 Inactive: N->getOperand(Num: FirstInputOp - 1)); // inactive
2672 else
2673 AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));
2674
2675 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2676}
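
// The bit-29 test matches the carry (C) flag position in FPSCR; for
// example, a constant initial carry of (1 << 29) with Add == true fails the
// CarryExpected comparison, so the carry operand is kept and the VADC form
// is chosen instead of the carry-initializing VADCI form.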
2677
2678void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2679 SDLoc Loc(N);
2680 SmallVector<SDValue, 8> Ops;
2681
2682 // One vector input, followed by a 32-bit word of bits to shift in
2683 // and then an immediate shift count
2684 Ops.push_back(Elt: N->getOperand(Num: 1));
2685 Ops.push_back(Elt: N->getOperand(Num: 2));
2686 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2687 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2688
2689 if (Predicated)
2690 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2691 else
2692 AddEmptyMVEPredicateToOps(Ops, Loc);
2693
2694 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2695}
2696
2697static bool SDValueToConstBool(SDValue SDVal) {
2698 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2699 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(Val&: SDVal);
2700 uint64_t Value = SDValConstant->getZExtValue();
2701 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2702 return Value;
2703}
2704
2705void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2706 const uint16_t *OpcodesS,
2707 const uint16_t *OpcodesU,
2708 size_t Stride, size_t TySize) {
2709 assert(TySize < Stride && "Invalid TySize");
2710 bool IsUnsigned = SDValueToConstBool(SDVal: N->getOperand(Num: 1));
2711 bool IsSub = SDValueToConstBool(SDVal: N->getOperand(Num: 2));
2712 bool IsExchange = SDValueToConstBool(SDVal: N->getOperand(Num: 3));
2713 if (IsUnsigned) {
2714 assert(!IsSub &&
2715 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2716 assert(!IsExchange &&
2717 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2718 }
2719
2720 auto OpIsZero = [N](size_t OpNo) {
2721 return isNullConstant(V: N->getOperand(Num: OpNo));
2722 };
2723
  // If the input accumulator value is not zero, select an instruction with
  // an accumulator; otherwise, select an instruction without one.
2726 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2727
2728 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2729 if (IsSub)
2730 Opcodes += 4 * Stride;
2731 if (IsExchange)
2732 Opcodes += 2 * Stride;
2733 if (IsAccum)
2734 Opcodes += Stride;
2735 uint16_t Opcode = Opcodes[TySize];
2736
2737 SDLoc Loc(N);
2738 SmallVector<SDValue, 8> Ops;
2739 // Push the accumulator operands, if they are used
2740 if (IsAccum) {
2741 Ops.push_back(Elt: N->getOperand(Num: 4));
2742 Ops.push_back(Elt: N->getOperand(Num: 5));
2743 }
2744 // Push the two vector operands
2745 Ops.push_back(Elt: N->getOperand(Num: 6));
2746 Ops.push_back(Elt: N->getOperand(Num: 7));
2747
2748 if (Predicated)
2749 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 8));
2750 else
2751 AddEmptyMVEPredicateToOps(Ops, Loc);
2752
2753 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2754}
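
// Opcode-table layout assumed by the pointer arithmetic above: entries are
// indexed as Opcodes[(IsSub * 4 + IsExchange * 2 + IsAccum) * Stride +
// TySize], i.e. eight groups of Stride opcodes for each signedness.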
2755
2756void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2757 const uint16_t *OpcodesS,
2758 const uint16_t *OpcodesU) {
2759 EVT VecTy = N->getOperand(Num: 6).getValueType();
2760 size_t SizeIndex;
2761 switch (VecTy.getVectorElementType().getSizeInBits()) {
2762 case 16:
2763 SizeIndex = 0;
2764 break;
2765 case 32:
2766 SizeIndex = 1;
2767 break;
2768 default:
2769 llvm_unreachable("bad vector element size");
2770 }
2771
2772 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 2, TySize: SizeIndex);
2773}
2774
2775void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2776 const uint16_t *OpcodesS,
2777 const uint16_t *OpcodesU) {
2778 assert(
2779 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2780 32 &&
2781 "bad vector element size");
2782 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 1, TySize: 0);
2783}
2784
2785void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2786 const uint16_t *const *Opcodes,
2787 bool HasWriteback) {
2788 EVT VT = N->getValueType(ResNo: 0);
2789 SDLoc Loc(N);
2790
2791 const uint16_t *OurOpcodes;
2792 switch (VT.getVectorElementType().getSizeInBits()) {
2793 case 8:
2794 OurOpcodes = Opcodes[0];
2795 break;
2796 case 16:
2797 OurOpcodes = Opcodes[1];
2798 break;
2799 case 32:
2800 OurOpcodes = Opcodes[2];
2801 break;
2802 default:
2803 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2804 }
2805
2806 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2807 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2808 unsigned PtrOperand = HasWriteback ? 1 : 2;
2809
2810 auto Data = SDValue(
2811 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: DataTy), 0);
2812 SDValue Chain = N->getOperand(Num: 0);
  // Add an MVE_VLDn instruction for each Vec, except the last.
2814 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2815 SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2816 auto LoadInst =
2817 CurDAG->getMachineNode(Opcode: OurOpcodes[Stage], dl: Loc, ResultTys, Ops);
2818 Data = SDValue(LoadInst, 0);
2819 Chain = SDValue(LoadInst, 1);
2820 transferMemOperands(N, Result: LoadInst);
2821 }
2822 // The last may need a writeback on it
2823 if (HasWriteback)
2824 ResultTys = {DataTy, MVT::i32, MVT::Other};
2825 SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2826 auto LoadInst =
2827 CurDAG->getMachineNode(Opcode: OurOpcodes[NumVecs - 1], dl: Loc, ResultTys, Ops);
2828 transferMemOperands(N, Result: LoadInst);
2829
2830 unsigned i;
2831 for (i = 0; i < NumVecs; i++)
2832 ReplaceUses(SDValue(N, i),
2833 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2834 SDValue(LoadInst, 0)));
2835 if (HasWriteback)
2836 ReplaceUses(F: SDValue(N, i++), T: SDValue(LoadInst, 1));
2837 ReplaceUses(F: SDValue(N, i), T: SDValue(LoadInst, HasWriteback ? 2 : 1));
2838 CurDAG->RemoveDeadNode(N);
2839}
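
// E.g. for an MVE vld4.8 (NumVecs == 4) this emits four chained loads, each
// filling part of the 8 x i64 tuple register, and only the last one
// optionally produces the written-back pointer.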
2840
2841void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2842 bool Wrapping, bool Predicated) {
2843 EVT VT = N->getValueType(ResNo: 0);
2844 SDLoc Loc(N);
2845
2846 uint16_t Opcode;
2847 switch (VT.getScalarSizeInBits()) {
2848 case 8:
2849 Opcode = Opcodes[0];
2850 break;
2851 case 16:
2852 Opcode = Opcodes[1];
2853 break;
2854 case 32:
2855 Opcode = Opcodes[2];
2856 break;
2857 default:
2858 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2859 }
2860
2861 SmallVector<SDValue, 8> Ops;
2862 unsigned OpIdx = 1;
2863
2864 SDValue Inactive;
2865 if (Predicated)
2866 Inactive = N->getOperand(Num: OpIdx++);
2867
2868 Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // base
2869 if (Wrapping)
2870 Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // limit
2871
2872 SDValue ImmOp = N->getOperand(Num: OpIdx++); // step
2873 int ImmValue = ImmOp->getAsZExtVal();
2874 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc));
2875
2876 if (Predicated)
2877 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: OpIdx), Inactive);
2878 else
2879 AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));
2880
2881 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2882}
2883
2884void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2885 size_t NumExtraOps, bool HasAccum) {
2886 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2887 SDLoc Loc(N);
2888 SmallVector<SDValue, 8> Ops;
2889
2890 unsigned OpIdx = 1;
2891
  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCoproc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = ImmCoproc->getAsZExtVal();
2895 Ops.push_back(Elt: getI32Imm(Imm: ImmCoprocVal, dl: Loc));
2896
  // For accumulating variants, copy the low and high halves of the
  // accumulator into a register pair and add it to the operand vector.
2899 if (HasAccum) {
2900 SDValue AccLo = N->getOperand(Num: OpIdx++);
2901 SDValue AccHi = N->getOperand(Num: OpIdx++);
2902 if (IsBigEndian)
2903 std::swap(a&: AccLo, b&: AccHi);
2904 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2905 }
2906
2907 // Copy extra operands as-is.
2908 for (size_t I = 0; I < NumExtraOps; I++)
2909 Ops.push_back(Elt: N->getOperand(Num: OpIdx++));
2910
  // Convert and append the immediate operand.
2912 SDValue Imm = N->getOperand(Num: OpIdx);
2913 uint32_t ImmVal = Imm->getAsZExtVal();
2914 Ops.push_back(Elt: getI32Imm(Imm: ImmVal, dl: Loc));
2915
  // Accumulating variants are IT-predicable; add the predicate operands.
2917 if (HasAccum) {
2918 SDValue Pred = getAL(CurDAG, dl: Loc);
2919 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2920 Ops.push_back(Elt: Pred);
2921 Ops.push_back(Elt: PredReg);
2922 }
2923
  // Create the CDE instruction.
2925 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2926 SDValue ResultPair = SDValue(InstrNode, 0);
2927
2928 // The original intrinsic had two outputs, and the output of the dual-register
2929 // CDE instruction is a register pair. We need to extract the two subregisters
2930 // and replace all uses of the original outputs with the extracted
2931 // subregisters.
2932 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2933 if (IsBigEndian)
2934 std::swap(a&: SubRegs[0], b&: SubRegs[1]);
2935
2936 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2937 if (SDValue(N, ResIdx).use_empty())
2938 continue;
2939 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2940 MVT::i32, ResultPair);
2941 ReplaceUses(F: SDValue(N, ResIdx), T: SubReg);
2942 }
2943
2944 CurDAG->RemoveDeadNode(N);
2945}
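
// For example, a dual-register CX1D result is returned as a single
// MVT::Untyped GPRPair; on little-endian targets gsub_0 is the low word and
// gsub_1 the high word, and the swaps above restore that pairing when
// compiling big-endian.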
2946
2947void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2948 bool isUpdating, unsigned NumVecs,
2949 const uint16_t *DOpcodes,
2950 const uint16_t *QOpcodes0,
2951 const uint16_t *QOpcodes1) {
2952 assert(Subtarget->hasNEON());
2953 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2954 SDLoc dl(N);
2955
2956 SDValue MemAddr, Align;
2957 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2958 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2959 return;
2960
2961 SDValue Chain = N->getOperand(Num: 0);
2962 EVT VT = N->getValueType(ResNo: 0);
2963 bool is64BitVector = VT.is64BitVector();
2964
2965 unsigned Alignment = 0;
2966 if (NumVecs != 3) {
2967 Alignment = Align->getAsZExtVal();
2968 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2969 if (Alignment > NumBytes)
2970 Alignment = NumBytes;
2971 if (Alignment < 8 && Alignment < NumBytes)
2972 Alignment = 0;
2973 // Alignment must be a power of two; make sure of that.
2974 Alignment = (Alignment & -Alignment);
2975 if (Alignment == 1)
2976 Alignment = 0;
2977 }
2978 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2979
2980 unsigned OpcodeIndex;
2981 switch (VT.getSimpleVT().SimpleTy) {
2982 default: llvm_unreachable("unhandled vld-dup type");
2983 case MVT::v8i8:
2984 case MVT::v16i8: OpcodeIndex = 0; break;
2985 case MVT::v4i16:
2986 case MVT::v8i16:
2987 case MVT::v4f16:
2988 case MVT::v8f16:
2989 case MVT::v4bf16:
2990 case MVT::v8bf16:
2991 OpcodeIndex = 1; break;
2992 case MVT::v2f32:
2993 case MVT::v2i32:
2994 case MVT::v4f32:
2995 case MVT::v4i32: OpcodeIndex = 2; break;
2996 case MVT::v1f64:
2997 case MVT::v1i64: OpcodeIndex = 3; break;
2998 }
2999
3000 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
3001 if (!is64BitVector)
3002 ResTyElts *= 2;
3003 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3004
3005 std::vector<EVT> ResTys;
3006 ResTys.push_back(x: ResTy);
3007 if (isUpdating)
3008 ResTys.push_back(MVT::i32);
3009 ResTys.push_back(MVT::Other);
3010
3011 SDValue Pred = getAL(CurDAG, dl);
3012 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3013
3014 SmallVector<SDValue, 6> Ops;
3015 Ops.push_back(Elt: MemAddr);
3016 Ops.push_back(Elt: Align);
3017 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3018 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3019 : QOpcodes1[OpcodeIndex];
3020 if (isUpdating) {
3021 SDValue Inc = N->getOperand(Num: 2);
3022 bool IsImmUpdate =
3023 isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
3024 if (IsImmUpdate) {
3025 if (!isVLDfixed(Opc))
3026 Ops.push_back(Elt: Reg0);
3027 } else {
3028 if (isVLDfixed(Opc))
3029 Opc = getVLDSTRegisterUpdateOpcode(Opc);
3030 Ops.push_back(Elt: Inc);
3031 }
3032 }
3033 if (is64BitVector || NumVecs == 1) {
3034 // Double registers and VLD1 quad registers are directly supported.
3035 } else {
3036 SDValue ImplDef = SDValue(
3037 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
3038 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3039 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3040 MVT::Other, OpsA);
3041 Ops.push_back(Elt: SDValue(VLdA, 0));
3042 Chain = SDValue(VLdA, 1);
3043 }
3044
3045 Ops.push_back(Elt: Pred);
3046 Ops.push_back(Elt: Reg0);
3047 Ops.push_back(Elt: Chain);
3048
3049 SDNode *VLdDup = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
3050
3051 // Transfer memoperands.
3052 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
3053 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdDup), NewMemRefs: {MemOp});
3054
3055 // Extract the subregisters.
3056 if (NumVecs == 1) {
3057 ReplaceUses(F: SDValue(N, 0), T: SDValue(VLdDup, 0));
3058 } else {
3059 SDValue SuperReg = SDValue(VLdDup, 0);
3060 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3061 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3062 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3063 ReplaceUses(F: SDValue(N, Vec),
3064 T: CurDAG->getTargetExtractSubreg(SRIdx: SubIdx+Vec, DL: dl, VT, Operand: SuperReg));
3065 }
3066 }
3067 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdDup, 1));
3068 if (isUpdating)
3069 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdDup, 2));
3070 CurDAG->RemoveDeadNode(N);
3071}
3072
3073bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3074 if (!Subtarget->hasMVEIntegerOps())
3075 return false;
3076
3077 SDLoc dl(N);
3078
  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower inserts
  // and extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
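  // For example, with Lane2 == 2*i (even) and Lane1 == 2*i+1:
  //   (insert_vector_elt (insert_vector_elt X, a, 2*i), b, 2*i+1)
  // can then be selected as a single f32 lane move or a VINS.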
3082 SDValue Ins1 = SDValue(N, 0);
3083 SDValue Ins2 = N->getOperand(Num: 0);
3084 EVT VT = Ins1.getValueType();
3085 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3086 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3087 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3088 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3089 return false;
3090
3091 unsigned Lane1 = Ins1.getConstantOperandVal(i: 2);
3092 unsigned Lane2 = Ins2.getConstantOperandVal(i: 2);
3093 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3094 return false;
3095
3096 // If the inserted values will be able to use T/B already, leave it to the
3097 // existing tablegen patterns. For example VCVTT/VCVTB.
3098 SDValue Val1 = Ins1.getOperand(i: 1);
3099 SDValue Val2 = Ins2.getOperand(i: 1);
3100 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3101 return false;
3102
3103 // Check if the inserted values are both extracts.
3104 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3105 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3106 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3107 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3108 isa<ConstantSDNode>(Val1.getOperand(1)) &&
3109 isa<ConstantSDNode>(Val2.getOperand(1)) &&
3110 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3111 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3112 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3113 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3114 unsigned ExtractLane1 = Val1.getConstantOperandVal(i: 1);
3115 unsigned ExtractLane2 = Val2.getConstantOperandVal(i: 1);
3116
    // If the two extracted lanes are from the same vector and adjacent, this
    // simplifies into an f32 lane move.
3119 if (Val1.getOperand(i: 0) == Val2.getOperand(i: 0) && ExtractLane2 % 2 == 0 &&
3120 ExtractLane1 == ExtractLane2 + 1) {
3121 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3122 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3123 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3124 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3125 NewExt);
3126 ReplaceUses(F: Ins1, T: NewIns);
3127 return true;
3128 }
3129
    // Otherwise this is the v8i16 pattern of an extract and an insert, with an
    // optional VMOVX for extracting odd lanes.
3132 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3133 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3134 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3135 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3136 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3137 if (ExtractLane1 % 2 != 0)
3138 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3139 if (ExtractLane2 % 2 != 0)
3140 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3141 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3142 SDValue NewIns =
3143 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3144 Ins2.getOperand(0), SDValue(VINS, 0));
3145 ReplaceUses(F: Ins1, T: NewIns);
3146 return true;
3147 }
3148 }
3149
  // The inserted values are not extracts; if they are f16 then insert them
  // directly using a VINS.
3152 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3153 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3154 SDValue NewIns =
3155 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3156 Ins2.getOperand(0), SDValue(VINS, 0));
3157 ReplaceUses(F: Ins1, T: NewIns);
3158 return true;
3159 }
3160
3161 return false;
3162}
3163
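/// Transform a multiply by a power of two that feeds (or is fed by) an
/// int<->float conversion into a single MVE VCVT with a fractional-bits
/// operand. With FixedToFloat set, this matches (fmul (int_to_fp x), 2^-n);
/// otherwise the caller has matched (fp_to_int (fmul x, 2^n)). In both cases
/// n becomes the number of fractional bits of the VCVT.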
3164bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3165 SDNode *FMul,
3166 bool IsUnsigned,
3167 bool FixedToFloat) {
3168 auto Type = N->getValueType(ResNo: 0);
3169 unsigned ScalarBits = Type.getScalarSizeInBits();
3170 if (ScalarBits > 32)
3171 return false;
3172
3173 SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point VCVT and VCVT+VMUL are not always equivalent if inf is
  // allowed in 16-bit unsigned floats.
3176 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3177 return false;
3178
3179 SDValue ImmNode = FMul->getOperand(Num: 1);
3180 SDValue VecVal = FMul->getOperand(Num: 0);
3181 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3182 VecVal->getOpcode() == ISD::SINT_TO_FP)
3183 VecVal = VecVal->getOperand(Num: 0);
3184
3185 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3186 return false;
3187
3188 if (ImmNode.getOpcode() == ISD::BITCAST) {
3189 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3190 return false;
3191 ImmNode = ImmNode.getOperand(i: 0);
3192 }
3193
3194 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3195 return false;
3196
3197 APFloat ImmAPF(0.0f);
3198 switch (ImmNode.getOpcode()) {
3199 case ARMISD::VMOVIMM:
3200 case ARMISD::VDUP: {
3201 if (!isa<ConstantSDNode>(Val: ImmNode.getOperand(i: 0)))
3202 return false;
3203 unsigned Imm = ImmNode.getConstantOperandVal(i: 0);
3204 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3205 Imm = ARM_AM::decodeVMOVModImm(ModImm: Imm, EltBits&: ScalarBits);
3206 ImmAPF =
3207 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3208 APInt(ScalarBits, Imm));
3209 break;
3210 }
3211 case ARMISD::VMOVFPIMM: {
3212 ImmAPF = APFloat(ARM_AM::getFPImmFloat(Imm: ImmNode.getConstantOperandVal(i: 0)));
3213 break;
3214 }
3215 default:
3216 return false;
3217 }
3218
  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the
  // fixed-to-float case, where the factor is 2^-n) will give n.
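  // For example, with n = 4 fractional bits: float to fixed multiplies by
  // 2^4 = 16.0, and fixed to float multiplies by 2^-4 = 0.0625, whose exact
  // inverse is 16.0; either way log2(16) recovers n = 4.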
3223 APFloat ToConvert = ImmAPF;
3224 if (FixedToFloat) {
3225 if (!ImmAPF.getExactInverse(inv: &ToConvert))
3226 return false;
3227 }
3228 APSInt Converted(64, false);
3229 bool IsExact;
3230 ToConvert.convertToInteger(Result&: Converted, RM: llvm::RoundingMode::NearestTiesToEven,
3231 IsExact: &IsExact);
3232 if (!IsExact || !Converted.isPowerOf2())
3233 return false;
3234
3235 unsigned FracBits = Converted.logBase2();
3236 if (FracBits > ScalarBits)
3237 return false;
3238
3239 SmallVector<SDValue, 3> Ops{
3240 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3241 AddEmptyMVEPredicateToOps(Ops, Loc: SDLoc(N), InactiveTy: Type);
3242
3243 unsigned int Opcode;
3244 switch (ScalarBits) {
3245 case 16:
3246 if (FixedToFloat)
3247 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3248 else
3249 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3250 break;
3251 case 32:
3252 if (FixedToFloat)
3253 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3254 else
3255 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3256 break;
3257 default:
3258 llvm_unreachable("unexpected number of scalar bits");
3259 break;
3260 }
3261
3262 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT: Type, Ops));
3263 return true;
3264}
3265
3266bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3267 // Transform a floating-point to fixed-point conversion to a VCVT
3268 if (!Subtarget->hasMVEFloatOps())
3269 return false;
3270 EVT Type = N->getValueType(ResNo: 0);
3271 if (!Type.isVector())
3272 return false;
3273 unsigned int ScalarBits = Type.getScalarSizeInBits();
3274
3275 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3276 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3277 SDNode *Node = N->getOperand(Num: 0).getNode();
3278
  // A floating-point to fixed-point conversion with one fractional bit gets
  // turned into an FP_TO_[U|S]INT(FADD(x, x)) rather than an
  // FP_TO_[U|S]INT(FMUL(x, y)).
3281 if (Node->getOpcode() == ISD::FADD) {
3282 if (Node->getOperand(Num: 0) != Node->getOperand(Num: 1))
3283 return false;
3284 SDNodeFlags Flags = Node->getFlags();
    // The fixed-point VCVT and VCVT+VMUL are not always equivalent if inf is
    // allowed in 16-bit unsigned floats.
3287 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3288 return false;
3289
3290 unsigned Opcode;
3291 switch (ScalarBits) {
3292 case 16:
3293 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3294 break;
3295 case 32:
3296 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
      break;
    default:
      llvm_unreachable("unexpected number of scalar bits");
    }
3299 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3300 CurDAG->getConstant(1, dl, MVT::i32)};
3301 AddEmptyMVEPredicateToOps(Ops, Loc: dl, InactiveTy: Type);
3302
3303 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl, VT: Type, Ops));
3304 return true;
3305 }
3306
3307 if (Node->getOpcode() != ISD::FMUL)
3308 return false;
3309
3310 return transformFixedFloatingPointConversion(N, FMul: Node, IsUnsigned, FixedToFloat: false);
3311}
3312
3313bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3314 // Transform a fixed-point to floating-point conversion to a VCVT
3315 if (!Subtarget->hasMVEFloatOps())
3316 return false;
3317 auto Type = N->getValueType(ResNo: 0);
3318 if (!Type.isVector())
3319 return false;
3320
3321 auto LHS = N->getOperand(Num: 0);
3322 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3323 return false;
3324
3325 return transformFixedFloatingPointConversion(
3326 N, FMul: N, IsUnsigned: LHS.getOpcode() == ISD::UINT_TO_FP, FixedToFloat: true);
3327}
3328
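/// Try to select a bitfield extract (UBFX/SBFX), or a cheaper plain shift when
/// the field reaches the MSB, from one of these patterns:
///   (and (srl x, lsb), mask)
///   (srl (shl x, a), b)
///   (srl/sra (and x, shifted_mask), lsb)
///   (sign_extend_inreg (srl/sra x, lsb))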
3329bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3330 if (!Subtarget->hasV6T2Ops())
3331 return false;
3332
3333 unsigned Opc = isSigned
3334 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3335 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3336 SDLoc dl(N);
3337
3338 // For unsigned extracts, check for a shift right and mask
3339 unsigned And_imm = 0;
3340 if (N->getOpcode() == ISD::AND) {
3341 if (isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: And_imm)) {
3342
3343 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
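      // For example, 0x000000ff (the low 8 bits) is a valid mask here, while
      // 0x00000ff0 is rejected because 0xff0 & 0xff1 != 0.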
3344 if (And_imm & (And_imm + 1))
3345 return false;
3346
3347 unsigned Srl_imm = 0;
3348 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
3349 Imm&: Srl_imm)) {
3350 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3351
3352 // Mask off the unnecessary bits of the AND immediate; normally
3353 // DAGCombine will do this, but that might not happen if
3354 // targetShrinkDemandedConstant chooses a different immediate.
3355 And_imm &= -1U >> Srl_imm;
3356
3357 // Note: The width operand is encoded as width-1.
3358 unsigned Width = llvm::countr_one(Value: And_imm) - 1;
3359 unsigned LSB = Srl_imm;
3360
3361 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3362
3363 if ((LSB + Width + 1) == N->getValueType(ResNo: 0).getSizeInBits()) {
3364 // It's cheaper to use a right shift to extract the top bits.
3365 if (Subtarget->isThumb()) {
3366 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3367 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3368 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3369 getAL(CurDAG, dl), Reg0, Reg0 };
3370 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3371 return true;
3372 }
3373
3374 // ARM models shift instructions as MOVsi with shifter operand.
3375 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: ISD::SRL);
3376 SDValue ShOpc =
3377 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3378 MVT::i32);
3379 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0), ShOpc,
3380 getAL(CurDAG, dl), Reg0, Reg0 };
3381 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3382 return true;
3383 }
3384
3385 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3386 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3387 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3388 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3389 getAL(CurDAG, dl), Reg0 };
3390 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3391 return true;
3392 }
3393 }
3394 return false;
3395 }
3396
3397 // Otherwise, we're looking for a shift of a shift
3398 unsigned Shl_imm = 0;
3399 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: Shl_imm)) {
3400 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3401 unsigned Srl_imm = 0;
3402 if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm)) {
3403 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3404 // Note: The width operand is encoded as width-1.
3405 unsigned Width = 32 - Srl_imm - 1;
3406 int LSB = Srl_imm - Shl_imm;
3407 if (LSB < 0)
3408 return false;
3409 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3410 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3411 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3412 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3413 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3414 getAL(CurDAG, dl), Reg0 };
3415 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3416 return true;
3417 }
3418 }
3419
  // Or we are looking for a shift of an AND, with a mask operand.
3421 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: And_imm) &&
3422 isShiftedMask_32(Value: And_imm)) {
3423 unsigned Srl_imm = 0;
3424 unsigned LSB = llvm::countr_zero(Val: And_imm);
    // The shift amount must be the same as the AND's LSB.
3426 if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm) && Srl_imm == LSB) {
3427 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3428 unsigned MSB = llvm::Log2_32(Value: And_imm);
3429 // Note: The width operand is encoded as width-1.
3430 unsigned Width = MSB - LSB;
3431 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3432 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3433 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3434 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3435 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3436 getAL(CurDAG, dl), Reg0 };
3437 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3438 return true;
3439 }
3440 }
3441
3442 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3443 unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
3444 unsigned LSB = 0;
3445 if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: LSB) &&
3446 !isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRA, Imm&: LSB))
3447 return false;
3448
3449 if (LSB + Width > 32)
3450 return false;
3451
3452 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3453 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3454 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3455 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3456 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3457 getAL(CurDAG, dl), Reg0 };
3458 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3459 return true;
3460 }
3461
3462 return false;
3463}
3464
3465/// Target-specific DAG combining for ISD::SUB.
3466/// Target-independent combining lowers SELECT_CC nodes of the form
3467/// select_cc setg[ge] X, 0, X, -X
3468/// select_cc setgt X, -1, X, -X
3469/// select_cc setl[te] X, 0, -X, X
3470/// select_cc setlt X, 1, -X, X
/// which represent an integer ABS into:
3472/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3473/// ARM instruction selection detects the latter and matches it to
3474/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
3476 SDValue SUBSrc0 = N->getOperand(Num: 0);
3477 SDValue SUBSrc1 = N->getOperand(Num: 1);
3478 EVT VT = N->getValueType(ResNo: 0);
3479
3480 if (Subtarget->isThumb1Only())
3481 return false;
3482
3483 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3484 return false;
3485
3486 SDValue XORSrc0 = SUBSrc0.getOperand(i: 0);
3487 SDValue XORSrc1 = SUBSrc0.getOperand(i: 1);
3488 SDValue SRASrc0 = SUBSrc1.getOperand(i: 0);
3489 SDValue SRASrc1 = SUBSrc1.getOperand(i: 1);
3490 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(Val&: SRASrc1);
3491 EVT XType = SRASrc0.getValueType();
3492 unsigned Size = XType.getSizeInBits() - 1;
3493
3494 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3495 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3496 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3497 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VT, Op1: XORSrc0);
3498 return true;
3499 }
3500
3501 return false;
3502}
3503
/// We've got special pseudo-instructions for these: select the atomic
/// compare-and-swap to a CMP_SWAP_8/16/32 pseudo, which is expanded to a
/// load/store-exclusive loop after register allocation.
3505void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3506 unsigned Opcode;
3507 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3508 if (MemTy == MVT::i8)
3509 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3510 else if (MemTy == MVT::i16)
3511 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3512 else if (MemTy == MVT::i32)
3513 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3514 else
3515 llvm_unreachable("Unknown AtomicCmpSwap type");
3516
3517 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
3518 N->getOperand(Num: 0)};
3519 SDNode *CmpSwap = CurDAG->getMachineNode(
3520 Opcode, SDLoc(N),
3521 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3522
3523 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3524 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3525
3526 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
3527 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
3528 CurDAG->RemoveDeadNode(N);
3529}
3530
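/// If A contains exactly one contiguous run of set bits, return the bit
/// indices of the highest and lowest set bit; otherwise return std::nullopt.
/// For example, 0x00ff0000 yields {23, 16}, while 0x00ff00ff (two runs) fails.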
3531static std::optional<std::pair<unsigned, unsigned>>
3532getContiguousRangeOfSetBits(const APInt &A) {
3533 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3534 unsigned LastOne = A.countr_zero();
3535 if (A.popcount() != (FirstOne - LastOne + 1))
3536 return std::nullopt;
3537 return std::make_pair(x&: FirstOne, y&: LastOne);
3538}
3539
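/// Try to simplify the input of (CMPZ (and X, C), #0) on Thumb targets: when C
/// is a contiguous bit mask, the AND can be replaced by one or two flag-setting
/// shifts (LSLS/LSRS). If the mask is a single bit that ends up shifted into
/// the sign bit, SwitchEQNEToPLMI is set so the caller rewrites the EQ/NE
/// condition codes into PL/MI.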
3540void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3541 assert(N->getOpcode() == ARMISD::CMPZ);
3542 SwitchEQNEToPLMI = false;
3543
3544 if (!Subtarget->isThumb())
3545 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3546 // LSR don't exist as standalone instructions - they need the barrel shifter.
3547 return;
3548
3549 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3550 SDValue And = N->getOperand(Num: 0);
3551 if (!And->hasOneUse())
3552 return;
3553
3554 SDValue Zero = N->getOperand(Num: 1);
3555 if (!isNullConstant(V: Zero) || And->getOpcode() != ISD::AND)
3556 return;
3557 SDValue X = And.getOperand(i: 0);
3558 auto C = dyn_cast<ConstantSDNode>(Val: And.getOperand(i: 1));
3559
3560 if (!C)
3561 return;
3562 auto Range = getContiguousRangeOfSetBits(A: C->getAPIntValue());
3563 if (!Range)
3564 return;
3565
3566 // There are several ways to lower this:
3567 SDNode *NewN;
3568 SDLoc dl(N);
3569
3570 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3571 if (Subtarget->isThumb2()) {
3572 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3573 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3574 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3575 CurDAG->getRegister(0, MVT::i32) };
3576 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3577 } else {
3578 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3579 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3580 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3581 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3582 }
3583 };
3584
3585 if (Range->second == 0) {
3586 // 1. Mask includes the LSB -> Simply shift the top N bits off
3587 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3588 ReplaceNode(F: And.getNode(), T: NewN);
3589 } else if (Range->first == 31) {
3590 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3591 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3592 ReplaceNode(F: And.getNode(), T: NewN);
3593 } else if (Range->first == Range->second) {
3594 // 3. Only one bit is set. We can shift this into the sign bit and use a
3595 // PL/MI comparison.
3596 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3597 ReplaceNode(F: And.getNode(), T: NewN);
3598
3599 SwitchEQNEToPLMI = true;
3600 } else if (!Subtarget->hasV6T2Ops()) {
3601 // 4. Do a double shift to clear bottom and top bits, but only in
3602 // thumb-1 mode as in thumb-2 we can use UBFX.
3603 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3604 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3605 Range->second + (31 - Range->first));
3606 ReplaceNode(F: And.getNode(), T: NewN);
3607 }
3608}
3609
3610static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3611 unsigned Opc128[3]) {
3612 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3613 "Unexpected vector shuffle length");
3614 switch (VT.getScalarSizeInBits()) {
3615 default:
3616 llvm_unreachable("Unexpected vector shuffle element size");
3617 case 8:
3618 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3619 case 16:
3620 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3621 case 32:
3622 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3623 }
3624}
3625
3626void ARMDAGToDAGISel::Select(SDNode *N) {
3627 SDLoc dl(N);
3628
3629 if (N->isMachineOpcode()) {
3630 N->setNodeId(-1);
3631 return; // Already selected.
3632 }
3633
3634 switch (N->getOpcode()) {
3635 default: break;
3636 case ISD::STORE: {
3637 // For Thumb1, match an sp-relative store in C++. This is a little
3638 // unfortunate, but I don't think I can make the chain check work
3639 // otherwise. (The chain of the store has to be the same as the chain
3640 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3641 // a direct reference to "SP".)
3642 //
3643 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3644 // a different addressing mode from other four-byte stores.
3645 //
3646 // This pattern usually comes up with call arguments.
3647 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
3648 SDValue Ptr = ST->getBasePtr();
3649 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3650 int RHSC = 0;
3651 if (Ptr.getOpcode() == ISD::ADD &&
3652 isScaledConstantInRange(Node: Ptr.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC))
3653 Ptr = Ptr.getOperand(i: 0);
3654
3655 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3656 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3657 Ptr.getOperand(0) == ST->getChain()) {
3658 SDValue Ops[] = {ST->getValue(),
3659 CurDAG->getRegister(ARM::SP, MVT::i32),
3660 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3661 getAL(CurDAG, dl),
3662 CurDAG->getRegister(0, MVT::i32),
3663 ST->getChain()};
3664 MachineSDNode *ResNode =
3665 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3666 MachineMemOperand *MemOp = ST->getMemOperand();
3667 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3668 ReplaceNode(F: N, T: ResNode);
3669 return;
3670 }
3671 }
3672 break;
3673 }
3674 case ISD::WRITE_REGISTER:
3675 if (tryWriteRegister(N))
3676 return;
3677 break;
3678 case ISD::READ_REGISTER:
3679 if (tryReadRegister(N))
3680 return;
3681 break;
3682 case ISD::INLINEASM:
3683 case ISD::INLINEASM_BR:
3684 if (tryInlineAsm(N))
3685 return;
3686 break;
3687 case ISD::SUB:
3688 // Select special operations if SUB node forms integer ABS pattern
3689 if (tryABSOp(N))
3690 return;
3691 // Other cases are autogenerated.
3692 break;
3693 case ISD::Constant: {
3694 unsigned Val = N->getAsZExtVal();
    // If we can't materialize the constant cheaply, load it from a literal
    // pool instead.
3696 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3697 !Subtarget->genExecuteOnly()) {
3698 SDValue CPIdx = CurDAG->getTargetConstantPool(
3699 C: ConstantInt::get(Ty: Type::getInt32Ty(C&: *CurDAG->getContext()), V: Val),
3700 VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3701
3702 SDNode *ResNode;
3703 if (Subtarget->isThumb()) {
3704 SDValue Ops[] = {
3705 CPIdx,
3706 getAL(CurDAG, dl),
3707 CurDAG->getRegister(0, MVT::i32),
3708 CurDAG->getEntryNode()
3709 };
3710 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3711 Ops);
3712 } else {
3713 SDValue Ops[] = {
3714 CPIdx,
3715 CurDAG->getTargetConstant(0, dl, MVT::i32),
3716 getAL(CurDAG, dl),
3717 CurDAG->getRegister(0, MVT::i32),
3718 CurDAG->getEntryNode()
3719 };
3720 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3721 Ops);
3722 }
      // Annotate the node with memory operand information so that MachineInstr
      // queries work properly. This, for example, gives the register allocator
      // the information required for rematerialization.
3726 MachineFunction& MF = CurDAG->getMachineFunction();
3727 MachineMemOperand *MemOp =
3728 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3729 F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
3730
3731 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3732
3733 ReplaceNode(F: N, T: ResNode);
3734 return;
3735 }
3736
3737 // Other cases are autogenerated.
3738 break;
3739 }
3740 case ISD::FrameIndex: {
3741 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3742 int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
3743 SDValue TFI = CurDAG->getTargetFrameIndex(
3744 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3745 if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to
      // generate more than one ADD.
3748 MachineFrameInfo &MFI = MF->getFrameInfo();
3749 if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
3750 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
3751 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3752 CurDAG->getTargetConstant(0, dl, MVT::i32));
3753 return;
3754 } else {
3755 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3756 ARM::t2ADDri : ARM::ADDri);
3757 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3758 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3759 CurDAG->getRegister(0, MVT::i32) };
3760 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3761 return;
3762 }
3763 }
3764 case ISD::INSERT_VECTOR_ELT: {
3765 if (tryInsertVectorElt(N))
3766 return;
3767 break;
3768 }
3769 case ISD::SRL:
3770 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3771 return;
3772 break;
3773 case ISD::SIGN_EXTEND_INREG:
3774 case ISD::SRA:
3775 if (tryV6T2BitfieldExtractOp(N, isSigned: true))
3776 return;
3777 break;
3778 case ISD::FP_TO_UINT:
3779 case ISD::FP_TO_SINT:
3780 case ISD::FP_TO_UINT_SAT:
3781 case ISD::FP_TO_SINT_SAT:
3782 if (tryFP_TO_INT(N, dl))
3783 return;
3784 break;
3785 case ISD::FMUL:
3786 if (tryFMULFixed(N, dl))
3787 return;
3788 break;
3789 case ISD::MUL:
3790 if (Subtarget->isThumb1Only())
3791 break;
3792 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) {
3793 unsigned RHSV = C->getZExtValue();
3794 if (!RHSV) break;
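      // A multiply by 2^n+1 or 2^n-1 can be selected as a single
      // shifter-operand instruction: x*(2^n+1) == add(x, x << n) and
      // x*(2^n-1) == rsb(x, x << n).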
3795 if (isPowerOf2_32(Value: RHSV-1)) { // 2^n+1?
3796 unsigned ShImm = Log2_32(Value: RHSV-1);
3797 if (ShImm >= 32)
3798 break;
3799 SDValue V = N->getOperand(Num: 0);
3800 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3801 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3802 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3803 if (Subtarget->isThumb()) {
3804 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3805 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3806 return;
3807 } else {
3808 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3809 Reg0 };
3810 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3811 return;
3812 }
3813 }
3814 if (isPowerOf2_32(Value: RHSV+1)) { // 2^n-1?
3815 unsigned ShImm = Log2_32(Value: RHSV+1);
3816 if (ShImm >= 32)
3817 break;
3818 SDValue V = N->getOperand(Num: 0);
3819 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3820 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3821 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3822 if (Subtarget->isThumb()) {
3823 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3824 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3825 return;
3826 } else {
3827 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3828 Reg0 };
3829 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3830 return;
3831 }
3832 }
3833 }
3834 break;
3835 case ISD::AND: {
3836 // Check for unsigned bitfield extract
3837 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3838 return;
3839
    // If an immediate is used in an AND node, it is possible that the
    // immediate can be materialized more cheaply when negated. If this is the
    // case, we can negate the immediate and use a BIC instead.
3843 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
3844 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3845 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3846
3847 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3848 // immediate can be negated and fit in the immediate operand of
3849 // a t2BIC, don't do any manual transform here as this can be
3850 // handled by the generic ISel machinery.
3851 bool PreferImmediateEncoding =
3852 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3853 if (!PreferImmediateEncoding &&
3854 ConstantMaterializationCost(Val: Imm, Subtarget) >
3855 ConstantMaterializationCost(Val: ~Imm, Subtarget)) {
3856 // The current immediate costs more to materialize than a negated
3857 // immediate, so negate the immediate and use a BIC.
3858 SDValue NewImm =
3859 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3860 // If the new constant didn't exist before, reposition it in the topological
3861 // ordering so it is just before N. Otherwise, don't touch its location.
3862 if (NewImm->getNodeId() == -1)
3863 CurDAG->RepositionNode(Position: N->getIterator(), N: NewImm.getNode());
3864
3865 if (!Subtarget->hasThumb2()) {
3866 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3867 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3868 CurDAG->getRegister(0, MVT::i32)};
3869 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3870 return;
3871 } else {
3872 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3873 CurDAG->getRegister(0, MVT::i32),
3874 CurDAG->getRegister(0, MVT::i32)};
3875 ReplaceNode(N,
3876 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3877 return;
3878 }
3879 }
3880 }
3881
  // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower 16
  // bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the top 16
  // bits of the result come entirely from c2 and the lower 16 bits entirely
  // from x. That's equal to (or (and x, 0xffff), (and c2, 0xffff0000)).
  // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
3887 EVT VT = N->getValueType(ResNo: 0);
3888 if (VT != MVT::i32)
3889 break;
3890 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3891 ? ARM::t2MOVTi16
3892 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3893 if (!Opc)
3894 break;
3895 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
3896 N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3897 if (!N1C)
3898 break;
3899 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3900 SDValue N2 = N0.getOperand(i: 1);
3901 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2);
3902 if (!N2C)
3903 break;
3904 unsigned N1CVal = N1C->getZExtValue();
3905 unsigned N2CVal = N2C->getZExtValue();
3906 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3907 (N1CVal & 0xffffU) == 0xffffU &&
3908 (N2CVal & 0xffffU) == 0x0U) {
3909 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3910 dl, MVT::i32);
3911 SDValue Ops[] = { N0.getOperand(0), Imm16,
3912 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3913 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opc, dl, VT, Ops));
3914 return;
3915 }
3916 }
3917
3918 break;
3919 }
3920 case ARMISD::UMAAL: {
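    // UMAAL computes RdLo,RdHi = Rn*Rm + RdLo + RdHi; it has no flag-setting
    // form, so only the predicate operands are appended.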
3921 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3922 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3923 N->getOperand(2), N->getOperand(3),
3924 getAL(CurDAG, dl),
3925 CurDAG->getRegister(0, MVT::i32) };
3926 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3927 return;
3928 }
  case ARMISD::UMLAL: {
3930 if (Subtarget->isThumb()) {
3931 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3932 N->getOperand(3), getAL(CurDAG, dl),
3933 CurDAG->getRegister(0, MVT::i32)};
3934 ReplaceNode(
3935 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3936 return;
    } else {
3938 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3939 N->getOperand(3), getAL(CurDAG, dl),
3940 CurDAG->getRegister(0, MVT::i32),
3941 CurDAG->getRegister(0, MVT::i32) };
3942 ReplaceNode(N, CurDAG->getMachineNode(
3943 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3944 MVT::i32, MVT::i32, Ops));
3945 return;
3946 }
3947 }
  case ARMISD::SMLAL: {
3949 if (Subtarget->isThumb()) {
3950 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3951 N->getOperand(3), getAL(CurDAG, dl),
3952 CurDAG->getRegister(0, MVT::i32)};
3953 ReplaceNode(
3954 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3955 return;
    } else {
3957 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3958 N->getOperand(3), getAL(CurDAG, dl),
3959 CurDAG->getRegister(0, MVT::i32),
3960 CurDAG->getRegister(0, MVT::i32) };
3961 ReplaceNode(N, CurDAG->getMachineNode(
3962 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3963 MVT::i32, MVT::i32, Ops));
3964 return;
3965 }
3966 }
3967 case ARMISD::SUBE: {
3968 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3969 break;
    // Look for a pattern to match SMMLS:
    //   (sube a, hi(smul_lohi(a, b)), (subc 0, lo(smul_lohi(a, b))))
3972 if (N->getOperand(Num: 1).getOpcode() != ISD::SMUL_LOHI ||
3973 N->getOperand(Num: 2).getOpcode() != ARMISD::SUBC ||
3974 !SDValue(N, 1).use_empty())
3975 break;
3976
3977 if (Subtarget->isThumb())
3978 assert(Subtarget->hasThumb2() &&
3979 "This pattern should not be generated for Thumb");
3980
3981 SDValue SmulLoHi = N->getOperand(Num: 1);
3982 SDValue Subc = N->getOperand(Num: 2);
3983 SDValue Zero = Subc.getOperand(i: 0);
3984
3985 if (!isNullConstant(V: Zero) || Subc.getOperand(i: 1) != SmulLoHi.getValue(R: 0) ||
3986 N->getOperand(Num: 1) != SmulLoHi.getValue(R: 1) ||
3987 N->getOperand(Num: 2) != Subc.getValue(R: 1))
3988 break;
3989
3990 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3991 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3992 N->getOperand(0), getAL(CurDAG, dl),
3993 CurDAG->getRegister(0, MVT::i32) };
3994 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3995 return;
3996 }
3997 case ISD::LOAD: {
3998 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999 return;
4000 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4001 if (tryT2IndexedLoad(N))
4002 return;
4003 } else if (Subtarget->isThumb()) {
4004 if (tryT1IndexedLoad(N))
4005 return;
4006 } else if (tryARMIndexedLoad(N))
4007 return;
4008 // Other cases are autogenerated.
4009 break;
4010 }
4011 case ISD::MLOAD:
4012 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4013 return;
4014 // Other cases are autogenerated.
4015 break;
4016 case ARMISD::WLSSETUP: {
4017 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4018 N->getOperand(0));
4019 ReplaceUses(F: N, T: New);
4020 CurDAG->RemoveDeadNode(N);
4021 return;
4022 }
4023 case ARMISD::WLS: {
4024 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4025 N->getOperand(1), N->getOperand(2),
4026 N->getOperand(0));
4027 ReplaceUses(F: N, T: New);
4028 CurDAG->RemoveDeadNode(N);
4029 return;
4030 }
4031 case ARMISD::LE: {
4032 SDValue Ops[] = { N->getOperand(Num: 1),
4033 N->getOperand(Num: 2),
4034 N->getOperand(Num: 0) };
4035 unsigned Opc = ARM::t2LoopEnd;
4036 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4037 ReplaceUses(F: N, T: New);
4038 CurDAG->RemoveDeadNode(N);
4039 return;
4040 }
4041 case ARMISD::LDRD: {
4042 if (Subtarget->isThumb2())
4043 break; // TableGen handles isel in this case.
4044 SDValue Base, RegOffset, ImmOffset;
4045 const SDValue &Chain = N->getOperand(Num: 0);
4046 const SDValue &Addr = N->getOperand(Num: 1);
4047 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4048 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4049 // The register-offset variant of LDRD mandates that the register
4050 // allocated to RegOffset is not reused in any of the remaining operands.
4051 // This restriction is currently not enforced. Therefore emitting this
4052 // variant is explicitly avoided.
4053 Base = Addr;
4054 RegOffset = CurDAG->getRegister(0, MVT::i32);
4055 }
4056 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4057 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4058 {MVT::Untyped, MVT::Other}, Ops);
4059 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4060 SDValue(New, 0));
4061 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4062 SDValue(New, 0));
4063 transferMemOperands(N, Result: New);
4064 ReplaceUses(F: SDValue(N, 0), T: Lo);
4065 ReplaceUses(F: SDValue(N, 1), T: Hi);
4066 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 1));
4067 CurDAG->RemoveDeadNode(N);
4068 return;
4069 }
4070 case ARMISD::STRD: {
4071 if (Subtarget->isThumb2())
4072 break; // TableGen handles isel in this case.
4073 SDValue Base, RegOffset, ImmOffset;
4074 const SDValue &Chain = N->getOperand(Num: 0);
4075 const SDValue &Addr = N->getOperand(Num: 3);
4076 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4077 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4078 // The register-offset variant of STRD mandates that the register
4079 // allocated to RegOffset is not reused in any of the remaining operands.
4080 // This restriction is currently not enforced. Therefore emitting this
4081 // variant is explicitly avoided.
4082 Base = Addr;
4083 RegOffset = CurDAG->getRegister(0, MVT::i32);
4084 }
4085 SDNode *RegPair =
4086 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4087 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4088 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4089 transferMemOperands(N, Result: New);
4090 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 0));
4091 CurDAG->RemoveDeadNode(N);
4092 return;
4093 }
4094 case ARMISD::LOOP_DEC: {
4095 SDValue Ops[] = { N->getOperand(Num: 1),
4096 N->getOperand(Num: 2),
4097 N->getOperand(Num: 0) };
4098 SDNode *Dec =
4099 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4100 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4101 ReplaceUses(F: N, T: Dec);
4102 CurDAG->RemoveDeadNode(N);
4103 return;
4104 }
4105 case ARMISD::BRCOND: {
4106 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4107 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4108 // Pattern complexity = 6 cost = 1 size = 0
4109
4110 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4112 // Pattern complexity = 6 cost = 1 size = 0
4113
4114 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4116 // Pattern complexity = 6 cost = 1 size = 0
4117
4118 unsigned Opc = Subtarget->isThumb() ?
4119 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4120 SDValue Chain = N->getOperand(Num: 0);
4121 SDValue N1 = N->getOperand(Num: 1);
4122 SDValue N2 = N->getOperand(Num: 2);
4123 SDValue N3 = N->getOperand(Num: 3);
4124 SDValue InGlue = N->getOperand(Num: 4);
4125 assert(N1.getOpcode() == ISD::BasicBlock);
4126 assert(N2.getOpcode() == ISD::Constant);
4127 assert(N3.getOpcode() == ISD::Register);
4128
4129 unsigned CC = (unsigned)N2->getAsZExtVal();
4130
4131 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4132 if (InGlue.getOperand(i: 0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4133 SDValue Int = InGlue.getOperand(i: 0);
4134 uint64_t ID = Int->getConstantOperandVal(Num: 1);
4135
4136 // Handle low-overhead loops.
4137 if (ID == Intrinsic::loop_decrement_reg) {
4138 SDValue Elements = Int.getOperand(i: 2);
4139 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4140 dl, MVT::i32);
4141
4142 SDValue Args[] = { Elements, Size, Int.getOperand(i: 0) };
4143 SDNode *LoopDec =
4144 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4145 CurDAG->getVTList(MVT::i32, MVT::Other),
4146 Args);
4147 ReplaceUses(F: Int.getNode(), T: LoopDec);
4148
4149 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4150 SDNode *LoopEnd =
4151 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4152
4153 ReplaceUses(F: N, T: LoopEnd);
4154 CurDAG->RemoveDeadNode(N);
4155 CurDAG->RemoveDeadNode(N: InGlue.getNode());
4156 CurDAG->RemoveDeadNode(N: Int.getNode());
4157 return;
4158 }
4159 }
4160
4161 bool SwitchEQNEToPLMI;
4162 SelectCMPZ(N: InGlue.getNode(), SwitchEQNEToPLMI);
4163 InGlue = N->getOperand(Num: 4);
4164
4165 if (SwitchEQNEToPLMI) {
4166 switch ((ARMCC::CondCodes)CC) {
4167 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4168 case ARMCC::NE:
4169 CC = (unsigned)ARMCC::MI;
4170 break;
4171 case ARMCC::EQ:
4172 CC = (unsigned)ARMCC::PL;
4173 break;
4174 }
4175 }
4176 }
4177
4178 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4179 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
4180 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4181 MVT::Glue, Ops);
4182 Chain = SDValue(ResNode, 0);
4183 if (N->getNumValues() == 2) {
4184 InGlue = SDValue(ResNode, 1);
4185 ReplaceUses(F: SDValue(N, 1), T: InGlue);
4186 }
4187 ReplaceUses(F: SDValue(N, 0),
4188 T: SDValue(Chain.getNode(), Chain.getResNo()));
4189 CurDAG->RemoveDeadNode(N);
4190 return;
4191 }
4192
4193 case ARMISD::CMPZ: {
4194 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4195 // This allows us to avoid materializing the expensive negative constant.
4196 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4197 // for its glue output.
4198 SDValue X = N->getOperand(Num: 0);
4199 auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1).getNode());
4200 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4201 int64_t Addend = -C->getSExtValue();
4202
4203 SDNode *Add = nullptr;
4204 // ADDS can be better than CMN if the immediate fits in a
4205 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4206 // Outside that range we can just use a CMN which is 32-bit but has a
4207 // 12-bit immediate range.
4208 if (Addend < 1<<8) {
4209 if (Subtarget->isThumb2()) {
4210 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4211 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4212 CurDAG->getRegister(0, MVT::i32) };
4213 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4214 } else {
4215 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4216 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4217 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4218 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4219 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4220 }
4221 }
4222 if (Add) {
4223 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4224 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4225 }
4226 }
4227 // Other cases are autogenerated.
4228 break;
4229 }
4230
4231 case ARMISD::CMOV: {
4232 SDValue InGlue = N->getOperand(Num: 4);
4233
4234 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4235 bool SwitchEQNEToPLMI;
4236 SelectCMPZ(N: InGlue.getNode(), SwitchEQNEToPLMI);
4237
4238 if (SwitchEQNEToPLMI) {
4239 SDValue ARMcc = N->getOperand(Num: 2);
4240 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
4241
4242 switch (CC) {
4243 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4244 case ARMCC::NE:
4245 CC = ARMCC::MI;
4246 break;
4247 case ARMCC::EQ:
4248 CC = ARMCC::PL;
4249 break;
4250 }
4251 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4252 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), NewARMcc,
4253 N->getOperand(Num: 3), N->getOperand(Num: 4)};
4254 CurDAG->MorphNodeTo(N, Opc: ARMISD::CMOV, VTs: N->getVTList(), Ops);
4255 }
4256
4257 }
4258 // Other cases are autogenerated.
4259 break;
4260 }
4261 case ARMISD::VZIP: {
4262 EVT VT = N->getValueType(ResNo: 0);
4263 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4264 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4265 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4266 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4267 SDValue Pred = getAL(CurDAG, dl);
4268 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4269 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4270 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4271 return;
4272 }
4273 case ARMISD::VUZP: {
4274 EVT VT = N->getValueType(ResNo: 0);
4275 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4276 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4277 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4278 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4279 SDValue Pred = getAL(CurDAG, dl);
4280 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4281 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4282 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4283 return;
4284 }
4285 case ARMISD::VTRN: {
4286 EVT VT = N->getValueType(ResNo: 0);
4287 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4288 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4289 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4290 SDValue Pred = getAL(CurDAG, dl);
4291 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4292 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4293 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4294 return;
4295 }
4296 case ARMISD::BUILD_VECTOR: {
4297 EVT VecVT = N->getValueType(ResNo: 0);
4298 EVT EltVT = VecVT.getVectorElementType();
4299 unsigned NumElts = VecVT.getVectorNumElements();
4300 if (EltVT == MVT::f64) {
4301 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4302 ReplaceNode(
4303 F: N, T: createDRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
4304 return;
4305 }
4306 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4307 if (NumElts == 2) {
4308 ReplaceNode(
4309 F: N, T: createSRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
4310 return;
4311 }
4312 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4313 ReplaceNode(F: N,
4314 T: createQuadSRegsNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1),
4315 V2: N->getOperand(Num: 2), V3: N->getOperand(Num: 3)));
4316 return;
4317 }
4318
4319 case ARMISD::VLD1DUP: {
4320 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4321 ARM::VLD1DUPd32 };
4322 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4323 ARM::VLD1DUPq32 };
4324 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
4325 return;
4326 }
4327
4328 case ARMISD::VLD2DUP: {
4329 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4330 ARM::VLD2DUPd32 };
4331 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 2, DOpcodes: Opcodes);
4332 return;
4333 }
4334
4335 case ARMISD::VLD3DUP: {
4336 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4337 ARM::VLD3DUPd16Pseudo,
4338 ARM::VLD3DUPd32Pseudo };
4339 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 3, DOpcodes: Opcodes);
4340 return;
4341 }
4342
4343 case ARMISD::VLD4DUP: {
4344 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4345 ARM::VLD4DUPd16Pseudo,
4346 ARM::VLD4DUPd32Pseudo };
4347 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 4, DOpcodes: Opcodes);
4348 return;
4349 }
4350
4351 case ARMISD::VLD1DUP_UPD: {
4352 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4353 ARM::VLD1DUPd16wb_fixed,
4354 ARM::VLD1DUPd32wb_fixed };
4355 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4356 ARM::VLD1DUPq16wb_fixed,
4357 ARM::VLD1DUPq32wb_fixed };
4358 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
4359 return;
4360 }
4361
4362 case ARMISD::VLD2DUP_UPD: {
4363 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4364 ARM::VLD2DUPd16wb_fixed,
4365 ARM::VLD2DUPd32wb_fixed,
4366 ARM::VLD1q64wb_fixed };
4367 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4368 ARM::VLD2DUPq16EvenPseudo,
4369 ARM::VLD2DUPq32EvenPseudo };
4370 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4371 ARM::VLD2DUPq16OddPseudoWB_fixed,
4372 ARM::VLD2DUPq32OddPseudoWB_fixed };
4373 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0, QOpcodes1);
4374 return;
4375 }
4376
4377 case ARMISD::VLD3DUP_UPD: {
4378 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4379 ARM::VLD3DUPd16Pseudo_UPD,
4380 ARM::VLD3DUPd32Pseudo_UPD,
4381 ARM::VLD1d64TPseudoWB_fixed };
4382 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4383 ARM::VLD3DUPq16EvenPseudo,
4384 ARM::VLD3DUPq32EvenPseudo };
4385 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4386 ARM::VLD3DUPq16OddPseudo_UPD,
4387 ARM::VLD3DUPq32OddPseudo_UPD };
4388 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4389 return;
4390 }
4391
4392 case ARMISD::VLD4DUP_UPD: {
4393 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4394 ARM::VLD4DUPd16Pseudo_UPD,
4395 ARM::VLD4DUPd32Pseudo_UPD,
4396 ARM::VLD1d64QPseudoWB_fixed };
4397 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4398 ARM::VLD4DUPq16EvenPseudo,
4399 ARM::VLD4DUPq32EvenPseudo };
4400 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4401 ARM::VLD4DUPq16OddPseudo_UPD,
4402 ARM::VLD4DUPq32OddPseudo_UPD };
4403 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4404 return;
4405 }
4406
4407 case ARMISD::VLD1_UPD: {
4408 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4409 ARM::VLD1d16wb_fixed,
4410 ARM::VLD1d32wb_fixed,
4411 ARM::VLD1d64wb_fixed };
4412 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4413 ARM::VLD1q16wb_fixed,
4414 ARM::VLD1q32wb_fixed,
4415 ARM::VLD1q64wb_fixed };
4416 SelectVLD(N, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4417 return;
4418 }
4419
4420 case ARMISD::VLD2_UPD: {
4421 if (Subtarget->hasNEON()) {
4422 static const uint16_t DOpcodes[] = {
4423 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4424 ARM::VLD1q64wb_fixed};
4425 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4426 ARM::VLD2q16PseudoWB_fixed,
4427 ARM::VLD2q32PseudoWB_fixed};
4428 SelectVLD(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4429 } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, /*NumVecs=*/2, Opcodes, /*HasWriteback=*/true);
    }
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, /*isUpdating=*/true, /*NumVecs=*/3, DOpcodes, QOpcodes0,
              QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
          ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, /*isUpdating=*/true, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, /*NumVecs=*/4, Opcodes, /*HasWriteback=*/true);
    }
    return;
  }

  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, /*isUpdating=*/true, /*NumVecs=*/2, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }
    break;
  }

  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, /*isUpdating=*/true, /*NumVecs=*/3, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, /*isUpdating=*/true, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/true, /*NumVecs=*/2,
                    DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/true, /*NumVecs=*/3,
                    DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/true, /*NumVecs=*/4,
                    DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, /*isUpdating=*/true, /*NumVecs=*/1, DOpcodes, QOpcodes,
              /*QOpcodes1=*/nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, /*isUpdating=*/true, /*NumVecs=*/2, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, /*isUpdating=*/true, /*NumVecs=*/3, DOpcodes, QOpcodes0,
              QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
          ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, /*isUpdating=*/true, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, /*isUpdating=*/true, /*NumVecs=*/2, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, /*isUpdating=*/true, /*NumVecs=*/3, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, /*isUpdating=*/true, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/true, /*NumVecs=*/2,
                    DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/true, /*NumVecs=*/3,
                    DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/true, /*NumVecs=*/4,
                    DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */

      // The ARM mrrc2 instruction does not allow predicates: the top 4 bits
      // of the encoded instruction are always '1111'. Assembly syntax does
      // accept an explicit AL predicate on mrrc2, but it makes no difference
      // to the encoding.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
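      // In ARM mode the load produces a single untyped GPRPair result; the
      // two i32 halves are peeled off below with EXTRACT_SUBREG via the
      // gsub_0/gsub_1 subregister indices.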
      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(
              TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, SDValue(Ld, 0),
              SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(
              TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, SDValue(Ld, 0),
              SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // A doubleword store-exclusive returns an i32 value: the status of the
      // issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, /*isUpdating=*/false, /*NumVecs=*/1, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, /*isUpdating=*/false, /*NumVecs=*/2, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, /*isUpdating=*/false, /*NumVecs=*/3, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, /*isUpdating=*/false, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, /*isUpdating=*/false, /*NumVecs=*/2, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, /*isUpdating=*/false, /*NumVecs=*/3, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, /*isUpdating=*/false, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /*IsIntrinsic=*/true, /*isUpdating=*/false,
                   /*NumVecs=*/2, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /*IsIntrinsic=*/true, /*isUpdating=*/false,
                   /*NumVecs=*/3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /*IsIntrinsic=*/true, /*isUpdating=*/false,
                   /*NumVecs=*/4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/false,
                      /*NumVecs=*/2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/false,
                      /*NumVecs=*/3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/false,
                      /*NumVecs=*/4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, /*isUpdating=*/false, /*NumVecs=*/1, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, /*isUpdating=*/false, /*NumVecs=*/2, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, /*isUpdating=*/false, /*NumVecs=*/3, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, /*isUpdating=*/false, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, /*isUpdating=*/false, /*NumVecs=*/2, DOpcodes, QOpcodes,
                /*QOpcodes1=*/nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, /*isUpdating=*/false, /*NumVecs=*/3, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, /*isUpdating=*/false, /*NumVecs=*/4, DOpcodes, QOpcodes0,
                QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/false,
                      /*NumVecs=*/2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/false,
                      /*NumVecs=*/3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/false,
                      /*NumVecs=*/4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
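      // Pre-incrementing gather loads with base-register writeback; the
      // opcode table is indexed by memory element size (32- or 64-bit).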
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, /*NumVecs=*/2, Opcodes, /*HasWriteback=*/false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, /*NumVecs=*/4, Opcodes, /*HasWriteback=*/false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

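    // In the SelectMVE_LongShift calls below, the first boolean argument
    // indicates an immediate shift amount and the second a saturation
    // operand; this is assumed from the helper's declaration. The
    // UQRSHLL/SQRSHRL forms take a register shift that may saturate.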
    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
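      // Opcode tables for the long multiply-accumulate reductions. Rows pair
      // the 16- and 32-bit element forms; only the signed table carries the
      // exchanging (x) and subtracting (VMLSLDAV) variants, which have no
      // unsigned counterparts.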
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

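    // Incrementing/decrementing vector duplicates (VIDUP/VDDUP and the
    // wrapping VIWDUP/VDWDUP forms). The boolean passed to SelectMVE_VxDUP
    // selects the wrapping variants, which take an extra wrap-limit operand.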
    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32-bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64-bit), extract the integer value of each
// field, and append the resulting operands to the provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked
// Register mask operand, which expresses which register is to be used, e.g.
// r8, and in which mode it is to be used, e.g. usr. Returns -1 to signify
// that the string was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores
// and those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps an M class special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R bit) in bit 4: the R
  // bit is 1 if the register is spsr, and 0 for one of cpsr/apsr. Bits 3-0
  // contain the fields of the special register to be accessed, as set by the
  // flags provided with the register.
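  // For example, "spsr" with flags "fc" produces 0x19: the R bit (0x10) plus
  // the f (0x8) and c (0x1) field bits.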
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), then we need to lower it to an MRC node (32-bit) or an
    // MRRC node (64-bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on
  // the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32,
                                          MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), then we need to lower it to an MCR node (32-bit) or an
    // MCRR node (64-bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin() + 2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin() + 2, WriteValue, WriteValue + 2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;
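  // For example, "spsr_fc" splits into Reg = "spsr" and Flags = "fc".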

  // If the target is M class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even, even+1) GPR pairs, and use %n and %Hn to refer to the
  // individual regs respectively. Since there is no constraint to explicitly
  // specify a reg pair, we use the GPRPair reg class for "%r" for 64-bit
  // data. For Thumb, the 64-bit data may be referred to by the H, Q, R
  // modifiers, so we still pack them into a GPRPair.
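  // For instance, a user might write something along these lines (an
  // illustrative example, not taken from this file):
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val64) : "r"(Ptr));
  // where %0 and %H0 name the low and high registers of the pair bound to
  // the 64-bit operand Val64.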

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm,
    // and the second is a constant with the value of the immediate. If we get
    // here and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the
    // input operand. If we get here and we have a Kind::Mem, skip the next
    // operand (so it doesn't get misinterpreted), and continue. We do this
    // here because it's important to update the OpChanged array correctly
    // before moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) ||
        NumRegs != 2)
      continue;

    assert((i + 2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i + 1);
    SDValue V1 = N->getOperand(i + 2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end() - 1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] =
          CurDAG->getTargetConstant(Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New =
      CurDAG->getNode(N->getOpcode(), SDLoc(N),
                      CurDAG->getVTList(MVT::Other, MVT::Glue),
                      AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
  case InlineAsm::ConstraintCode::Um:
  case InlineAsm::ConstraintCode::Un:
  case InlineAsm::ConstraintCode::Uq:
  case InlineAsm::ConstraintCode::Us:
  case InlineAsm::ConstraintCode::Ut:
  case InlineAsm::ConstraintCode::Uv:
  case InlineAsm::ConstraintCode::Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}
