//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  static char ID;

  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, /*AllowROR=*/false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, /*AllowROR=*/true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/true, /*BW=*/9,
                                         /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/false, /*BW=*/6,
                                         /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template <int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template <int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }
162
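  // Match an EXTRACT_SUBVECTOR of the high half of a 128-bit vector (looking
  // through a bitcast on little-endian), returning the 128-bit source in Res.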
  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }
178
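  // Match a rounding shift right: (VLSHR (ADD X, 1 << (ShtAmt - 1)), ShtAmt).
  // On success, Res1 is X and Res2 is the shift amount as a target constant.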
  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }
207
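  // Return true if N is an UNDEF or a DUP/SPLAT_VECTOR of constant zero
  // (integer or floating point).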
  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }
227
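  // Return true if N is a DUP/SPLAT_VECTOR of constant zero (integer or
  // floating point).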
  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }
243
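  // Return true if N is a DUP/SPLAT_VECTOR of floating-point negative zero.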
  bool SelectDupNegativeZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
      return Const && Const->isZero() && Const->isNegative();
    }
    }

    return false;
  }
255
256 template<MVT::SimpleValueType VT>
257 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
258 return SelectSVEAddSubImm(N, VT, Imm, Shift);
259 }
260
261 template <MVT::SimpleValueType VT, bool Negate>
262 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
263 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
264 }
265
266 template <MVT::SimpleValueType VT>
267 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
268 return SelectSVECpyDupImm(N, VT, Imm, Shift);
269 }
270
271 template <MVT::SimpleValueType VT, bool Invert = false>
272 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
273 return SelectSVELogicalImm(N, VT, Imm, Invert);
274 }
275
276 template <MVT::SimpleValueType VT>
277 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
278 return SelectSVEArithImm(N, VT, Imm);
279 }
280
281 template <unsigned Low, unsigned High, bool AllowSaturation = false>
282 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
283 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
284 }
285
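  // Select the shift amount splatted across an SVE vector, accepting values
  // in [1, element width]; larger amounts may be saturated to the high bound.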
  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /*Low=*/1,
                             /*High=*/EltVT.getFixedSizeInBits(),
                             /*AllowSaturation=*/true, Imm);
  }
295
296 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
297 template<signed Min, signed Max, signed Scale, bool Shift>
298 bool SelectCntImm(SDValue N, SDValue &Imm) {
299 if (!isa<ConstantSDNode>(N))
300 return false;
301
302 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
303 if (Shift)
304 MulImm = 1LL << MulImm;
305
306 if ((MulImm % std::abs(x: Scale)) != 0)
307 return false;
308
309 MulImm /= Scale;
310 if ((MulImm >= Min) && (MulImm <= Max)) {
311 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
312 return true;
313 }
314
315 return false;
316 }
317
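  // Select a constant in the range [0, Max], returning it multiplied by Scale
  // as an i32 target constant.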
  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }
333
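  // Convert a constant C in [0, Max] into the register BaseReg + C.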
  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }
347
  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
352 SDValue createDTuple(ArrayRef<SDValue> Vecs);
353 SDValue createQTuple(ArrayRef<SDValue> Vecs);
354 // Form a sequence of SVE registers for instructions using list of vectors,
355 // e.g. structured loads and stores (ldN, stN).
356 SDValue createZTuple(ArrayRef<SDValue> Vecs);
357
358 // Similar to above, except the register must start at a multiple of the
359 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
360 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
361
362 /// Generic helper for the createDTuple/createQTuple
363 /// functions. Those should almost always be called instead.
364 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
365 const unsigned SubRegs[]);
366
367 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
368
369 bool tryIndexedLoad(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400
401 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
402 /// SVE Reg+Imm addressing mode.
403 template <int64_t Min, int64_t Max>
404 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
405 SDValue &OffImm);
406 /// SVE Reg+Reg address mode.
407 template <unsigned Scale>
408 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
409 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
410 }
411
412 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
413 uint32_t MaxImm);
414
415 template <unsigned MaxIdx, unsigned Scale>
416 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
417 return SelectSMETileSlice(N, MaxSize: MaxIdx, Vector, Offset, Scale);
418 }
419
420 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
421 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
425 unsigned Opc_rr, unsigned Opc_ri);
426 std::tuple<unsigned, SDValue, SDValue>
427 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
428 const SDValue &OldBase, const SDValue &OldOffset,
429 unsigned Scale);
430
431 bool tryBitfieldExtractOp(SDNode *N);
432 bool tryBitfieldExtractOpFromSExt(SDNode *N);
433 bool tryBitfieldInsertOp(SDNode *N);
434 bool tryBitfieldInsertInZeroOp(SDNode *N);
435 bool tryShiftAmountMod(SDNode *N);
436
437 bool tryReadRegister(SDNode *N);
438 bool tryWriteRegister(SDNode *N);
439
440 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
441 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
442
443 bool trySelectXAR(SDNode *N);
444
445// Include the pieces autogenerated from the target description.
446#include "AArch64GenDAGISel.inc"
447
448private:
449 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
450 SDValue &Shift);
451 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
452 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
453 SDValue &OffImm) {
454 return SelectAddrModeIndexedBitWidth(N, IsSignedImm: true, BW: 7, Size, Base, OffImm);
455 }
456 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
457 unsigned Size, SDValue &Base,
458 SDValue &OffImm);
459 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm);
461 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &Offset, SDValue &SignExtend,
465 SDValue &DoShift);
466 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &Offset, SDValue &SignExtend,
468 SDValue &DoShift);
469 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
470 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
471 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
472 SDValue &Offset, SDValue &SignExtend);
473
474 template<unsigned RegWidth>
475 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
476 return SelectCVTFixedPosOperand(N, FixedPos, Width: RegWidth);
477 }
478
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosRecipOperand(N, FixedPos, Width: RegWidth);
484 }
485
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
487 unsigned Width);
488
489 bool SelectCMP_SWAP(SDNode *N);
490
491 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
492 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493 bool Negate);
494 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
496
497 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
498 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
499 bool AllowSaturation, SDValue &Imm);
500
501 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
502 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
503 SDValue &Offset);
504 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
505 SDValue &Offset, unsigned Scale = 1);
506
507 bool SelectAllActivePredicate(SDValue N);
508 bool SelectAnyPredicate(SDValue N);
509};
510} // end anonymous namespace
511
512char AArch64DAGToDAGISel::ID = 0;
513
514INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
515
/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if N is a constant operand.
// If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node has the
// specified opcode and an immediate integer right operand. If so, Imm will
// receive the value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif
551
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}
576
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = N.getNode()->getAsZExtVal();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
607
/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = N.getNode()->getAsZExtVal();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}
640
641/// getShiftTypeForNode - Translate a shift node to the corresponding
642/// ShiftType value.
643static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
644 switch (N.getOpcode()) {
645 default:
646 return AArch64_AM::InvalidShiftExtend;
647 case ISD::SHL:
648 return AArch64_AM::LSL;
649 case ISD::SRL:
650 return AArch64_AM::LSR;
651 case ISD::SRA:
652 return AArch64_AM::ASR;
653 case ISD::ROTR:
654 return AArch64_AM::ROR;
655 }
656}
657
658/// Determine whether it is worth it to fold SHL into the addressing
659/// mode.
660static bool isWorthFoldingSHL(SDValue V) {
661 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
662 // It is worth folding logical shift of up to three places.
663 auto *CSD = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
664 if (!CSD)
665 return false;
666 unsigned ShiftVal = CSD->getZExtValue();
667 if (ShiftVal > 3)
668 return false;
669
670 // Check if this particular node is reused in any non-memory related
671 // operation. If yes, do not try to fold this node into the address
672 // computation, since the computation will be kept.
673 const SDNode *Node = V.getNode();
674 for (SDNode *UI : Node->uses())
675 if (!isa<MemSDNode>(Val: *UI))
676 for (SDNode *UII : UI->uses())
677 if (!isa<MemSDNode>(Val: *UII))
678 return false;
679 return true;
680}
681
/// Determine whether it is worth folding V into an extended register
/// addressing mode.
684bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
685 // Trivial if we are optimizing for code size or if there is only
686 // one use of the value.
687 if (CurDAG->shouldOptForSize() || V.hasOneUse())
688 return true;
689
690 // If a subtarget has a slow shift, folding a shift into multiple loads
691 // costs additional micro-ops.
692 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
693 return false;
694
695 // Check whether we're going to emit the address arithmetic anyway because
696 // it's used by a non-address operation.
697 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
698 return true;
699 if (V.getOpcode() == ISD::ADD) {
700 const SDValue LHS = V.getOperand(i: 0);
701 const SDValue RHS = V.getOperand(i: 1);
702 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: LHS))
703 return true;
704 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: RHS))
705 return true;
706 }
707
708 // It hurts otherwise, since the value will be reused.
709 return false;
710}
711
712/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
713/// to select more shifted register
714bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
715 SDValue &Shift) {
716 EVT VT = N.getValueType();
717 if (VT != MVT::i32 && VT != MVT::i64)
718 return false;
719
720 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
721 return false;
722 SDValue LHS = N.getOperand(i: 0);
723 if (!LHS->hasOneUse())
724 return false;
725
726 unsigned LHSOpcode = LHS->getOpcode();
727 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
728 return false;
729
730 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: LHS.getOperand(i: 1));
731 if (!ShiftAmtNode)
732 return false;
733
734 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
735 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
736 if (!RHSC)
737 return false;
738
739 APInt AndMask = RHSC->getAPIntValue();
740 unsigned LowZBits, MaskLen;
741 if (!AndMask.isShiftedMask(MaskIdx&: LowZBits, MaskLen))
742 return false;
743
744 unsigned BitWidth = N.getValueSizeInBits();
745 SDLoc DL(LHS);
746 uint64_t NewShiftC;
747 unsigned NewShiftOp;
748 if (LHSOpcode == ISD::SHL) {
749 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
750 // BitWidth != LowZBits + MaskLen doesn't match the pattern
751 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
752 return false;
753
754 NewShiftC = LowZBits - ShiftAmtC;
755 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
756 } else {
757 if (LowZBits == 0)
758 return false;
759
760 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
761 NewShiftC = LowZBits + ShiftAmtC;
762 if (NewShiftC >= BitWidth)
763 return false;
764
765 // SRA need all high bits
766 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
767 return false;
768
769 // SRL high bits can be 0 or 1
770 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
771 return false;
772
773 if (LHSOpcode == ISD::SRL)
774 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
775 else
776 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
777 }
778
779 assert(NewShiftC < BitWidth && "Invalid shift amount");
780 SDValue NewShiftAmt = CurDAG->getTargetConstant(Val: NewShiftC, DL, VT);
781 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT);
782 Reg = SDValue(CurDAG->getMachineNode(Opcode: NewShiftOp, dl: DL, VT, Op1: LHS->getOperand(Num: 0),
783 Op2: NewShiftAmt, Op3: BitWidthMinus1),
784 0);
785 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LowZBits);
786 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
787 return true;
788}
789
790/// getExtendTypeForNode - Translate an extend node to the corresponding
791/// ExtendType value.
792static AArch64_AM::ShiftExtendType
793getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
794 if (N.getOpcode() == ISD::SIGN_EXTEND ||
795 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
796 EVT SrcVT;
797 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
798 SrcVT = cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT();
799 else
800 SrcVT = N.getOperand(i: 0).getValueType();
801
802 if (!IsLoadStore && SrcVT == MVT::i8)
803 return AArch64_AM::SXTB;
804 else if (!IsLoadStore && SrcVT == MVT::i16)
805 return AArch64_AM::SXTH;
806 else if (SrcVT == MVT::i32)
807 return AArch64_AM::SXTW;
808 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
809
810 return AArch64_AM::InvalidShiftExtend;
811 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
812 N.getOpcode() == ISD::ANY_EXTEND) {
813 EVT SrcVT = N.getOperand(i: 0).getValueType();
814 if (!IsLoadStore && SrcVT == MVT::i8)
815 return AArch64_AM::UXTB;
816 else if (!IsLoadStore && SrcVT == MVT::i16)
817 return AArch64_AM::UXTH;
818 else if (SrcVT == MVT::i32)
819 return AArch64_AM::UXTW;
820 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
821
822 return AArch64_AM::InvalidShiftExtend;
823 } else if (N.getOpcode() == ISD::AND) {
824 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
825 if (!CSD)
826 return AArch64_AM::InvalidShiftExtend;
827 uint64_t AndMask = CSD->getZExtValue();
828
829 switch (AndMask) {
830 default:
831 return AArch64_AM::InvalidShiftExtend;
832 case 0xFF:
833 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
834 case 0xFFFF:
835 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
836 case 0xFFFFFFFF:
837 return AArch64_AM::UXTW;
838 }
839 }
840
841 return AArch64_AM::InvalidShiftExtend;
842}
843
/// Determine whether it is worth folding V into an extended register operand
/// of an Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
/// instruction, and the shift should be treated as worth folding even if it
/// has multiple uses.
848bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
849 // Trivial if we are optimizing for code size or if there is only
850 // one use of the value.
851 if (CurDAG->shouldOptForSize() || V.hasOneUse())
852 return true;
853
854 // If a subtarget has a fastpath LSL we can fold a logical shift into
855 // the add/sub and save a cycle.
856 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
857 V.getConstantOperandVal(i: 1) <= 4 &&
858 getExtendTypeForNode(N: V.getOperand(i: 0)) == AArch64_AM::InvalidShiftExtend)
859 return true;
860
861 // It hurts otherwise, since the value will be reused.
862 return false;
863}
864
865/// SelectShiftedRegister - Select a "shifted register" operand. If the value
866/// is not shifted, set the Shift operand to default of "LSL 0". The logical
867/// instructions allow the shifted register to be rotated, but the arithmetic
868/// instructions do not. The AllowROR parameter specifies whether ROR is
869/// supported.
870bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
871 SDValue &Reg, SDValue &Shift) {
872 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
873 return true;
874
875 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
876 if (ShType == AArch64_AM::InvalidShiftExtend)
877 return false;
878 if (!AllowROR && ShType == AArch64_AM::ROR)
879 return false;
880
881 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
882 unsigned BitSize = N.getValueSizeInBits();
883 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
884 unsigned ShVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
885
886 Reg = N.getOperand(i: 0);
887 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
888 return isWorthFoldingALU(V: N, LSL: true);
889 }
890
891 return false;
892}
893
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
}
905
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
923
/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
    // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}
976
/// SelectArithUXTXRegister - Select a "UXTX register" operand. This operand is
/// used by instructions that accept an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}
1000
1001/// If there's a use of this ADDlow that's not itself a load/store then we'll
1002/// need to create a real ADD instruction from it anyway and there's no point in
1003/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1004/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1005/// leads to duplicated ADRP instructions.
1006static bool isWorthFoldingADDlow(SDValue N) {
1007 for (auto *Use : N->uses()) {
1008 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1009 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1010 Use->getOpcode() != ISD::ATOMIC_STORE)
1011 return false;
1012
1013 // ldar and stlr have much more restrictive addressing modes (just a
1014 // register).
1015 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: Use)->getSuccessOrdering()))
1016 return false;
1017 }
1018
1019 return true;
1020}
1021
1022/// Check if the immediate offset is valid as a scaled immediate.
1023static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1024 unsigned Size) {
1025 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1026 Offset < (Range << Log2_32(Value: Size)))
1027 return true;
1028 return false;
1029}
1030
1031/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1032/// immediate" address. The "Size" argument is the size in bytes of the memory
1033/// reference, which determines the scale.
1034bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1035 unsigned BW, unsigned Size,
1036 SDValue &Base,
1037 SDValue &OffImm) {
1038 SDLoc dl(N);
1039 const DataLayout &DL = CurDAG->getDataLayout();
1040 const TargetLowering *TLI = getTargetLowering();
1041 if (N.getOpcode() == ISD::FrameIndex) {
1042 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1043 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1044 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1045 return true;
1046 }
1047
  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed immediate mode selected here doesn't support labels/immediates,
  // only base+offset.
1050 if (CurDAG->isBaseWithConstantOffset(Op: N)) {
1051 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1052 if (IsSignedImm) {
1053 int64_t RHSC = RHS->getSExtValue();
1054 unsigned Scale = Log2_32(Value: Size);
1055 int64_t Range = 0x1LL << (BW - 1);
1056
1057 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1058 RHSC < (Range << Scale)) {
1059 Base = N.getOperand(i: 0);
1060 if (Base.getOpcode() == ISD::FrameIndex) {
1061 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1062 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1063 }
1064 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1065 return true;
1066 }
1067 } else {
1068 // unsigned Immediate
1069 uint64_t RHSC = RHS->getZExtValue();
1070 unsigned Scale = Log2_32(Value: Size);
1071 uint64_t Range = 0x1ULL << BW;
1072
1073 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1074 Base = N.getOperand(i: 0);
1075 if (Base.getOpcode() == ISD::FrameIndex) {
1076 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1077 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1078 }
1079 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1080 return true;
1081 }
1082 }
1083 }
1084 }
1085 // Base only. The address will be materialized into a register before
1086 // the memory is accessed.
1087 // add x0, Xbase, #offset
1088 // stp x1, x2, [x0]
1089 Base = N;
1090 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1091 return true;
1092}
1093
1094/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1095/// immediate" address. The "Size" argument is the size in bytes of the memory
1096/// reference, which determines the scale.
1097bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1098 SDValue &Base, SDValue &OffImm) {
1099 SDLoc dl(N);
1100 const DataLayout &DL = CurDAG->getDataLayout();
1101 const TargetLowering *TLI = getTargetLowering();
1102 if (N.getOpcode() == ISD::FrameIndex) {
1103 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1104 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1105 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1106 return true;
1107 }
1108
1109 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1110 GlobalAddressSDNode *GAN =
1111 dyn_cast<GlobalAddressSDNode>(Val: N.getOperand(i: 1).getNode());
1112 Base = N.getOperand(i: 0);
1113 OffImm = N.getOperand(i: 1);
1114 if (!GAN)
1115 return true;
1116
1117 if (GAN->getOffset() % Size == 0 &&
1118 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1119 return true;
1120 }
1121
1122 if (CurDAG->isBaseWithConstantOffset(Op: N)) {
1123 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1124 int64_t RHSC = (int64_t)RHS->getZExtValue();
1125 unsigned Scale = Log2_32(Value: Size);
1126 if (isValidAsScaledImmediate(Offset: RHSC, Range: 0x1000, Size)) {
1127 Base = N.getOperand(i: 0);
1128 if (Base.getOpcode() == ISD::FrameIndex) {
1129 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1130 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1131 }
1132 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1133 return true;
1134 }
1135 }
1136 }
1137
1138 // Before falling back to our general case, check if the unscaled
1139 // instructions can handle this. If so, that's preferable.
1140 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1141 return false;
1142
1143 // Base only. The address will be materialized into a register before
1144 // the memory is accessed.
1145 // add x0, Xbase, #offset
1146 // ldr x0, [x0]
1147 Base = N;
1148 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1149 return true;
1150}
1151
1152/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1153/// immediate" address. This should only match when there is an offset that
1154/// is not valid for a scaled immediate addressing mode. The "Size" argument
1155/// is the size in bytes of the memory reference, which is needed here to know
1156/// what is valid for a scaled immediate.
1157bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1158 SDValue &Base,
1159 SDValue &OffImm) {
1160 if (!CurDAG->isBaseWithConstantOffset(Op: N))
1161 return false;
1162 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1163 int64_t RHSC = RHS->getSExtValue();
1164 if (RHSC >= -256 && RHSC < 256) {
1165 Base = N.getOperand(i: 0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1168 const TargetLowering *TLI = getTargetLowering();
1169 Base = CurDAG->getTargetFrameIndex(
1170 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1171 }
1172 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1173 return true;
1174 }
1175 }
1176 return false;
1177}
1178
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
                                       N);
}
1186
1187/// Check if the given SHL node (\p N), can be used to form an
1188/// extended register for an addressing mode.
1189bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1190 bool WantExtend, SDValue &Offset,
1191 SDValue &SignExtend) {
1192 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1193 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1194 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1195 return false;
1196
1197 SDLoc dl(N);
1198 if (WantExtend) {
1199 AArch64_AM::ShiftExtendType Ext =
1200 getExtendTypeForNode(N: N.getOperand(i: 0), IsLoadStore: true);
1201 if (Ext == AArch64_AM::InvalidShiftExtend)
1202 return false;
1203
1204 Offset = narrowIfNeeded(CurDAG, N: N.getOperand(i: 0).getOperand(i: 0));
1205 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1206 MVT::i32);
1207 } else {
1208 Offset = N.getOperand(i: 0);
1209 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1210 }
1211
1212 unsigned LegalShiftVal = Log2_32(Value: Size);
1213 unsigned ShiftVal = CSD->getZExtValue();
1214
1215 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1216 return false;
1217
1218 return isWorthFoldingAddr(V: N, Size);
1219}
1220
1221bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1222 SDValue &Base, SDValue &Offset,
1223 SDValue &SignExtend,
1224 SDValue &DoShift) {
1225 if (N.getOpcode() != ISD::ADD)
1226 return false;
1227 SDValue LHS = N.getOperand(i: 0);
1228 SDValue RHS = N.getOperand(i: 1);
1229 SDLoc dl(N);
1230
1231 // We don't want to match immediate adds here, because they are better lowered
1232 // to the register-immediate addressing modes.
1233 if (isa<ConstantSDNode>(Val: LHS) || isa<ConstantSDNode>(Val: RHS))
1234 return false;
1235
1236 // Check if this particular node is reused in any non-memory related
1237 // operation. If yes, do not try to fold this node into the address
1238 // computation, since the computation will be kept.
1239 const SDNode *Node = N.getNode();
1240 for (SDNode *UI : Node->uses()) {
1241 if (!isa<MemSDNode>(Val: *UI))
1242 return false;
1243 }
1244
1245 // Remember if it is worth folding N when it produces extended register.
1246 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1247
1248 // Try to match a shifted extend on the RHS.
1249 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1250 SelectExtendedSHL(N: RHS, Size, WantExtend: true, Offset, SignExtend)) {
1251 Base = LHS;
1252 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1253 return true;
1254 }
1255
1256 // Try to match a shifted extend on the LHS.
1257 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1258 SelectExtendedSHL(N: LHS, Size, WantExtend: true, Offset, SignExtend)) {
1259 Base = RHS;
1260 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1261 return true;
1262 }
1263
1264 // There was no shift, whatever else we find.
1265 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1266
1267 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1268 // Try to match an unshifted extend on the LHS.
1269 if (IsExtendedRegisterWorthFolding &&
1270 (Ext = getExtendTypeForNode(N: LHS, IsLoadStore: true)) !=
1271 AArch64_AM::InvalidShiftExtend) {
1272 Base = RHS;
1273 Offset = narrowIfNeeded(CurDAG, N: LHS.getOperand(i: 0));
1274 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1275 MVT::i32);
1276 if (isWorthFoldingAddr(V: LHS, Size))
1277 return true;
1278 }
1279
1280 // Try to match an unshifted extend on the RHS.
1281 if (IsExtendedRegisterWorthFolding &&
1282 (Ext = getExtendTypeForNode(N: RHS, IsLoadStore: true)) !=
1283 AArch64_AM::InvalidShiftExtend) {
1284 Base = LHS;
1285 Offset = narrowIfNeeded(CurDAG, N: RHS.getOperand(i: 0));
1286 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1287 MVT::i32);
1288 if (isWorthFoldingAddr(V: RHS, Size))
1289 return true;
1290 }
1291
1292 return false;
1293}
1294
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by a single MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constants in [0x0, 0xfff] can be encoded in an ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD ... LSL #12", ignore such
    // constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
1309
1310bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1311 SDValue &Base, SDValue &Offset,
1312 SDValue &SignExtend,
1313 SDValue &DoShift) {
1314 if (N.getOpcode() != ISD::ADD)
1315 return false;
1316 SDValue LHS = N.getOperand(i: 0);
1317 SDValue RHS = N.getOperand(i: 1);
1318 SDLoc DL(N);
1319
1320 // Check if this particular node is reused in any non-memory related
1321 // operation. If yes, do not try to fold this node into the address
1322 // computation, since the computation will be kept.
1323 const SDNode *Node = N.getNode();
1324 for (SDNode *UI : Node->uses()) {
1325 if (!isa<MemSDNode>(Val: *UI))
1326 return false;
1327 }
1328
  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
  // Instead it will use the [BaseReg + 0] address mode and generate
  // instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // In such situations, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip immediates that can be selected by a load/store addressing mode.
    // Also skip immediates that can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;
1348
1349 SDValue Ops[] = { RHS };
1350 SDNode *MOVI =
1351 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1352 SDValue MOVIV = SDValue(MOVI, 0);
1353 // This ADD of two X register will be selected into [Reg+Reg] mode.
1354 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1355 }
1356
1357 // Remember if it is worth folding N when it produces extended register.
1358 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1359
1360 // Try to match a shifted extend on the RHS.
1361 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1362 SelectExtendedSHL(N: RHS, Size, WantExtend: false, Offset, SignExtend)) {
1363 Base = LHS;
1364 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1365 return true;
1366 }
1367
1368 // Try to match a shifted extend on the LHS.
1369 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1370 SelectExtendedSHL(N: LHS, Size, WantExtend: false, Offset, SignExtend)) {
1371 Base = RHS;
1372 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1373 return true;
1374 }
1375
1376 // Match any non-shifted, non-extend, non-immediate add expression.
1377 Base = LHS;
1378 Offset = RHS;
1379 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1380 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1381 // Reg1 + Reg2 is free: no check needed.
1382 return true;
1383}
1384
1385SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1386 static const unsigned RegClassIDs[] = {
1387 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1388 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1389 AArch64::dsub2, AArch64::dsub3};
1390
1391 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1392}
1393
1394SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1395 static const unsigned RegClassIDs[] = {
1396 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1397 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1398 AArch64::qsub2, AArch64::qsub3};
1399
1400 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1401}
1402
1403SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1404 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1405 AArch64::ZPR3RegClassID,
1406 AArch64::ZPR4RegClassID};
1407 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1408 AArch64::zsub2, AArch64::zsub3};
1409
1410 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1411}
1412
1413SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1414 assert(Regs.size() == 2 || Regs.size() == 4);
1415
1416 // The createTuple interface requires 3 RegClassIDs for each possible
1417 // tuple type even though we only have them for ZPR2 and ZPR4.
1418 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1419 AArch64::ZPR4Mul4RegClassID};
1420 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1421 AArch64::zsub2, AArch64::zsub3};
1422 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1423}
1424
1425SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1426 const unsigned RegClassIDs[],
1427 const unsigned SubRegs[]) {
1428 // There's no special register-class for a vector-list of 1 element: it's just
1429 // a vector.
1430 if (Regs.size() == 1)
1431 return Regs[0];
1432
1433 assert(Regs.size() >= 2 && Regs.size() <= 4);
1434
1435 SDLoc DL(Regs[0]);
1436
1437 SmallVector<SDValue, 4> Ops;
1438
1439 // First operand of REG_SEQUENCE is the desired RegClass.
1440 Ops.push_back(
1441 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1442
1443 // Then we get pairs of source & subregister-position for the components.
1444 for (unsigned i = 0; i < Regs.size(); ++i) {
1445 Ops.push_back(Elt: Regs[i]);
1446 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1447 }
1448
1449 SDNode *N =
1450 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1451 return SDValue(N, 0);
1452}
1453
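// Select a NEON table lookup (e.g. TBL/TBX): form a Q-register tuple from the
// table vectors and emit the requested machine opcode.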
void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}
1474
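// Try to select a pre- or post-indexed load into the corresponding
// LDR*pre/LDR*post instruction, returning true if the node was replaced.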
1475bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1476 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1477 if (LD->isUnindexed())
1478 return false;
1479 EVT VT = LD->getMemoryVT();
1480 EVT DstVT = N->getValueType(ResNo: 0);
1481 ISD::MemIndexedMode AM = LD->getAddressingMode();
1482 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1483
1484 // We're not doing validity checking here. That was done when checking
1485 // if we should mark the load as indexed or not. We're just selecting
1486 // the right instruction.
1487 unsigned Opcode = 0;
1488
1489 ISD::LoadExtType ExtType = LD->getExtensionType();
1490 bool InsertTo64 = false;
1491 if (VT == MVT::i64)
1492 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1493 else if (VT == MVT::i32) {
1494 if (ExtType == ISD::NON_EXTLOAD)
1495 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1496 else if (ExtType == ISD::SEXTLOAD)
1497 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1498 else {
1499 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1500 InsertTo64 = true;
1501 // The result of the load is only i32. It's the subreg_to_reg that makes
1502 // it into an i64.
1503 DstVT = MVT::i32;
1504 }
1505 } else if (VT == MVT::i16) {
1506 if (ExtType == ISD::SEXTLOAD) {
1507 if (DstVT == MVT::i64)
1508 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1509 else
1510 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1511 } else {
1512 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1513 InsertTo64 = DstVT == MVT::i64;
1514 // The result of the load is only i32. It's the subreg_to_reg that makes
1515 // it into an i64.
1516 DstVT = MVT::i32;
1517 }
1518 } else if (VT == MVT::i8) {
1519 if (ExtType == ISD::SEXTLOAD) {
1520 if (DstVT == MVT::i64)
1521 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1522 else
1523 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1524 } else {
1525 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1526 InsertTo64 = DstVT == MVT::i64;
1527 // The result of the load is only i32. It's the subreg_to_reg that makes
1528 // it into an i64.
1529 DstVT = MVT::i32;
1530 }
1531 } else if (VT == MVT::f16) {
1532 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1533 } else if (VT == MVT::bf16) {
1534 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1535 } else if (VT == MVT::f32) {
1536 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1537 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1538 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1539 } else if (VT.is128BitVector()) {
1540 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1541 } else
1542 return false;
1543 SDValue Chain = LD->getChain();
1544 SDValue Base = LD->getBasePtr();
1545 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(Val: LD->getOffset());
1546 int OffsetVal = (int)OffsetOp->getZExtValue();
1547 SDLoc dl(N);
1548 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1549 SDValue Ops[] = { Base, Offset, Chain };
1550 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1551 MVT::Other, Ops);
1552
1553 // Transfer memoperands.
1554 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1555 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Res), NewMemRefs: {MemOp});
1556
1557 // Either way, we're replacing the node, so tell the caller that.
1558 SDValue LoadedVal = SDValue(Res, 1);
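  // For example, a post-indexed zextload of i8 into an i64 result was selected
  // as LDRBBpost above; the load itself only produces an i32, so it is wrapped
  // in a SUBREG_TO_REG (sub_32) below to form the final i64 value.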
1559 if (InsertTo64) {
1560 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1561 LoadedVal =
1562 SDValue(CurDAG->getMachineNode(
1563 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1564 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1565 SubReg),
1566 0);
1567 }
1568
1569 ReplaceUses(F: SDValue(N, 0), T: LoadedVal);
1570 ReplaceUses(F: SDValue(N, 1), T: SDValue(Res, 0));
1571 ReplaceUses(F: SDValue(N, 2), T: SDValue(Res, 2));
1572 CurDAG->RemoveDeadNode(N);
1573 return true;
1574}
1575
1576void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1577 unsigned SubRegIdx) {
1578 SDLoc dl(N);
1579 EVT VT = N->getValueType(ResNo: 0);
1580 SDValue Chain = N->getOperand(Num: 0);
1581
1582 SDValue Ops[] = {N->getOperand(Num: 2), // Mem operand;
1583 Chain};
1584
1585 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1586
1587 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1588 SDValue SuperReg = SDValue(Ld, 0);
1589 for (unsigned i = 0; i < NumVecs; ++i)
1590 ReplaceUses(F: SDValue(N, i),
1591 T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));
1592
1593 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));
1594
1595 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1596 // because it's too simple to have needed special treatment during lowering.
1597 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: N)) {
1598 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1599 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
1600 }
1601
1602 CurDAG->RemoveDeadNode(N);
1603}
1604
1605void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1606 unsigned Opc, unsigned SubRegIdx) {
1607 SDLoc dl(N);
1608 EVT VT = N->getValueType(ResNo: 0);
1609 SDValue Chain = N->getOperand(Num: 0);
1610
1611 SDValue Ops[] = {N->getOperand(Num: 1), // Mem operand
1612 N->getOperand(Num: 2), // Incremental
1613 Chain};
1614
1615 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1616 MVT::Untyped, MVT::Other};
1617
1618 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1619
1620 // Update uses of write back register
1621 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));
1622
1623 // Update uses of vector list
1624 SDValue SuperReg = SDValue(Ld, 1);
1625 if (NumVecs == 1)
1626 ReplaceUses(F: SDValue(N, 0), T: SuperReg);
1627 else
1628 for (unsigned i = 0; i < NumVecs; ++i)
1629 ReplaceUses(F: SDValue(N, i),
1630 T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));
1631
1632 // Update the chain
1633 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
1634 CurDAG->RemoveDeadNode(N);
1635}
1636
1637 /// Optimize \p OldBase and \p OldOffset by selecting the best addressing
1638/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1639/// new Base and an SDValue representing the new offset.
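/// Opc_rr is chosen only when a reg+reg mode is matched (and no reg+imm mode
/// was found); otherwise Opc_ri is chosen, using the reg+imm form with a
/// signed immediate in [-8, 7] when one can be matched.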
1640std::tuple<unsigned, SDValue, SDValue>
1641AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1642 unsigned Opc_ri,
1643 const SDValue &OldBase,
1644 const SDValue &OldOffset,
1645 unsigned Scale) {
1646 SDValue NewBase = OldBase;
1647 SDValue NewOffset = OldOffset;
1648 // Detect a possible Reg+Imm addressing mode.
1649 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1650 Root: N, N: OldBase, Base&: NewBase, OffImm&: NewOffset);
1651
1652 // Detect a possible reg+reg addressing mode, but only if we haven't already
1653 // detected a Reg+Imm one.
1654 const bool IsRegReg =
1655 !IsRegImm && SelectSVERegRegAddrMode(N: OldBase, Scale, Base&: NewBase, Offset&: NewOffset);
1656
1657 // Select the instruction.
1658 return std::make_tuple(args&: IsRegReg ? Opc_rr : Opc_ri, args&: NewBase, args&: NewOffset);
1659}
1660
1661enum class SelectTypeKind {
1662 Int1 = 0,
1663 Int = 1,
1664 FP = 2,
1665 AnyType = 3,
1666};
1667
1668/// This function selects an opcode from a list of opcodes, which is
1669/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1670/// element types, in this order.
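/// For example, with Kind == Int and VT == nxv8i16 this returns Opcodes[1]
/// (the 16-bit element variant); 0 is returned when no suitable opcode exists.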
1671template <SelectTypeKind Kind>
1672static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1673 // Only match scalable vector VTs
1674 if (!VT.isScalableVector())
1675 return 0;
1676
1677 EVT EltVT = VT.getVectorElementType();
1678 switch (Kind) {
1679 case SelectTypeKind::AnyType:
1680 break;
1681 case SelectTypeKind::Int:
1682 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1683 EltVT != MVT::i64)
1684 return 0;
1685 break;
1686 case SelectTypeKind::Int1:
1687 if (EltVT != MVT::i1)
1688 return 0;
1689 break;
1690 case SelectTypeKind::FP:
1691 if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64)
1692 return 0;
1693 break;
1694 }
1695
1696 unsigned Offset;
1697 switch (VT.getVectorMinNumElements()) {
1698 case 16: // 8-bit
1699 Offset = 0;
1700 break;
1701 case 8: // 16-bit
1702 Offset = 1;
1703 break;
1704 case 4: // 32-bit
1705 Offset = 2;
1706 break;
1707 case 2: // 64-bit
1708 Offset = 3;
1709 break;
1710 default:
1711 return 0;
1712 }
1713
1714 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1715}
1716
1717// This function is almost identical to SelectWhilePair, but has an
1718// extra check on the range of the immediate operand.
1719// TODO: Merge these two functions together at some point?
1720void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1721 // Immediate can be either 0 or 1.
1722 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)))
1723 if (Imm->getZExtValue() > 1)
1724 return;
1725
1726 SDLoc DL(N);
1727 EVT VT = N->getValueType(ResNo: 0);
1728 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
1729 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1730 SDValue SuperReg = SDValue(WhilePair, 0);
1731
1732 for (unsigned I = 0; I < 2; ++I)
1733 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1734 AArch64::psub0 + I, DL, VT, SuperReg));
1735
1736 CurDAG->RemoveDeadNode(N);
1737}
1738
1739void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1740 SDLoc DL(N);
1741 EVT VT = N->getValueType(ResNo: 0);
1742
1743 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
1744
1745 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1746 SDValue SuperReg = SDValue(WhilePair, 0);
1747
1748 for (unsigned I = 0; I < 2; ++I)
1749 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1750 AArch64::psub0 + I, DL, VT, SuperReg));
1751
1752 CurDAG->RemoveDeadNode(N);
1753}
1754
1755void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1756 unsigned Opcode) {
1757 EVT VT = N->getValueType(ResNo: 0);
1758 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1759 SDValue Ops = createZTuple(Regs);
1760 SDLoc DL(N);
1761 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1762 SDValue SuperReg = SDValue(Intrinsic, 0);
1763 for (unsigned i = 0; i < NumVecs; ++i)
1764 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1765 AArch64::zsub0 + i, DL, VT, SuperReg));
1766
1767 CurDAG->RemoveDeadNode(N);
1768}
1769
1770void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1771 unsigned NumVecs,
1772 bool IsZmMulti,
1773 unsigned Opcode,
1774 bool HasPred) {
1775 assert(Opcode != 0 && "Unexpected opcode");
1776
1777 SDLoc DL(N);
1778 EVT VT = N->getValueType(ResNo: 0);
1779 unsigned FirstVecIdx = HasPred ? 2 : 1;
1780
1781 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1782 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1783 N->op_begin() + StartIdx + NumVecs);
1784 return createZMulTuple(Regs);
1785 };
1786
1787 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1788
1789 SDValue Zm;
1790 if (IsZmMulti)
1791 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1792 else
1793 Zm = N->getOperand(Num: NumVecs + FirstVecIdx);
1794
1795 SDNode *Intrinsic;
1796 if (HasPred)
1797 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1798 N->getOperand(1), Zdn, Zm);
1799 else
1800 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1801 SDValue SuperReg = SDValue(Intrinsic, 0);
1802 for (unsigned i = 0; i < NumVecs; ++i)
1803 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1804 AArch64::zsub0 + i, DL, VT, SuperReg));
1805
1806 CurDAG->RemoveDeadNode(N);
1807}
1808
1809void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1810 unsigned Scale, unsigned Opc_ri,
1811 unsigned Opc_rr, bool IsIntr) {
1812 assert(Scale < 5 && "Invalid scaling value.");
1813 SDLoc DL(N);
1814 EVT VT = N->getValueType(ResNo: 0);
1815 SDValue Chain = N->getOperand(Num: 0);
1816
1817 // Optimize addressing mode.
1818 SDValue Base, Offset;
1819 unsigned Opc;
1820 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1821 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1822 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1823
1824 SDValue Ops[] = {N->getOperand(Num: IsIntr ? 2 : 1), // Predicate
1825 Base, // Memory operand
1826 Offset, Chain};
1827
1828 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1829
1830 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1831 SDValue SuperReg = SDValue(Load, 0);
1832 for (unsigned i = 0; i < NumVecs; ++i)
1833 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1834 AArch64::zsub0 + i, DL, VT, SuperReg));
1835
1836 // Copy chain
1837 unsigned ChainIdx = NumVecs;
1838 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
1839 CurDAG->RemoveDeadNode(N);
1840}
1841
1842void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1843 unsigned NumVecs,
1844 unsigned Scale,
1845 unsigned Opc_ri,
1846 unsigned Opc_rr) {
1847 assert(Scale < 4 && "Invalid scaling value.");
1848 SDLoc DL(N);
1849 EVT VT = N->getValueType(ResNo: 0);
1850 SDValue Chain = N->getOperand(Num: 0);
1851
1852 SDValue PNg = N->getOperand(Num: 2);
1853 SDValue Base = N->getOperand(Num: 3);
1854 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1855 unsigned Opc;
1856 std::tie(args&: Opc, args&: Base, args&: Offset) =
1857 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, OldBase: Base, OldOffset: Offset, Scale);
1858
1859 SDValue Ops[] = {PNg, // Predicate-as-counter
1860 Base, // Memory operand
1861 Offset, Chain};
1862
1863 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1864
1865 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1866 SDValue SuperReg = SDValue(Load, 0);
1867 for (unsigned i = 0; i < NumVecs; ++i)
1868 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1869 AArch64::zsub0 + i, DL, VT, SuperReg));
1870
1871 // Copy chain
1872 unsigned ChainIdx = NumVecs;
1873 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
1874 CurDAG->RemoveDeadNode(N);
1875}
1876
1877void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1878 unsigned Opcode) {
1879 if (N->getValueType(0) != MVT::nxv4f32)
1880 return;
1881 SelectUnaryMultiIntrinsic(N, NumOutVecs: NumVecs, IsTupleInput: true, Opc: Opcode);
1882}
1883
1884void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1885 unsigned NumOutVecs,
1886 unsigned Opc, uint32_t MaxImm) {
1887 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 4)))
1888 if (Imm->getZExtValue() > MaxImm)
1889 return;
1890
1891 SDValue ZtValue;
1892 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1893 return;
1894 SDValue Ops[] = {ZtValue, Node->getOperand(Num: 3), Node->getOperand(Num: 4)};
1895 SDLoc DL(Node);
1896 EVT VT = Node->getValueType(ResNo: 0);
1897
1898 SDNode *Instruction =
1899 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1900 SDValue SuperReg = SDValue(Instruction, 0);
1901
1902 for (unsigned I = 0; I < NumOutVecs; ++I)
1903 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1904 AArch64::zsub0 + I, DL, VT, SuperReg));
1905
1906 // Copy chain
1907 unsigned ChainIdx = NumOutVecs;
1908 ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1));
1909 CurDAG->RemoveDeadNode(N: Node);
1910}
1911
1912void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1913 unsigned Op) {
1914 SDLoc DL(N);
1915 EVT VT = N->getValueType(ResNo: 0);
1916
1917 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1918 SDValue Zd = createZMulTuple(Regs);
1919 SDValue Zn = N->getOperand(Num: 1 + NumVecs);
1920 SDValue Zm = N->getOperand(Num: 2 + NumVecs);
1921
1922 SDValue Ops[] = {Zd, Zn, Zm};
1923
1924 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1925 SDValue SuperReg = SDValue(Intrinsic, 0);
1926 for (unsigned i = 0; i < NumVecs; ++i)
1927 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1928 AArch64::zsub0 + i, DL, VT, SuperReg));
1929
1930 CurDAG->RemoveDeadNode(N);
1931}
1932
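// Maps a ZA tile base register plus a tile number onto the concrete tile
// register (e.g. ZAS0 with TileNum == 2 becomes ZAS2, which relies on the
// tile registers being numbered consecutively), rejecting tile numbers that
// are out of range for the tile's element size.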
1933bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
1934 switch (BaseReg) {
1935 default:
1936 return false;
1937 case AArch64::ZA:
1938 case AArch64::ZAB0:
1939 if (TileNum == 0)
1940 break;
1941 return false;
1942 case AArch64::ZAH0:
1943 if (TileNum <= 1)
1944 break;
1945 return false;
1946 case AArch64::ZAS0:
1947 if (TileNum <= 3)
1948 break;
1949 return false;
1950 case AArch64::ZAD0:
1951 if (TileNum <= 7)
1952 break;
1953 return false;
1954 }
1955
1956 BaseReg += TileNum;
1957 return true;
1958}
1959
1960template <unsigned MaxIdx, unsigned Scale>
1961void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1962 unsigned BaseReg, unsigned Op) {
1963 unsigned TileNum = 0;
1964 if (BaseReg != AArch64::ZA)
1965 TileNum = N->getConstantOperandVal(Num: 2);
1966
1967 if (!SelectSMETile(BaseReg, TileNum))
1968 return;
1969
1970 SDValue SliceBase, Base, Offset;
1971 if (BaseReg == AArch64::ZA)
1972 SliceBase = N->getOperand(Num: 2);
1973 else
1974 SliceBase = N->getOperand(Num: 3);
1975
1976 if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
1977 return;
1978
1979 SDLoc DL(N);
1980 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1981 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(Num: 0)};
1982 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1983
1984 EVT VT = N->getValueType(ResNo: 0);
1985 for (unsigned I = 0; I < NumVecs; ++I)
1986 ReplaceUses(SDValue(N, I),
1987 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
1988 SDValue(Mov, 0)));
1989 // Copy chain
1990 unsigned ChainIdx = NumVecs;
1991 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1));
1992 CurDAG->RemoveDeadNode(N);
1993}
1994
1995void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
1996 unsigned NumOutVecs,
1997 bool IsTupleInput,
1998 unsigned Opc) {
1999 SDLoc DL(N);
2000 EVT VT = N->getValueType(ResNo: 0);
2001 unsigned NumInVecs = N->getNumOperands() - 1;
2002
2003 SmallVector<SDValue, 6> Ops;
2004 if (IsTupleInput) {
2005 assert((NumInVecs == 2 || NumInVecs == 4) &&
2006 "Don't know how to handle multi-register input!");
2007 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2008 N->op_begin() + 1 + NumInVecs);
2009 Ops.push_back(Elt: createZMulTuple(Regs));
2010 } else {
2011 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2012 for (unsigned I = 0; I < NumInVecs; I++)
2013 Ops.push_back(Elt: N->getOperand(Num: 1 + I));
2014 }
2015
2016 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2017 SDValue SuperReg = SDValue(Res, 0);
2018
2019 for (unsigned I = 0; I < NumOutVecs; I++)
2020 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2021 AArch64::zsub0 + I, DL, VT, SuperReg));
2022 CurDAG->RemoveDeadNode(N);
2023}
2024
2025void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2026 unsigned Opc) {
2027 SDLoc dl(N);
2028 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2029
2030 // Form a REG_SEQUENCE to force register allocation.
2031 bool Is128Bit = VT.getSizeInBits() == 128;
2032 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2033 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2034
2035 SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), N->getOperand(Num: 0)};
2036 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);
2037
2038 // Transfer memoperands.
2039 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2040 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2041
2042 ReplaceNode(F: N, T: St);
2043}
2044
2045void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2046 unsigned Scale, unsigned Opc_rr,
2047 unsigned Opc_ri) {
2048 SDLoc dl(N);
2049
2050 // Form a REG_SEQUENCE to force register allocation.
2051 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2052 SDValue RegSeq = createZTuple(Regs);
2053
2054 // Optimize addressing mode.
2055 unsigned Opc;
2056 SDValue Offset, Base;
2057 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2058 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2059 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2060
2061 SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), // predicate
2062 Base, // address
2063 Offset, // offset
2064 N->getOperand(Num: 0)}; // chain
2065 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);
2066
2067 ReplaceNode(F: N, T: St);
2068}
2069
2070bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2071 SDValue &OffImm) {
2072 SDLoc dl(N);
2073 const DataLayout &DL = CurDAG->getDataLayout();
2074 const TargetLowering *TLI = getTargetLowering();
2075
2076 // Try to match it for the frame address
2077 if (auto FINode = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2078 int FI = FINode->getIndex();
2079 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
2080 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2081 return true;
2082 }
2083
2084 return false;
2085}
2086
2087void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2088 unsigned Opc) {
2089 SDLoc dl(N);
2090 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2091 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2092 MVT::Other}; // Type for the Chain
2093
2094 // Form a REG_SEQUENCE to force register allocation.
2095 bool Is128Bit = VT.getSizeInBits() == 128;
2096 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2097 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2098
2099 SDValue Ops[] = {RegSeq,
2100 N->getOperand(Num: NumVecs + 1), // base register
2101 N->getOperand(Num: NumVecs + 2), // Incremental
2102 N->getOperand(Num: 0)}; // Chain
2103 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2104
2105 ReplaceNode(F: N, T: St);
2106}
2107
2108namespace {
2109/// WidenVector - Given a value in the V64 register class, produce the
2110/// equivalent value in the V128 register class.
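/// For example, a v2i32 value is inserted into the dsub subregister of an
/// IMPLICIT_DEF v4i32 node.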
2111class WidenVector {
2112 SelectionDAG &DAG;
2113
2114public:
2115 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2116
2117 SDValue operator()(SDValue V64Reg) {
2118 EVT VT = V64Reg.getValueType();
2119 unsigned NarrowSize = VT.getVectorNumElements();
2120 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2121 MVT WideTy = MVT::getVectorVT(VT: EltTy, NumElements: 2 * NarrowSize);
2122 SDLoc DL(V64Reg);
2123
2124 SDValue Undef =
2125 SDValue(DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: WideTy), 0);
2126 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2127 }
2128};
2129} // namespace
2130
2131/// NarrowVector - Given a value in the V128 register class, produce the
2132/// equivalent value in the V64 register class.
2133static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2134 EVT VT = V128Reg.getValueType();
2135 unsigned WideSize = VT.getVectorNumElements();
2136 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2137 MVT NarrowTy = MVT::getVectorVT(VT: EltTy, NumElements: WideSize / 2);
2138
2139 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2140 V128Reg);
2141}
2142
2143void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2144 unsigned Opc) {
2145 SDLoc dl(N);
2146 EVT VT = N->getValueType(ResNo: 0);
2147 bool Narrow = VT.getSizeInBits() == 64;
2148
2149 // Form a REG_SEQUENCE to force register allocation.
2150 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2151
2152 if (Narrow)
2153 transform(Range&: Regs, d_first: Regs.begin(),
2154 F: WidenVector(*CurDAG));
2155
2156 SDValue RegSeq = createQTuple(Regs);
2157
2158 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2159
2160 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);
2161
2162 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2163 N->getOperand(NumVecs + 3), N->getOperand(0)};
2164 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2165 SDValue SuperReg = SDValue(Ld, 0);
2166
2167 EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
2168 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2169 AArch64::qsub2, AArch64::qsub3 };
2170 for (unsigned i = 0; i < NumVecs; ++i) {
2171 SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT, Operand: SuperReg);
2172 if (Narrow)
2173 NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
2174 ReplaceUses(F: SDValue(N, i), T: NV);
2175 }
2176
2177 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));
2178 CurDAG->RemoveDeadNode(N);
2179}
2180
2181void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2182 unsigned Opc) {
2183 SDLoc dl(N);
2184 EVT VT = N->getValueType(ResNo: 0);
2185 bool Narrow = VT.getSizeInBits() == 64;
2186
2187 // Form a REG_SEQUENCE to force register allocation.
2188 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2189
2190 if (Narrow)
2191 transform(Range&: Regs, d_first: Regs.begin(),
2192 F: WidenVector(*CurDAG));
2193
2194 SDValue RegSeq = createQTuple(Regs);
2195
2196 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2197 RegSeq->getValueType(0), MVT::Other};
2198
2199 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);
2200
2201 SDValue Ops[] = {RegSeq,
2202 CurDAG->getTargetConstant(LaneNo, dl,
2203 MVT::i64), // Lane Number
2204 N->getOperand(NumVecs + 2), // Base register
2205 N->getOperand(NumVecs + 3), // Incremental
2206 N->getOperand(0)};
2207 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2208
2209 // Update uses of the write back register
2210 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));
2211
2212 // Update uses of the vector list
2213 SDValue SuperReg = SDValue(Ld, 1);
2214 if (NumVecs == 1) {
2215 ReplaceUses(F: SDValue(N, 0),
2216 T: Narrow ? NarrowVector(V128Reg: SuperReg, DAG&: *CurDAG) : SuperReg);
2217 } else {
2218 EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
2219 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2220 AArch64::qsub2, AArch64::qsub3 };
2221 for (unsigned i = 0; i < NumVecs; ++i) {
2222 SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT,
2223 Operand: SuperReg);
2224 if (Narrow)
2225 NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
2226 ReplaceUses(F: SDValue(N, i), T: NV);
2227 }
2228 }
2229
2230 // Update the Chain
2231 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
2232 CurDAG->RemoveDeadNode(N);
2233}
2234
2235void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2236 unsigned Opc) {
2237 SDLoc dl(N);
2238 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2239 bool Narrow = VT.getSizeInBits() == 64;
2240
2241 // Form a REG_SEQUENCE to force register allocation.
2242 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2243
2244 if (Narrow)
2245 transform(Range&: Regs, d_first: Regs.begin(),
2246 F: WidenVector(*CurDAG));
2247
2248 SDValue RegSeq = createQTuple(Regs);
2249
2250 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);
2251
2252 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2253 N->getOperand(NumVecs + 3), N->getOperand(0)};
2254 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2255
2256 // Transfer memoperands.
2257 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2258 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2259
2260 ReplaceNode(F: N, T: St);
2261}
2262
2263void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2264 unsigned Opc) {
2265 SDLoc dl(N);
2266 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2267 bool Narrow = VT.getSizeInBits() == 64;
2268
2269 // Form a REG_SEQUENCE to force register allocation.
2270 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2271
2272 if (Narrow)
2273 transform(Range&: Regs, d_first: Regs.begin(),
2274 F: WidenVector(*CurDAG));
2275
2276 SDValue RegSeq = createQTuple(Regs);
2277
2278 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2279 MVT::Other};
2280
2281 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);
2282
2283 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2284 N->getOperand(NumVecs + 2), // Base Register
2285 N->getOperand(NumVecs + 3), // Incremental
2286 N->getOperand(0)};
2287 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2288
2289 // Transfer memoperands.
2290 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2291 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2292
2293 ReplaceNode(F: N, T: St);
2294}
2295
2296static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2297 unsigned &Opc, SDValue &Opd0,
2298 unsigned &LSB, unsigned &MSB,
2299 unsigned NumberOfIgnoredLowBits,
2300 bool BiggerPattern) {
2301 assert(N->getOpcode() == ISD::AND &&
2302 "N must be a AND operation to call this function");
2303
2304 EVT VT = N->getValueType(ResNo: 0);
2305
2306 // We could test the type of VT here and return false when it does not
2307 // match, but since that check is already done before this call in the
2308 // current context, we turn it into an assert to avoid redundant code.
2309 assert((VT == MVT::i32 || VT == MVT::i64) &&
2310 "Type checking must have been done before calling this function");
2311
2312 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2313 // changed the AND node to a 32-bit mask operation. We'll have to
2314 // undo that as part of the transform here if we want to catch all
2315 // the opportunities.
2316 // Currently the NumberOfIgnoredLowBits argument helps to recover
2317 // from these situations when matching bigger pattern (bitfield insert).
2318
2319 // For unsigned extracts, check for a shift right and mask
2320 uint64_t AndImm = 0;
2321 if (!isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: AndImm))
2322 return false;
2323
2324 const SDNode *Op0 = N->getOperand(Num: 0).getNode();
2325
2326 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2327 // simplified. Try to undo that
2328 AndImm |= maskTrailingOnes<uint64_t>(N: NumberOfIgnoredLowBits);
2329
2330 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
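  // (e.g. 0xff passes since 0xff & 0x100 == 0, while 0xf1 fails since
  // 0xf1 & 0xf2 != 0).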
2331 if (AndImm & (AndImm + 1))
2332 return false;
2333
2334 bool ClampMSB = false;
2335 uint64_t SrlImm = 0;
2336 // Handle the SRL + ANY_EXTEND case.
2337 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2338 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2339 // Extend the incoming operand of the SRL to 64-bit.
2340 Opd0 = Widen(CurDAG, N: Op0->getOperand(Num: 0).getOperand(i: 0));
2341 // Make sure to clamp the MSB so that we preserve the semantics of the
2342 // original operations.
2343 ClampMSB = true;
2344 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2345 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2346 SrlImm)) {
2347 // If the shift result was truncated, we can still combine them.
2348 Opd0 = Op0->getOperand(Num: 0).getOperand(i: 0);
2349
2350 // Use the type of SRL node.
2351 VT = Opd0->getValueType(ResNo: 0);
2352 } else if (isOpcWithIntImmediate(N: Op0, Opc: ISD::SRL, Imm&: SrlImm)) {
2353 Opd0 = Op0->getOperand(Num: 0);
2354 ClampMSB = (VT == MVT::i32);
2355 } else if (BiggerPattern) {
2356 // Let's pretend a 0 shift right has been performed.
2357 // The resulting code will be at least as good as the original one
2358 // plus it may expose more opportunities for bitfield insert pattern.
2359 // FIXME: Currently we limit this to the bigger pattern, because
2360 // some optimizations expect AND and not UBFM.
2361 Opd0 = N->getOperand(Num: 0);
2362 } else
2363 return false;
2364
2365 // Bail out on large immediates. This happens when no proper
2366 // combining/constant folding was performed.
2367 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2368 LLVM_DEBUG(
2369 (dbgs() << N
2370 << ": Found large shift immediate, this should not happen\n"));
2371 return false;
2372 }
2373
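  // For example, (and (srl x, 8), 0xff) on i64 gives LSB = 8 and
  // MSB = 8 + 8 - 1 = 15, which selects to UBFMXri x, #8, #15, i.e. an
  // unsigned extract of 8 bits starting at bit 8.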
2374 LSB = SrlImm;
2375 MSB = SrlImm +
2376 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2377 : llvm::countr_one<uint64_t>(AndImm)) -
2378 1;
2379 if (ClampMSB)
2380 // Since we're moving the extend before the right shift operation, we need
2381 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2382 // the zeros which would get shifted in with the original right shift
2383 // operation.
2384 MSB = MSB > 31 ? 31 : MSB;
2385
2386 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2387 return true;
2388}
2389
2390static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2391 SDValue &Opd0, unsigned &Immr,
2392 unsigned &Imms) {
2393 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2394
2395 EVT VT = N->getValueType(ResNo: 0);
2396 unsigned BitWidth = VT.getSizeInBits();
2397 assert((VT == MVT::i32 || VT == MVT::i64) &&
2398 "Type checking must have been done before calling this function");
2399
2400 SDValue Op = N->getOperand(Num: 0);
2401 if (Op->getOpcode() == ISD::TRUNCATE) {
2402 Op = Op->getOperand(Num: 0);
2403 VT = Op->getValueType(ResNo: 0);
2404 BitWidth = VT.getSizeInBits();
2405 }
2406
2407 uint64_t ShiftImm;
2408 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRL, Imm&: ShiftImm) &&
2409 !isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
2410 return false;
2411
2412 unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
2413 if (ShiftImm + Width > BitWidth)
2414 return false;
2415
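  // For example, (sign_extend_inreg (srl x, 3), i8) on i32 gives Immr = 3 and
  // Imms = 3 + 8 - 1 = 10, i.e. a signed extract of 8 bits starting at bit 3.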
2416 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2417 Opd0 = Op.getOperand(i: 0);
2418 Immr = ShiftImm;
2419 Imms = ShiftImm + Width - 1;
2420 return true;
2421}
2422
2423static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2424 SDValue &Opd0, unsigned &LSB,
2425 unsigned &MSB) {
2426 // We are looking for the following pattern, which extracts several
2427 // contiguous bits from the source value and places them starting at the LSB
2428 // of the destination value; all other bits of the destination are set to zero:
2429 //
2430 // Value2 = AND Value, MaskImm
2431 // SRL Value2, ShiftImm
2432 //
2433 // where MaskImm >> ShiftImm determines the bit width.
2434 //
2435 // This gets selected into a single UBFM:
2436 //
2437 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2438 //
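  // For example, (srl (and x, 0xff0), 4) becomes UBFM x, #4, #11, an
  // unsigned extract of the 8 bits starting at bit 4.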
2439
2440 if (N->getOpcode() != ISD::SRL)
2441 return false;
2442
2443 uint64_t AndMask = 0;
2444 if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: AndMask))
2445 return false;
2446
2447 Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
2448
2449 uint64_t SrlImm = 0;
2450 if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
2451 return false;
2452
2453 // Check whether we really have several bits extract here.
2454 if (!isMask_64(Value: AndMask >> SrlImm))
2455 return false;
2456
2457 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2458 LSB = SrlImm;
2459 MSB = llvm::Log2_64(Value: AndMask);
2460 return true;
2461}
2462
2463static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2464 unsigned &Immr, unsigned &Imms,
2465 bool BiggerPattern) {
2466 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2467 "N must be a SHR/SRA operation to call this function");
2468
2469 EVT VT = N->getValueType(ResNo: 0);
2470
2471 // We could test the type of VT here and return false when it does not
2472 // match, but since that check is already done before this call in the
2473 // current context, we turn it into an assert to avoid redundant code.
2474 assert((VT == MVT::i32 || VT == MVT::i64) &&
2475 "Type checking must have been done before calling this function");
2476
2477 // Check for AND + SRL doing several bits extract.
2478 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB&: Immr, MSB&: Imms))
2479 return true;
2480
2481 // We're looking for a shift of a shift.
2482 uint64_t ShlImm = 0;
2483 uint64_t TruncBits = 0;
2484 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
2485 Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
2486 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2487 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2488 // We are looking for a shift of a truncate. A truncate from i64 to i32 can
2489 // be considered as setting the high 32 bits to zero. Our strategy here is to
2490 // always generate a 64-bit UBFM. This consistency will help the CSE pass
2491 // later find more redundancy.
2492 Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
2493 TruncBits = Opd0->getValueType(ResNo: 0).getSizeInBits() - VT.getSizeInBits();
2494 VT = Opd0.getValueType();
2495 assert(VT == MVT::i64 && "the promoted type should be i64");
2496 } else if (BiggerPattern) {
2497 // Let's pretend a 0 shift left has been performed.
2498 // FIXME: Currently we limit this to the bigger pattern case,
2499 // because some optimizations expect AND and not UBFM
2500 Opd0 = N->getOperand(Num: 0);
2501 } else
2502 return false;
2503
2504 // Missing combines/constant folding may have left us with strange
2505 // constants.
2506 if (ShlImm >= VT.getSizeInBits()) {
2507 LLVM_DEBUG(
2508 (dbgs() << N
2509 << ": Found large shift immediate, this should not happen\n"));
2510 return false;
2511 }
2512
2513 uint64_t SrlImm = 0;
2514 if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
2515 return false;
2516
2517 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2518 "bad amount in shift node!");
2519 int immr = SrlImm - ShlImm;
2520 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2521 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2522 // SRA requires a signed extraction
2523 if (VT == MVT::i32)
2524 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2525 else
2526 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2527 return true;
2528}
2529
2530bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2531 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2532
2533 EVT VT = N->getValueType(ResNo: 0);
2534 EVT NarrowVT = N->getOperand(Num: 0)->getValueType(ResNo: 0);
2535 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2536 return false;
2537
2538 uint64_t ShiftImm;
2539 SDValue Op = N->getOperand(Num: 0);
2540 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
2541 return false;
2542
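  // For example, (sign_extend (sra x, 5)) with a 32-bit x becomes SBFMXri over
  // the widened operand with Immr = 5 and Imms = 31, i.e. a sign-extending
  // extract of bits [31:5].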
2543 SDLoc dl(N);
2544 // Extend the incoming operand of the shift to 64-bits.
2545 SDValue Opd0 = Widen(CurDAG, N: Op.getOperand(i: 0));
2546 unsigned Immr = ShiftImm;
2547 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2548 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
2549 CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
2550 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2551 return true;
2552}
2553
2554static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2555 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2556 unsigned NumberOfIgnoredLowBits = 0,
2557 bool BiggerPattern = false) {
2558 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2559 return false;
2560
2561 switch (N->getOpcode()) {
2562 default:
2563 if (!N->isMachineOpcode())
2564 return false;
2565 break;
2566 case ISD::AND:
2567 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB&: Immr, MSB&: Imms,
2568 NumberOfIgnoredLowBits, BiggerPattern);
2569 case ISD::SRL:
2570 case ISD::SRA:
2571 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2572
2573 case ISD::SIGN_EXTEND_INREG:
2574 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2575 }
2576
2577 unsigned NOpc = N->getMachineOpcode();
2578 switch (NOpc) {
2579 default:
2580 return false;
2581 case AArch64::SBFMWri:
2582 case AArch64::UBFMWri:
2583 case AArch64::SBFMXri:
2584 case AArch64::UBFMXri:
2585 Opc = NOpc;
2586 Opd0 = N->getOperand(Num: 0);
2587 Immr = N->getConstantOperandVal(Num: 1);
2588 Imms = N->getConstantOperandVal(Num: 2);
2589 return true;
2590 }
2591 // Unreachable
2592 return false;
2593}
2594
2595bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2596 unsigned Opc, Immr, Imms;
2597 SDValue Opd0;
2598 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2599 return false;
2600
2601 EVT VT = N->getValueType(ResNo: 0);
2602 SDLoc dl(N);
2603
2604 // If the bit extract operation is 64bit but the original type is 32bit, we
2605 // need to add one EXTRACT_SUBREG.
2606 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2607 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2608 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2609
2610 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2611 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2612 MVT::i32, SDValue(BFM, 0));
2613 ReplaceNode(F: N, T: Inner.getNode());
2614 return true;
2615 }
2616
2617 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
2618 CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
2619 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
2620 return true;
2621}
2622
2623/// Does DstMask form a complementary pair with the mask provided by
2624 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
2625/// this asks whether DstMask zeroes precisely those bits that will be set by
2626/// the other half.
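/// For example, on i32 a DstMask of 0xffff0000 is complementary to inserted
/// bits occupying exactly the low 16 bits (0x0000ffff).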
2627static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2628 unsigned NumberOfIgnoredHighBits, EVT VT) {
2629 assert((VT == MVT::i32 || VT == MVT::i64) &&
2630 "i32 or i64 mask type expected!");
2631 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2632
2633 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2634 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(width: BitWidth);
2635
2636 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2637 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2638}
2639
2640// Look for bits that will be useful for later uses.
2641 // A bit is considered useless as soon as it is dropped and is never used
2642 // before it has been dropped.
2643 // E.g., looking for the useful bits of x:
2644 // 1. y = x & 0x7
2645 // 2. z = y >> 2
2646 // After #1, the useful bits of x are 0x7; these useful bits of x live
2647 // through y.
2648 // After #2, the useful bits of x are 0x4.
2649 // However, if x is used by an unpredictable instruction, then all its bits
2650 // are useful.
2651// E.g.
2652// 1. y = x & 0x7
2653// 2. z = y >> 2
2654// 3. str x, [@x]
2655static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2656
2657static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2658 unsigned Depth) {
2659 uint64_t Imm =
2660 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
2661 Imm = AArch64_AM::decodeLogicalImmediate(val: Imm, regSize: UsefulBits.getBitWidth());
2662 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2663 getUsefulBits(Op, UsefulBits, Depth: Depth + 1);
2664}
2665
2666static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2667 uint64_t Imm, uint64_t MSB,
2668 unsigned Depth) {
2669 // inherit the bitwidth value
2670 APInt OpUsefulBits(UsefulBits);
2671 OpUsefulBits = 1;
2672
2673 if (MSB >= Imm) {
2674 OpUsefulBits <<= MSB - Imm + 1;
2675 --OpUsefulBits;
2676 // The interesting part will be in the lower part of the result
2677 getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
2678 // The interesting part was starting at Imm in the argument
2679 OpUsefulBits <<= Imm;
2680 } else {
2681 OpUsefulBits <<= MSB + 1;
2682 --OpUsefulBits;
2683 // The interesting part will be shifted in the result
2684 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2685 getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
2686 // The interesting part was at zero in the argument
2687 OpUsefulBits.lshrInPlace(ShiftAmt: OpUsefulBits.getBitWidth() - Imm);
2688 }
2689
2690 UsefulBits &= OpUsefulBits;
2691}
2692
2693static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2694 unsigned Depth) {
2695 uint64_t Imm =
2696 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
2697 uint64_t MSB =
2698 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
2699
2700 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2701}
2702
2703static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2704 unsigned Depth) {
2705 uint64_t ShiftTypeAndValue =
2706 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
2707 APInt Mask(UsefulBits);
2708 Mask.clearAllBits();
2709 Mask.flipAllBits();
2710
2711 if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSL) {
2712 // Shift Left
2713 uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
2714 Mask <<= ShiftAmt;
2715 getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
2716 Mask.lshrInPlace(ShiftAmt);
2717 } else if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSR) {
2718 // Shift Right
2719 // We do not handle AArch64_AM::ASR, because the sign will change the
2720 // number of useful bits
2721 uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
2722 Mask.lshrInPlace(ShiftAmt);
2723 getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
2724 Mask <<= ShiftAmt;
2725 } else
2726 return;
2727
2728 UsefulBits &= Mask;
2729}
2730
2731static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2732 unsigned Depth) {
2733 uint64_t Imm =
2734 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
2735 uint64_t MSB =
2736 cast<const ConstantSDNode>(Val: Op.getOperand(i: 3).getNode())->getZExtValue();
2737
2738 APInt OpUsefulBits(UsefulBits);
2739 OpUsefulBits = 1;
2740
2741 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2742 ResultUsefulBits.flipAllBits();
2743 APInt Mask(UsefulBits.getBitWidth(), 0);
2744
2745 getUsefulBits(Op, UsefulBits&: ResultUsefulBits, Depth: Depth + 1);
2746
2747 if (MSB >= Imm) {
2748 // The instruction is a BFXIL.
2749 uint64_t Width = MSB - Imm + 1;
2750 uint64_t LSB = Imm;
2751
2752 OpUsefulBits <<= Width;
2753 --OpUsefulBits;
2754
2755 if (Op.getOperand(i: 1) == Orig) {
2756 // Copy the low bits from the result to bits starting from LSB.
2757 Mask = ResultUsefulBits & OpUsefulBits;
2758 Mask <<= LSB;
2759 }
2760
2761 if (Op.getOperand(i: 0) == Orig)
2762 // Bits starting from LSB in the input contribute to the result.
2763 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2764 } else {
2765 // The instruction is a BFI.
2766 uint64_t Width = MSB + 1;
2767 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2768
2769 OpUsefulBits <<= Width;
2770 --OpUsefulBits;
2771 OpUsefulBits <<= LSB;
2772
2773 if (Op.getOperand(i: 1) == Orig) {
2774 // Copy the bits from the result to the zero bits.
2775 Mask = ResultUsefulBits & OpUsefulBits;
2776 Mask.lshrInPlace(ShiftAmt: LSB);
2777 }
2778
2779 if (Op.getOperand(i: 0) == Orig)
2780 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2781 }
2782
2783 UsefulBits &= Mask;
2784}
2785
2786static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2787 SDValue Orig, unsigned Depth) {
2788
2789 // Users of this node should have already been instruction selected
2790 // FIXME: Can we turn that into an assert?
2791 if (!UserNode->isMachineOpcode())
2792 return;
2793
2794 switch (UserNode->getMachineOpcode()) {
2795 default:
2796 return;
2797 case AArch64::ANDSWri:
2798 case AArch64::ANDSXri:
2799 case AArch64::ANDWri:
2800 case AArch64::ANDXri:
2801 // We increment Depth only when we call getUsefulBits
2802 return getUsefulBitsFromAndWithImmediate(Op: SDValue(UserNode, 0), UsefulBits,
2803 Depth);
2804 case AArch64::UBFMWri:
2805 case AArch64::UBFMXri:
2806 return getUsefulBitsFromUBFM(Op: SDValue(UserNode, 0), UsefulBits, Depth);
2807
2808 case AArch64::ORRWrs:
2809 case AArch64::ORRXrs:
2810 if (UserNode->getOperand(Num: 0) != Orig && UserNode->getOperand(Num: 1) == Orig)
2811 getUsefulBitsFromOrWithShiftedReg(Op: SDValue(UserNode, 0), UsefulBits,
2812 Depth);
2813 return;
2814 case AArch64::BFMWri:
2815 case AArch64::BFMXri:
2816 return getUsefulBitsFromBFM(Op: SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2817
2818 case AArch64::STRBBui:
2819 case AArch64::STURBBi:
2820 if (UserNode->getOperand(Num: 0) != Orig)
2821 return;
2822 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2823 return;
2824
2825 case AArch64::STRHHui:
2826 case AArch64::STURHHi:
2827 if (UserNode->getOperand(Num: 0) != Orig)
2828 return;
2829 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2830 return;
2831 }
2832}
2833
2834static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2835 if (Depth >= SelectionDAG::MaxRecursionDepth)
2836 return;
2837 // Initialize UsefulBits
2838 if (!Depth) {
2839 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2840 // At the beginning, assume every produced bit is useful
2841 UsefulBits = APInt(Bitwidth, 0);
2842 UsefulBits.flipAllBits();
2843 }
2844 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2845
2846 for (SDNode *Node : Op.getNode()->uses()) {
2847 // A use cannot produce useful bits
2848 APInt UsefulBitsForUse = APInt(UsefulBits);
2849 getUsefulBitsForUse(UserNode: Node, UsefulBits&: UsefulBitsForUse, Orig: Op, Depth);
2850 UsersUsefulBits |= UsefulBitsForUse;
2851 }
2852 // UsefulBits contains the produced bits that are meaningful for the
2853 // current definition, thus a user cannot make a bit meaningful at
2854 // this point
2855 UsefulBits &= UsersUsefulBits;
2856}
2857
2858/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2859/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2860/// 0, return Op unchanged.
2861static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2862 if (ShlAmount == 0)
2863 return Op;
2864
2865 EVT VT = Op.getValueType();
2866 SDLoc dl(Op);
2867 unsigned BitWidth = VT.getSizeInBits();
2868 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2869
2870 SDNode *ShiftNode;
2871 if (ShlAmount > 0) {
2872 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2873 ShiftNode = CurDAG->getMachineNode(
2874 Opcode: UBFMOpc, dl, VT, Op1: Op,
2875 Op2: CurDAG->getTargetConstant(Val: BitWidth - ShlAmount, DL: dl, VT),
2876 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1 - ShlAmount, DL: dl, VT));
2877 } else {
2878 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2879 assert(ShlAmount < 0 && "expected right shift");
2880 int ShrAmount = -ShlAmount;
2881 ShiftNode = CurDAG->getMachineNode(
2882 Opcode: UBFMOpc, dl, VT, Op1: Op, Op2: CurDAG->getTargetConstant(Val: ShrAmount, DL: dl, VT),
2883 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL: dl, VT));
2884 }
2885
2886 return SDValue(ShiftNode, 0);
2887}
2888
2889// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2890static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2891 bool BiggerPattern,
2892 const uint64_t NonZeroBits,
2893 SDValue &Src, int &DstLSB,
2894 int &Width);
2895
2896 // For bit-field-positioning pattern "(shl VAL, N)".
2897static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2898 bool BiggerPattern,
2899 const uint64_t NonZeroBits,
2900 SDValue &Src, int &DstLSB,
2901 int &Width);
2902
2903/// Does this tree qualify as an attempt to move a bitfield into position,
2904 /// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
2905static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2906 bool BiggerPattern, SDValue &Src,
2907 int &DstLSB, int &Width) {
2908 EVT VT = Op.getValueType();
2909 unsigned BitWidth = VT.getSizeInBits();
2910 (void)BitWidth;
2911 assert(BitWidth == 32 || BitWidth == 64);
2912
2913 KnownBits Known = CurDAG->computeKnownBits(Op);
2914
2915 // Non-zero in the sense that they're not provably zero, which is the key
2916 // point if we want to use this value
2917 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2918 if (!isShiftedMask_64(Value: NonZeroBits))
2919 return false;
2920
2921 switch (Op.getOpcode()) {
2922 default:
2923 break;
2924 case ISD::AND:
2925 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2926 NonZeroBits, Src, DstLSB, Width);
2927 case ISD::SHL:
2928 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2929 NonZeroBits, Src, DstLSB, Width);
2930 }
2931
2932 return false;
2933}
2934
2935static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2936 bool BiggerPattern,
2937 const uint64_t NonZeroBits,
2938 SDValue &Src, int &DstLSB,
2939 int &Width) {
2940 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2941
2942 EVT VT = Op.getValueType();
2943 assert((VT == MVT::i32 || VT == MVT::i64) &&
2944 "Caller guarantees VT is one of i32 or i64");
2945 (void)VT;
2946
2947 uint64_t AndImm;
2948 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::AND, Imm&: AndImm))
2949 return false;
2950
2951 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
2952 // 1) (AndImm & (1 << POS) == 0)
2953 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
2954 //
2955 // 1) and 2) don't agree so something must be wrong (e.g., in
2956 // 'SelectionDAG::computeKnownBits')
2957 assert((~AndImm & NonZeroBits) == 0 &&
2958 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2959
2960 SDValue AndOp0 = Op.getOperand(i: 0);
2961
2962 uint64_t ShlImm;
2963 SDValue ShlOp0;
2964 if (isOpcWithIntImmediate(N: AndOp0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
2965 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
2966 ShlOp0 = AndOp0.getOperand(i: 0);
2967 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
2968 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
2969 ShlImm)) {
2970 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
2971
2972 // ShlVal == shl(val, N), which is a left shift on a smaller type.
2973 SDValue ShlVal = AndOp0.getOperand(i: 0);
2974
2975 // Since this is after type legalization and ShlVal is extended to MVT::i64,
2976 // expect ShlVal's own type to be MVT::i32.
2977 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
2978
2979 // Widens 'val' to MVT::i64 as the source of bit field positioning.
2980 ShlOp0 = Widen(CurDAG, N: ShlVal.getOperand(i: 0));
2981 } else
2982 return false;
2983
2984 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2985 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2986 // AndOp0+AND.
2987 if (!BiggerPattern && !AndOp0.hasOneUse())
2988 return false;
2989
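  // For example, NonZeroBits == 0x0000ff00 yields DstLSB = 8 and Width = 8.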
2990 DstLSB = llvm::countr_zero(Val: NonZeroBits);
2991 Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);
2992
2993 // Bail out on large Width. This happens when no proper combining / constant
2994 // folding was performed.
2995 if (Width >= (int)VT.getSizeInBits()) {
2996 // If VT is i64, Width > 64 is impossible since NonZeroBits is uint64_t, and
2997 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
2998 // "val".
2999 // If VT is i32, what Width >= 32 means:
3000 //   - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3001 //     demands at least 'Width' bits (after dag-combiner). This, together with
3002 //     the `any_extend` Op (undefined higher bits), indicates a missed combination
3003 //     when lowering the 'and' IR instruction to a machine IR instruction.
3004 LLVM_DEBUG(
3005 dbgs()
3006 << "Found large Width in bit-field-positioning -- this indicates no "
3007 "proper combining / constant folding was performed\n");
3008 return false;
3009 }
3010
3011 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3012 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3013 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3014 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3015 // which case it is not profitable to insert an extra shift.
3016 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3017 return false;
3018
3019 Src = getLeftShift(CurDAG, Op: ShlOp0, ShlAmount: ShlImm - DstLSB);
3020 return true;
3021}
3022
3023// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3024// UBFIZ.
3025static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3026 SDValue &Src, int &DstLSB,
3027 int &Width) {
3028 // The caller should have verified that Op is a left shift with a constant
3029 // shift amount; the asserts below check that.
3030 assert(Op.getOpcode() == ISD::SHL &&
3031 "Op.getNode() should be a SHL node to call this function");
3032 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3033 "Op.getNode() should shift ShlImm to call this function");
3034
3035 uint64_t AndImm = 0;
3036 SDValue Op0 = Op.getOperand(i: 0);
3037 if (!isOpcWithIntImmediate(N: Op0.getNode(), Opc: ISD::AND, Imm&: AndImm))
3038 return false;
3039
3040 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3041 if (isMask_64(Value: ShiftedAndImm)) {
3042     // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm must
3043     // end with a contiguous run of set bits (the mask), and may be prefixed with
3044     // arbitrary bits as long as those bits are shifted out.
3045     //
3046     // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3047     // the AND result bits corresponding to x, y and z are shifted out, so it's
3048     // fine not to extract them.
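    // Illustrative example: with ShlImm == 56 and AndImm == 0xAB000000000000FF,
    // (AndImm << 56) >> 56 is 0xFF, so Width == 8 and DstLSB == 56, and the
    // node behaves like UBFIZ dst, val, #56, #8.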
3049 Width = llvm::countr_one(Value: ShiftedAndImm);
3050 DstLSB = ShlImm;
3051 Src = Op0.getOperand(i: 0);
3052 return true;
3053 }
3054 return false;
3055}
3056
3057static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3058 bool BiggerPattern,
3059 const uint64_t NonZeroBits,
3060 SDValue &Src, int &DstLSB,
3061 int &Width) {
3062 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3063
3064 EVT VT = Op.getValueType();
3065 assert((VT == MVT::i32 || VT == MVT::i64) &&
3066 "Caller guarantees that type is i32 or i64");
3067 (void)VT;
3068
3069 uint64_t ShlImm;
3070 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SHL, Imm&: ShlImm))
3071 return false;
3072
3073 if (!BiggerPattern && !Op.hasOneUse())
3074 return false;
3075
3076 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3077 return true;
3078
3079 DstLSB = llvm::countr_zero(Val: NonZeroBits);
3080 Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);
3081
3082 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3083 return false;
3084
3085 Src = getLeftShift(CurDAG, Op: Op.getOperand(i: 0), ShlAmount: ShlImm - DstLSB);
3086 return true;
3087}
3088
3089static bool isShiftedMask(uint64_t Mask, EVT VT) {
3090 assert(VT == MVT::i32 || VT == MVT::i64);
3091 if (VT == MVT::i32)
3092 return isShiftedMask_32(Value: Mask);
3093 return isShiftedMask_64(Value: Mask);
3094}
3095
3096// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3097// inserted only sets known zero bits.
3098static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3099   assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3100
3101 EVT VT = N->getValueType(ResNo: 0);
3102 if (VT != MVT::i32 && VT != MVT::i64)
3103 return false;
3104
3105 unsigned BitWidth = VT.getSizeInBits();
3106
3107 uint64_t OrImm;
3108 if (!isOpcWithIntImmediate(N, Opc: ISD::OR, Imm&: OrImm))
3109 return false;
3110
3111   // Skip this transformation if the OR immediate can be encoded directly as an
3112   // ORR immediate; in that case we would only trade an AND+ORR for an
3113   // ORR+BFI/BFXIL, which is most likely performance neutral.
3114 if (AArch64_AM::isLogicalImmediate(imm: OrImm, regSize: BitWidth))
3115 return false;
3116
3117 uint64_t MaskImm;
3118 SDValue And = N->getOperand(Num: 0);
3119 // Must be a single use AND with an immediate operand.
3120 if (!And.hasOneUse() ||
3121 !isOpcWithIntImmediate(N: And.getNode(), Opc: ISD::AND, Imm&: MaskImm))
3122 return false;
3123
3124 // Compute the Known Zero for the AND as this allows us to catch more general
3125 // cases than just looking for AND with imm.
3126 KnownBits Known = CurDAG->computeKnownBits(Op: And);
3127
3128 // Non-zero in the sense that they're not provably zero, which is the key
3129 // point if we want to use this value.
3130 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3131
3132   // The KnownZero mask must be a shifted mask (a single contiguous run of set bits, e.g., 11100..00).
3133 if (!isShiftedMask(Mask: Known.Zero.getZExtValue(), VT))
3134 return false;
3135
3136 // The bits being inserted must only set those bits that are known to be zero.
3137 if ((OrImm & NotKnownZero) != 0) {
3138 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3139 // currently handle this case.
3140 return false;
3141 }
3142
3143 // BFI/BFXIL dst, src, #lsb, #width.
3144 int LSB = llvm::countr_one(Value: NotKnownZero);
3145 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3146
3147 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3148 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3149 unsigned ImmS = Width - 1;
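  // Illustrative example: on i32 with LSB == 8 and Width == 16, ImmR == 24 and
  // ImmS == 15, i.e. BFI Wd, Wn, #8, #16 expressed as BFM Wd, Wn, #24, #15.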
3150
3151 // If we're creating a BFI instruction avoid cases where we need more
3152 // instructions to materialize the BFI constant as compared to the original
3153 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3154 // should be no worse in this case.
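  // Illustrative example: OrImm == 0x12340000 with LSB == 8 gives
  // BFIImm == 0x123400, which occupies two 16-bit chunks while OrImm occupies
  // only one, so the check below rejects the transformation.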
3155 bool IsBFI = LSB != 0;
3156 uint64_t BFIImm = OrImm >> LSB;
3157 if (IsBFI && !AArch64_AM::isLogicalImmediate(imm: BFIImm, regSize: BitWidth)) {
3158 // We have a BFI instruction and we know the constant can't be materialized
3159 // with a ORR-immediate with the zero register.
3160 unsigned OrChunks = 0, BFIChunks = 0;
3161 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3162 if (((OrImm >> Shift) & 0xFFFF) != 0)
3163 ++OrChunks;
3164 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3165 ++BFIChunks;
3166 }
3167 if (BFIChunks > OrChunks)
3168 return false;
3169 }
3170
3171 // Materialize the constant to be inserted.
3172 SDLoc DL(N);
3173 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3174 SDNode *MOVI = CurDAG->getMachineNode(
3175 Opcode: MOVIOpc, dl: DL, VT, Op1: CurDAG->getTargetConstant(Val: BFIImm, DL, VT));
3176
3177 // Create the BFI/BFXIL instruction.
3178 SDValue Ops[] = {And.getOperand(i: 0), SDValue(MOVI, 0),
3179 CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3180 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3181 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3182 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3183 return true;
3184}
3185
3186static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3187 SDValue &ShiftedOperand,
3188 uint64_t &EncodedShiftImm) {
3189 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3190 if (!Dst.hasOneUse())
3191 return false;
3192
3193 EVT VT = Dst.getValueType();
3194 assert((VT == MVT::i32 || VT == MVT::i64) &&
3195 "Caller should guarantee that VT is one of i32 or i64");
3196 const unsigned SizeInBits = VT.getSizeInBits();
3197
3198 SDLoc DL(Dst.getNode());
3199 uint64_t AndImm, ShlImm;
3200 if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::AND, Imm&: AndImm) &&
3201 isShiftedMask_64(Value: AndImm)) {
3202 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3203 SDValue DstOp0 = Dst.getOperand(i: 0);
3204 if (!DstOp0.hasOneUse())
3205 return false;
3206
3207 // An example to illustrate the transformation
3208     // From:
3209     //   lsr     x8, x1, #1
3210     //   and     x8, x8, #0x3f80
3211     //   bfxil   x8, x1, #0, #7
3212     // To:
3213     //   and     x8, x1, #0x7f
3214     //   ubfx    x9, x1, #8, #7
3215     //   orr     x8, x8, x9, lsl #7
3216 //
3217 // The number of instructions remains the same, but ORR is faster than BFXIL
3218 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3219 // the dependency chain is improved after the transformation.
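    // Restated in DAG terms (matching the code below): Dst == (and (srl X, C),
    // shifted-mask) becomes UBFX dst, X, #(C + countr_zero(mask)),
    // #popcount(mask), which is then used as the ORR operand with
    // LSL #countr_zero(mask).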
3220 uint64_t SrlImm;
3221 if (isOpcWithIntImmediate(N: DstOp0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3222 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(Val: AndImm);
3223 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3224 unsigned MaskWidth =
3225 llvm::countr_one(Value: AndImm >> NumTrailingZeroInShiftedMask);
3226 unsigned UBFMOpc =
3227 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3228 SDNode *UBFMNode = CurDAG->getMachineNode(
3229 Opcode: UBFMOpc, dl: DL, VT, Op1: DstOp0.getOperand(i: 0),
3230 Op2: CurDAG->getTargetConstant(Val: SrlImm + NumTrailingZeroInShiftedMask, DL,
3231 VT),
3232 Op3: CurDAG->getTargetConstant(
3233 Val: SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3234 ShiftedOperand = SDValue(UBFMNode, 0);
3235 EncodedShiftImm = AArch64_AM::getShifterImm(
3236 ST: AArch64_AM::LSL, Imm: NumTrailingZeroInShiftedMask);
3237 return true;
3238 }
3239 }
3240 return false;
3241 }
3242
3243 if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3244 ShiftedOperand = Dst.getOperand(i: 0);
3245 EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm);
3246 return true;
3247 }
3248
3249 uint64_t SrlImm;
3250 if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3251 ShiftedOperand = Dst.getOperand(i: 0);
3252 EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm);
3253 return true;
3254 }
3255 return false;
3256}
3257
3258// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3259// the operands and select it to AArch64::ORR with shifted registers if
3260// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3261static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3262 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3263 const bool BiggerPattern) {
3264 EVT VT = N->getValueType(ResNo: 0);
3265 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3266 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3267 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3268 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3269 assert((VT == MVT::i32 || VT == MVT::i64) &&
3270 "Expect result type to be i32 or i64 since N is combinable to BFM");
3271 SDLoc DL(N);
3272
3273 // Bail out if BFM simplifies away one node in BFM Dst.
3274 if (OrOpd1 != Dst)
3275 return false;
3276
3277 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3278 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3279 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3280 if (BiggerPattern) {
3281 uint64_t SrcAndImm;
3282 if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::AND, Imm&: SrcAndImm) &&
3283 isMask_64(Value: SrcAndImm) && OrOpd0.getOperand(i: 0) == Src) {
3284 // OrOpd0 = AND Src, #Mask
3285 // So BFM simplifies away one AND node from Src and doesn't simplify away
3286 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3287 // one node (from Rd), ORR is better since it has higher throughput and
3288 // smaller latency than BFM on many AArch64 processors (and for the rest
3289 // ORR is at least as good as BFM).
3290 SDValue ShiftedOperand;
3291 uint64_t EncodedShiftImm;
3292 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3293 EncodedShiftImm)) {
3294 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3295 CurDAG->getTargetConstant(Val: EncodedShiftImm, DL, VT)};
3296 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3297 return true;
3298 }
3299 }
3300 return false;
3301 }
3302
3303 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3304
3305 uint64_t ShlImm;
3306 if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3307 if (OrOpd0.getOperand(i: 0) == Src && OrOpd0.hasOneUse()) {
3308 SDValue Ops[] = {
3309 Dst, Src,
3310 CurDAG->getTargetConstant(
3311 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
3312 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3313 return true;
3314 }
3315
3316 // Select the following pattern to left-shifted operand rather than BFI.
3317 // %val1 = op ..
3318 // %val2 = shl %val1, #imm
3319 // %res = or %val1, %val2
3320 //
3321     // If N were selected to BFI, we know that
3322     //   1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3323     //      folded into the BFI), and
3324     //   2) OrOpd1 would be the destination operand (i.e., preserved).
3325     // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3326 if (OrOpd0.getOperand(i: 0) == OrOpd1) {
3327 SDValue Ops[] = {
3328 OrOpd1, OrOpd1,
3329 CurDAG->getTargetConstant(
3330 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
3331 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3332 return true;
3333 }
3334 }
3335
3336 uint64_t SrlImm;
3337 if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3338 // Select the following pattern to right-shifted operand rather than BFXIL.
3339 // %val1 = op ..
3340 // %val2 = lshr %val1, #imm
3341 // %res = or %val1, %val2
3342 //
3343     // If N were selected to BFXIL, we know that
3344     //   1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3345     //      folded into the BFXIL), and
3346     //   2) OrOpd1 would be the destination operand (i.e., preserved).
3347     // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3348 if (OrOpd0.getOperand(i: 0) == OrOpd1) {
3349 SDValue Ops[] = {
3350 OrOpd1, OrOpd1,
3351 CurDAG->getTargetConstant(
3352 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm), DL, VT)};
3353 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3354 return true;
3355 }
3356 }
3357
3358 return false;
3359}
3360
3361static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3362 SelectionDAG *CurDAG) {
3363   assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3364
3365 EVT VT = N->getValueType(ResNo: 0);
3366 if (VT != MVT::i32 && VT != MVT::i64)
3367 return false;
3368
3369 unsigned BitWidth = VT.getSizeInBits();
3370
3371 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3372 // have the expected shape. Try to undo that.
3373
3374 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3375 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3376
3377   // Given an OR operation, check if we have the following pattern:
3378   // ubfm c, b, imm, imm2 (or something that does the same job, see
3379   //                       isBitfieldExtractOp)
3380   // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3381   //                 countTrailingZeros(mask2) == imm2 - imm + 1
3382 // f = d | c
3383 // if yes, replace the OR instruction with:
3384 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
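  // Illustrative example: with imm == 8 and imm2 == 15, c holds b[15:8] in its
  // low 8 bits and mask2 == ~0xff, so the OR is replaced by
  // BFXIL d, b, #8, #8 (i.e. BFM with ImmR == 8 and ImmS == 15).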
3385
3386 // OR is commutative, check all combinations of operand order and values of
3387 // BiggerPattern, i.e.
3388 // Opd0, Opd1, BiggerPattern=false
3389 // Opd1, Opd0, BiggerPattern=false
3390 // Opd0, Opd1, BiggerPattern=true
3391 // Opd1, Opd0, BiggerPattern=true
3392 // Several of these combinations may match, so check with BiggerPattern=false
3393 // first since that will produce better results by matching more instructions
3394 // and/or inserting fewer extra instructions.
3395 for (int I = 0; I < 4; ++I) {
3396
3397 SDValue Dst, Src;
3398 unsigned ImmR, ImmS;
3399 bool BiggerPattern = I / 2;
3400 SDValue OrOpd0Val = N->getOperand(Num: I % 2);
3401 SDNode *OrOpd0 = OrOpd0Val.getNode();
3402 SDValue OrOpd1Val = N->getOperand(Num: (I + 1) % 2);
3403 SDNode *OrOpd1 = OrOpd1Val.getNode();
3404
3405 unsigned BFXOpc;
3406 int DstLSB, Width;
3407 if (isBitfieldExtractOp(CurDAG, N: OrOpd0, Opc&: BFXOpc, Opd0&: Src, Immr&: ImmR, Imms&: ImmS,
3408 NumberOfIgnoredLowBits, BiggerPattern)) {
3409 // Check that the returned opcode is compatible with the pattern,
3410 // i.e., same type and zero extended (U and not S)
3411 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3412 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3413 continue;
3414
3415 // Compute the width of the bitfield insertion
3416 DstLSB = 0;
3417 Width = ImmS - ImmR + 1;
3418       // FIXME: This constraint is to catch bitfield insertion; we may want
3419       // to widen the pattern if we want to grab the general bitfield move
3420       // case.
3421 if (Width <= 0)
3422 continue;
3423
3424 // If the mask on the insertee is correct, we have a BFXIL operation. We
3425 // can share the ImmR and ImmS values from the already-computed UBFM.
3426 } else if (isBitfieldPositioningOp(CurDAG, Op: OrOpd0Val,
3427 BiggerPattern,
3428 Src, DstLSB, Width)) {
3429 ImmR = (BitWidth - DstLSB) % BitWidth;
3430 ImmS = Width - 1;
3431 } else
3432 continue;
3433
3434 // Check the second part of the pattern
3435 EVT VT = OrOpd1Val.getValueType();
3436 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3437
3438     // Compute the Known Zero for the candidate of the first operand.
3439     // This allows us to catch more general cases than just looking for
3440     // AND with imm. Indeed, simplify-demanded-bits may have removed
3441     // the AND instruction because it proved it was useless.
3442 KnownBits Known = CurDAG->computeKnownBits(Op: OrOpd1Val);
3443
3444 // Check if there is enough room for the second operand to appear
3445 // in the first one
3446 APInt BitsToBeInserted =
3447 APInt::getBitsSet(numBits: Known.getBitWidth(), loBit: DstLSB, hiBit: DstLSB + Width);
3448
3449 if ((BitsToBeInserted & ~Known.Zero) != 0)
3450 continue;
3451
3452 // Set the first operand
3453 uint64_t Imm;
3454 if (isOpcWithIntImmediate(N: OrOpd1, Opc: ISD::AND, Imm) &&
3455 isBitfieldDstMask(DstMask: Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3456 // In that case, we can eliminate the AND
3457 Dst = OrOpd1->getOperand(Num: 0);
3458 else
3459 // Maybe the AND has been removed by simplify-demanded-bits
3460 // or is useful because it discards more bits
3461 Dst = OrOpd1Val;
3462
3463 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3464 // with shifted operand is more efficient.
3465 if (tryOrrWithShift(N, OrOpd0: OrOpd0Val, OrOpd1: OrOpd1Val, Src, Dst, CurDAG,
3466 BiggerPattern))
3467 return true;
3468
3469 // both parts match
3470 SDLoc DL(N);
3471 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3472 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3473 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3474 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3475 return true;
3476 }
3477
3478 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3479 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3480 // mask (e.g., 0x000ffff0).
3481 uint64_t Mask0Imm, Mask1Imm;
3482 SDValue And0 = N->getOperand(Num: 0);
3483 SDValue And1 = N->getOperand(Num: 1);
3484 if (And0.hasOneUse() && And1.hasOneUse() &&
3485 isOpcWithIntImmediate(N: And0.getNode(), Opc: ISD::AND, Imm&: Mask0Imm) &&
3486 isOpcWithIntImmediate(N: And1.getNode(), Opc: ISD::AND, Imm&: Mask1Imm) &&
3487 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3488 (isShiftedMask(Mask: Mask0Imm, VT) || isShiftedMask(Mask: Mask1Imm, VT))) {
3489
3490 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3491 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3492 // bits to be inserted.
3493 if (isShiftedMask(Mask: Mask0Imm, VT)) {
3494 std::swap(a&: And0, b&: And1);
3495 std::swap(a&: Mask0Imm, b&: Mask1Imm);
3496 }
3497
3498 SDValue Src = And1->getOperand(Num: 0);
3499 SDValue Dst = And0->getOperand(Num: 0);
3500 unsigned LSB = llvm::countr_zero(Val: Mask1Imm);
3501 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3502
3503 // The BFXIL inserts the low-order bits from a source register, so right
3504 // shift the needed bits into place.
3505 SDLoc DL(N);
3506 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3507 uint64_t LsrImm = LSB;
3508 if (Src->hasOneUse() &&
3509 isOpcWithIntImmediate(N: Src.getNode(), Opc: ISD::SRL, Imm&: LsrImm) &&
3510 (LsrImm + LSB) < BitWidth) {
3511 Src = Src->getOperand(Num: 0);
3512 LsrImm += LSB;
3513 }
3514
3515 SDNode *LSR = CurDAG->getMachineNode(
3516 Opcode: ShiftOpc, dl: DL, VT, Op1: Src, Op2: CurDAG->getTargetConstant(Val: LsrImm, DL, VT),
3517 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT));
3518
3519 // BFXIL is an alias of BFM, so translate to BFM operands.
3520 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3521 unsigned ImmS = Width - 1;
3522
3523 // Create the BFXIL instruction.
3524 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3525 CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3526 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3527 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3528 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3529 return true;
3530 }
3531
3532 return false;
3533}
3534
3535bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3536 if (N->getOpcode() != ISD::OR)
3537 return false;
3538
3539 APInt NUsefulBits;
3540 getUsefulBits(Op: SDValue(N, 0), UsefulBits&: NUsefulBits);
3541
3542   // If none of the bits are useful, just return UNDEF.
3543 if (!NUsefulBits) {
3544 CurDAG->SelectNodeTo(N, MachineOpc: TargetOpcode::IMPLICIT_DEF, VT: N->getValueType(ResNo: 0));
3545 return true;
3546 }
3547
3548 if (tryBitfieldInsertOpFromOr(N, UsefulBits: NUsefulBits, CurDAG))
3549 return true;
3550
3551 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3552}
3553
3554 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3555 /// equivalent of a left shift by a constant amount followed by an AND masking
3556 /// out a contiguous set of bits.
3557bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3558 if (N->getOpcode() != ISD::AND)
3559 return false;
3560
3561 EVT VT = N->getValueType(ResNo: 0);
3562 if (VT != MVT::i32 && VT != MVT::i64)
3563 return false;
3564
3565 SDValue Op0;
3566 int DstLSB, Width;
3567 if (!isBitfieldPositioningOp(CurDAG, Op: SDValue(N, 0), /*BiggerPattern=*/false,
3568 Src&: Op0, DstLSB, Width))
3569 return false;
3570
3571 // ImmR is the rotate right amount.
3572 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3573 // ImmS is the most significant bit of the source to be moved.
3574 unsigned ImmS = Width - 1;
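  // Illustrative example: (and (shl x, 4), 0xff0) on i32 gives DstLSB == 4 and
  // Width == 8, i.e. UBFIZ Wd, Wn, #4, #8, encoded here as UBFM Wd, Wn, #28, #7.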
3575
3576 SDLoc DL(N);
3577 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3578 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3579 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3580 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3581 return true;
3582}
3583
3584/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3585/// variable shift/rotate instructions.
3586bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3587 EVT VT = N->getValueType(ResNo: 0);
3588
3589 unsigned Opc;
3590 switch (N->getOpcode()) {
3591 case ISD::ROTR:
3592 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3593 break;
3594 case ISD::SHL:
3595 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3596 break;
3597 case ISD::SRL:
3598 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3599 break;
3600 case ISD::SRA:
3601 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3602 break;
3603 default:
3604 return false;
3605 }
3606
3607 uint64_t Size;
3608 uint64_t Bits;
3609 if (VT == MVT::i32) {
3610 Bits = 5;
3611 Size = 32;
3612 } else if (VT == MVT::i64) {
3613 Bits = 6;
3614 Size = 64;
3615 } else
3616 return false;
3617
3618 SDValue ShiftAmt = N->getOperand(Num: 1);
3619 SDLoc DL(N);
3620 SDValue NewShiftAmt;
3621
3622 // Skip over an extend of the shift amount.
3623 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3624 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3625 ShiftAmt = ShiftAmt->getOperand(Num: 0);
3626
3627 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3628 SDValue Add0 = ShiftAmt->getOperand(Num: 0);
3629 SDValue Add1 = ShiftAmt->getOperand(Num: 1);
3630 uint64_t Add0Imm;
3631 uint64_t Add1Imm;
3632 if (isIntImmediate(N: Add1, Imm&: Add1Imm) && (Add1Imm % Size == 0)) {
3633 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3634 // to avoid the ADD/SUB.
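      // Illustrative example: (srl x, (add y, 64)) on i64 selects to
      // LSRVXr x, y, since the instruction only uses the low 6 bits of the
      // shift amount anyway.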
3635 NewShiftAmt = Add0;
3636 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3637 isIntImmediate(N: Add0, Imm&: Add0Imm) && Add0Imm != 0 &&
3638 (Add0Imm % Size == 0)) {
3639 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3640 // to generate a NEG instead of a SUB from a constant.
3641 unsigned NegOpc;
3642 unsigned ZeroReg;
3643 EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
3644 if (SubVT == MVT::i32) {
3645 NegOpc = AArch64::SUBWrr;
3646 ZeroReg = AArch64::WZR;
3647 } else {
3648 assert(SubVT == MVT::i64);
3649 NegOpc = AArch64::SUBXrr;
3650 ZeroReg = AArch64::XZR;
3651 }
3652 SDValue Zero =
3653 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
3654 MachineSDNode *Neg =
3655 CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
3656 NewShiftAmt = SDValue(Neg, 0);
3657 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3658 isIntImmediate(N: Add0, Imm&: Add0Imm) && (Add0Imm % Size == Size - 1)) {
3659 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3660 // to generate a NOT instead of a SUB from a constant.
3661 unsigned NotOpc;
3662 unsigned ZeroReg;
3663 EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
3664 if (SubVT == MVT::i32) {
3665 NotOpc = AArch64::ORNWrr;
3666 ZeroReg = AArch64::WZR;
3667 } else {
3668 assert(SubVT == MVT::i64);
3669 NotOpc = AArch64::ORNXrr;
3670 ZeroReg = AArch64::XZR;
3671 }
3672 SDValue Zero =
3673 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
3674 MachineSDNode *Not =
3675 CurDAG->getMachineNode(Opcode: NotOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
3676 NewShiftAmt = SDValue(Not, 0);
3677 } else
3678 return false;
3679 } else {
3680 // If the shift amount is masked with an AND, check that the mask covers the
3681 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3682 // the AND.
3683 uint64_t MaskImm;
3684 if (!isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: ISD::AND, Imm&: MaskImm) &&
3685 !isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: AArch64ISD::ANDS, Imm&: MaskImm))
3686 return false;
3687
3688 if ((unsigned)llvm::countr_one(Value: MaskImm) < Bits)
3689 return false;
3690
3691 NewShiftAmt = ShiftAmt->getOperand(Num: 0);
3692 }
3693
3694 // Narrow/widen the shift amount to match the size of the shift operation.
3695 if (VT == MVT::i32)
3696 NewShiftAmt = narrowIfNeeded(CurDAG, N: NewShiftAmt);
3697 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3698 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3699 MachineSDNode *Ext = CurDAG->getMachineNode(
3700 AArch64::SUBREG_TO_REG, DL, VT,
3701 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3702 NewShiftAmt = SDValue(Ext, 0);
3703 }
3704
3705 SDValue Ops[] = {N->getOperand(Num: 0), NewShiftAmt};
3706 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3707 return true;
3708}
3709
3710static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3711 SDValue &FixedPos,
3712 unsigned RegWidth,
3713 bool isReciprocal) {
3714 APFloat FVal(0.0);
3715 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: N))
3716 FVal = CN->getValueAPF();
3717 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(Val&: N)) {
3718 // Some otherwise illegal constants are allowed in this case.
3719 if (LN->getOperand(Num: 1).getOpcode() != AArch64ISD::ADDlow ||
3720 !isa<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1)))
3721 return false;
3722
3723 ConstantPoolSDNode *CN =
3724 dyn_cast<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1));
3725 FVal = cast<ConstantFP>(Val: CN->getConstVal())->getValueAPF();
3726 } else
3727 return false;
3728
3729 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3730 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3731 // x-register.
3732 //
3733 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3734 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3735 // integers.
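  // Illustrative example: for (fp_to_sint (fmul x, 256.0)) with a 32-bit
  // result, FBits ends up as 8, allowing the fixed-point form with #8 fbits.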
3736 bool IsExact;
3737
3738 if (isReciprocal)
3739 if (!FVal.getExactInverse(inv: &FVal))
3740 return false;
3741
3742 // fbits is between 1 and 64 in the worst-case, which means the fmul
3743 // could have 2^64 as an actual operand. Need 65 bits of precision.
3744 APSInt IntVal(65, true);
3745 FVal.convertToInteger(Result&: IntVal, RM: APFloat::rmTowardZero, IsExact: &IsExact);
3746
3747 // N.b. isPowerOf2 also checks for > 0.
3748 if (!IsExact || !IntVal.isPowerOf2())
3749 return false;
3750 unsigned FBits = IntVal.logBase2();
3751
3752 // Checks above should have guaranteed that we haven't lost information in
3753 // finding FBits, but it must still be in range.
3754 if (FBits == 0 || FBits > RegWidth) return false;
3755
3756 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3757 return true;
3758}
3759
3760bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3761 unsigned RegWidth) {
3762 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3763 isReciprocal: false);
3764}
3765
3766bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3767 SDValue &FixedPos,
3768 unsigned RegWidth) {
3769 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3770 isReciprocal: true);
3771}
3772
3773 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
3774 // of the string, obtains the integer values from them, and combines these
3775 // into a single value to be used in the MRS/MSR instruction.
3776static int getIntOperandFromRegisterString(StringRef RegString) {
3777 SmallVector<StringRef, 5> Fields;
3778 RegString.split(A&: Fields, Separator: ':');
3779
3780 if (Fields.size() == 1)
3781 return -1;
3782
3783 assert(Fields.size() == 5
3784 && "Invalid number of fields in read register string");
3785
3786 SmallVector<int, 5> Ops;
3787 bool AllIntFields = true;
3788
3789 for (StringRef Field : Fields) {
3790 unsigned IntField;
3791 AllIntFields &= !Field.getAsInteger(Radix: 10, Result&: IntField);
3792 Ops.push_back(Elt: IntField);
3793 }
3794
3795 assert(AllIntFields &&
3796 "Unexpected non-integer value in special register string.");
3797 (void)AllIntFields;
3798
3799 // Need to combine the integer fields of the string into a single value
3800 // based on the bit encoding of MRS/MSR instruction.
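  // Illustrative example: the string "1:3:13:0:2" packs to
  // (1 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 == 0x5E82.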
3801 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3802 (Ops[3] << 3) | (Ops[4]);
3803}
3804
3805 // Lower the read_register intrinsic to an MRS instruction node if the special
3806 // register string argument is either of the form detailed in the ACLE (the
3807 // form described in getIntOperandFromRegisterString) or is a named register
3808 // known by the MRS SysReg mapper.
3809bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3810 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
3811 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
3812 SDLoc DL(N);
3813
3814 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3815
3816 unsigned Opcode64Bit = AArch64::MRS;
3817 int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
3818 if (Imm == -1) {
3819     // No match. Use the sysreg mapper to map the remaining possible strings to
3820     // the value for the register to be used for the instruction operand.
3821 const auto *TheReg =
3822 AArch64SysReg::lookupSysRegByName(RegString->getString());
3823 if (TheReg && TheReg->Readable &&
3824 TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
3825 Imm = TheReg->Encoding;
3826 else
3827 Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
3828
3829 if (Imm == -1) {
3830 // Still no match, see if this is "pc" or give up.
3831 if (!ReadIs128Bit && RegString->getString() == "pc") {
3832 Opcode64Bit = AArch64::ADR;
3833 Imm = 0;
3834 } else {
3835 return false;
3836 }
3837 }
3838 }
3839
3840 SDValue InChain = N->getOperand(Num: 0);
3841 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3842 if (!ReadIs128Bit) {
3843 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3844 {SysRegImm, InChain});
3845 } else {
3846 SDNode *MRRS = CurDAG->getMachineNode(
3847 AArch64::MRRS, DL,
3848 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3849 {SysRegImm, InChain});
3850
3851 // Sysregs are not endian. The even register always contains the low half
3852 // of the register.
3853 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3854 SDValue(MRRS, 0));
3855 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3856 SDValue(MRRS, 0));
3857 SDValue OutChain = SDValue(MRRS, 1);
3858
3859 ReplaceUses(F: SDValue(N, 0), T: Lo);
3860 ReplaceUses(F: SDValue(N, 1), T: Hi);
3861 ReplaceUses(F: SDValue(N, 2), T: OutChain);
3862   }
3863 return true;
3864}
3865
3866 // Lower the write_register intrinsic to an MSR instruction node if the special
3867 // register string argument is either of the form detailed in the ACLE (the
3868 // form described in getIntOperandFromRegisterString) or is a named register
3869 // known by the MSR SysReg mapper.
3870bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3871 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
3872 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
3873 SDLoc DL(N);
3874
3875 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3876
3877 if (!WriteIs128Bit) {
3878     // Check if the register was one of those allowed as the pstatefield value
3879     // in the MSR (immediate) instruction. To accept the values allowed in the
3880     // pstatefield for the MSR (immediate) instruction, we also require that an
3881     // immediate value has been provided as an argument; we know that this is
3882     // the case as it has been ensured by semantic checking.
3883 auto trySelectPState = [&](auto PMapper, unsigned State) {
3884 if (PMapper) {
3885 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3886 "Expected a constant integer expression.");
3887 unsigned Reg = PMapper->Encoding;
3888 uint64_t Immed = N->getConstantOperandVal(Num: 2);
3889 CurDAG->SelectNodeTo(
3890 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3891 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3892 return true;
3893 }
3894 return false;
3895 };
3896
3897 if (trySelectPState(
3898 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3899 AArch64::MSRpstateImm4))
3900 return true;
3901 if (trySelectPState(
3902 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3903 AArch64::MSRpstateImm1))
3904 return true;
3905 }
3906
3907 int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
3908 if (Imm == -1) {
3909 // Use the sysreg mapper to attempt to map the remaining possible strings
3910 // to the value for the register to be used for the MSR (register)
3911 // instruction operand.
3912 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3913 if (TheReg && TheReg->Writeable &&
3914 TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
3915 Imm = TheReg->Encoding;
3916 else
3917 Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
3918
3919 if (Imm == -1)
3920 return false;
3921 }
3922
3923 SDValue InChain = N->getOperand(Num: 0);
3924 if (!WriteIs128Bit) {
3925 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3926 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3927 N->getOperand(2), InChain);
3928 } else {
3929     // No endian swap. The lower half always goes into the even subreg, and the
3930     // higher half always into the odd subreg.
3931 SDNode *Pair = CurDAG->getMachineNode(
3932 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3933 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3934 MVT::i32),
3935 N->getOperand(2),
3936 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3937 N->getOperand(3),
3938 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3939
3940 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3941 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3942 SDValue(Pair, 0), InChain);
3943 }
3944
3945 return true;
3946}
3947
3948 /// We've got special pseudo-instructions for these atomic compare-and-swap
3949 /// operations.
3949bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3950 unsigned Opcode;
3951 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3952
3953 // Leave IR for LSE if subtarget supports it.
3954 if (Subtarget->hasLSE()) return false;
3955
3956 if (MemTy == MVT::i8)
3957 Opcode = AArch64::CMP_SWAP_8;
3958 else if (MemTy == MVT::i16)
3959 Opcode = AArch64::CMP_SWAP_16;
3960 else if (MemTy == MVT::i32)
3961 Opcode = AArch64::CMP_SWAP_32;
3962 else if (MemTy == MVT::i64)
3963 Opcode = AArch64::CMP_SWAP_64;
3964 else
3965 llvm_unreachable("Unknown AtomicCmpSwap type");
3966
3967 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3968 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
3969 N->getOperand(Num: 0)};
3970 SDNode *CmpSwap = CurDAG->getMachineNode(
3971 Opcode, SDLoc(N),
3972 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3973
3974 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3975 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3976
3977 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
3978 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
3979 CurDAG->RemoveDeadNode(N);
3980
3981 return true;
3982}
3983
3984bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3985 SDValue &Shift) {
3986 if (!isa<ConstantSDNode>(Val: N))
3987 return false;
3988
3989 SDLoc DL(N);
3990 uint64_t Val = cast<ConstantSDNode>(Val&: N)
3991 ->getAPIntValue()
3992 .trunc(width: VT.getFixedSizeInBits())
3993 .getZExtValue();
3994
3995 switch (VT.SimpleTy) {
3996 case MVT::i8:
3997 // All immediates are supported.
3998 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3999 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4000 return true;
4001 case MVT::i16:
4002 case MVT::i32:
4003 case MVT::i64:
4004 // Support 8bit unsigned immediates.
4005 if (Val <= 255) {
4006 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4007 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4008 return true;
4009 }
4010 // Support 16bit unsigned immediates that are a multiple of 256.
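    // Illustrative example: Val == 0x4500 is selected as #0x45 with LSL #8.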
4011 if (Val <= 65280 && Val % 256 == 0) {
4012 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4013 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4014 return true;
4015 }
4016 break;
4017 default:
4018 break;
4019 }
4020
4021 return false;
4022}
4023
4024bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4025 SDValue &Imm, SDValue &Shift,
4026 bool Negate) {
4027 if (!isa<ConstantSDNode>(Val: N))
4028 return false;
4029
4030 SDLoc DL(N);
4031 int64_t Val = cast<ConstantSDNode>(Val&: N)
4032 ->getAPIntValue()
4033 .trunc(width: VT.getFixedSizeInBits())
4034 .getSExtValue();
4035
4036 if (Negate)
4037 Val = -Val;
4038
4039 // Signed saturating instructions treat their immediate operand as unsigned,
4040 // whereas the related intrinsics define their operands to be signed. This
4041 // means we can only use the immediate form when the operand is non-negative.
4042 if (Val < 0)
4043 return false;
4044
4045 switch (VT.SimpleTy) {
4046 case MVT::i8:
4047 // All positive immediates are supported.
4048 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4049 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4050 return true;
4051 case MVT::i16:
4052 case MVT::i32:
4053 case MVT::i64:
4054 // Support 8bit positive immediates.
4055 if (Val <= 255) {
4056 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4057 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4058 return true;
4059 }
4060 // Support 16bit positive immediates that are a multiple of 256.
4061 if (Val <= 65280 && Val % 256 == 0) {
4062 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4063 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4064 return true;
4065 }
4066 break;
4067 default:
4068 break;
4069 }
4070
4071 return false;
4072}
4073
4074bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4075 SDValue &Shift) {
4076 if (!isa<ConstantSDNode>(Val: N))
4077 return false;
4078
4079 SDLoc DL(N);
4080 int64_t Val = cast<ConstantSDNode>(Val&: N)
4081 ->getAPIntValue()
4082 .trunc(width: VT.getFixedSizeInBits())
4083 .getSExtValue();
4084
4085 switch (VT.SimpleTy) {
4086 case MVT::i8:
4087 // All immediates are supported.
4088 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4089 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4090 return true;
4091 case MVT::i16:
4092 case MVT::i32:
4093 case MVT::i64:
4094 // Support 8bit signed immediates.
4095 if (Val >= -128 && Val <= 127) {
4096 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4097 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4098 return true;
4099 }
4100 // Support 16bit signed immediates that are a multiple of 256.
4101 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4102 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4103 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4104 return true;
4105 }
4106 break;
4107 default:
4108 break;
4109 }
4110
4111 return false;
4112}
4113
4114bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4115 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4116 int64_t ImmVal = CNode->getSExtValue();
4117 SDLoc DL(N);
4118 if (ImmVal >= -128 && ImmVal < 128) {
4119 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4120 return true;
4121 }
4122 }
4123 return false;
4124}
4125
4126bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4127 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4128 uint64_t ImmVal = CNode->getZExtValue();
4129
4130 switch (VT.SimpleTy) {
4131 case MVT::i8:
4132 ImmVal &= 0xFF;
4133 break;
4134 case MVT::i16:
4135 ImmVal &= 0xFFFF;
4136 break;
4137 case MVT::i32:
4138 ImmVal &= 0xFFFFFFFF;
4139 break;
4140 case MVT::i64:
4141 break;
4142 default:
4143 llvm_unreachable("Unexpected type");
4144 }
4145
4146 if (ImmVal < 256) {
4147 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4148 return true;
4149 }
4150 }
4151 return false;
4152}
4153
4154bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4155 bool Invert) {
4156 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4157 uint64_t ImmVal = CNode->getZExtValue();
4158 SDLoc DL(N);
4159
4160 if (Invert)
4161 ImmVal = ~ImmVal;
4162
4163     // Replicate the immediate across 64 bits according to the element size.
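    // Illustrative example: for MVT::i16, 0x00F0 becomes 0x00F000F000F000F0
    // before computing the 64-bit logical-immediate encoding below.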
4164 switch (VT.SimpleTy) {
4165 case MVT::i8:
4166 ImmVal &= 0xFF;
4167 ImmVal |= ImmVal << 8;
4168 ImmVal |= ImmVal << 16;
4169 ImmVal |= ImmVal << 32;
4170 break;
4171 case MVT::i16:
4172 ImmVal &= 0xFFFF;
4173 ImmVal |= ImmVal << 16;
4174 ImmVal |= ImmVal << 32;
4175 break;
4176 case MVT::i32:
4177 ImmVal &= 0xFFFFFFFF;
4178 ImmVal |= ImmVal << 32;
4179 break;
4180 case MVT::i64:
4181 break;
4182 default:
4183 llvm_unreachable("Unexpected type");
4184 }
4185
4186 uint64_t encoding;
4187 if (AArch64_AM::processLogicalImmediate(Imm: ImmVal, RegSize: 64, Encoding&: encoding)) {
4188 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4189 return true;
4190 }
4191 }
4192 return false;
4193}
4194
4195// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4196// Rather than attempt to normalise everything we can sometimes saturate the
4197// shift amount during selection. This function also allows for consistent
4198// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4199// required by the instructions.
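// Illustrative example: with Low == 1 and High == 8, an immediate of 10 is
// saturated to 8 when AllowSaturation is true, and rejected otherwise.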
4200bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4201 uint64_t High, bool AllowSaturation,
4202 SDValue &Imm) {
4203 if (auto *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
4204 uint64_t ImmVal = CN->getZExtValue();
4205
4206 // Reject shift amounts that are too small.
4207 if (ImmVal < Low)
4208 return false;
4209
4210 // Reject or saturate shift amounts that are too big.
4211 if (ImmVal > High) {
4212 if (!AllowSaturation)
4213 return false;
4214 ImmVal = High;
4215 }
4216
4217 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4218 return true;
4219 }
4220
4221 return false;
4222}
4223
4224bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4225 // tagp(FrameIndex, IRGstack, tag_offset):
4226 // since the offset between FrameIndex and IRGstack is a compile-time
4227 // constant, this can be lowered to a single ADDG instruction.
4228 if (!(isa<FrameIndexSDNode>(Val: N->getOperand(Num: 1)))) {
4229 return false;
4230 }
4231
4232 SDValue IRG_SP = N->getOperand(Num: 2);
4233 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4234 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4235 return false;
4236 }
4237
4238 const TargetLowering *TLI = getTargetLowering();
4239 SDLoc DL(N);
4240 int FI = cast<FrameIndexSDNode>(Val: N->getOperand(Num: 1))->getIndex();
4241 SDValue FiOp = CurDAG->getTargetFrameIndex(
4242 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
4243 int TagOffset = N->getConstantOperandVal(Num: 3);
4244
4245 SDNode *Out = CurDAG->getMachineNode(
4246 AArch64::TAGPstack, DL, MVT::i64,
4247 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4248 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4249 ReplaceNode(F: N, T: Out);
4250 return true;
4251}
4252
4253void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4254 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4255 "llvm.aarch64.tagp third argument must be an immediate");
4256 if (trySelectStackSlotTagP(N))
4257 return;
4258   // FIXME: the above applies whenever the offset between Op1 and Op2 is a
4259   // compile-time constant, not just for stack allocations.
4260
4261 // General case for unrelated pointers in Op1 and Op2.
4262 SDLoc DL(N);
4263 int TagOffset = N->getConstantOperandVal(Num: 3);
4264 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4265 {N->getOperand(1), N->getOperand(2)});
4266 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4267 {SDValue(N1, 0), N->getOperand(2)});
4268 SDNode *N3 = CurDAG->getMachineNode(
4269 AArch64::ADDG, DL, MVT::i64,
4270 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4271 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4272 ReplaceNode(F: N, T: N3);
4273}
4274
4275bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4276 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4277
4278 // Bail when not a "cast" like insert_subvector.
4279 if (N->getConstantOperandVal(Num: 2) != 0)
4280 return false;
4281 if (!N->getOperand(Num: 0).isUndef())
4282 return false;
4283
4284 // Bail when normal isel should do the job.
4285 EVT VT = N->getValueType(ResNo: 0);
4286 EVT InVT = N->getOperand(Num: 1).getValueType();
4287 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4288 return false;
4289 if (InVT.getSizeInBits() <= 128)
4290 return false;
4291
4292 // NOTE: We can only get here when doing fixed length SVE code generation.
4293 // We do manual selection because the types involved are not linked to real
4294 // registers (despite being legal) and must be coerced into SVE registers.
4295
4296 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4297 "Expected to insert into a packed scalable vector!");
4298
4299 SDLoc DL(N);
4300 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4301 ReplaceNode(F: N, T: CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4302 N->getOperand(Num: 1), RC));
4303 return true;
4304}
4305
4306bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4307 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4308
4309 // Bail when not a "cast" like extract_subvector.
4310 if (N->getConstantOperandVal(Num: 1) != 0)
4311 return false;
4312
4313 // Bail when normal isel can do the job.
4314 EVT VT = N->getValueType(ResNo: 0);
4315 EVT InVT = N->getOperand(Num: 0).getValueType();
4316 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4317 return false;
4318 if (VT.getSizeInBits() <= 128)
4319 return false;
4320
4321 // NOTE: We can only get here when doing fixed length SVE code generation.
4322 // We do manual selection because the types involved are not linked to real
4323 // registers (despite being legal) and must be coerced into SVE registers.
4324
4325 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4326 "Expected to extract from a packed scalable vector!");
4327
4328 SDLoc DL(N);
4329 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4330 ReplaceNode(F: N, T: CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4331 N->getOperand(Num: 0), RC));
4332 return true;
4333}
4334
4335bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4336 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4337
4338 SDValue N0 = N->getOperand(Num: 0);
4339 SDValue N1 = N->getOperand(Num: 1);
4340 EVT VT = N->getValueType(ResNo: 0);
4341
4342 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4343   // Rotate by a constant is a funnel shift in IR which is expanded to
4344 // an OR with shifted operands.
4345 // We do the following transform:
4346 // OR N0, N1 -> xar (x, y, imm)
4347 // Where:
4348 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4349 // N0 = SHL_PRED true, V, splat(bits-imm)
4350 // V = (xor x, y)
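  // Illustrative end result for the scalable case: for nxv8i16 with imm == 3,
  // the pair above selects to XAR_ZZZI_H (an SVE2 XAR by #3), provided the
  // predicates are all-active.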
4351 if (VT.isScalableVector() && Subtarget->hasSVE2orSME()) {
4352 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4353 N1.getOpcode() != AArch64ISD::SRL_PRED)
4354 std::swap(a&: N0, b&: N1);
4355 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4356 N1.getOpcode() != AArch64ISD::SRL_PRED)
4357 return false;
4358
4359 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4360 if (!TLI->isAllActivePredicate(DAG&: *CurDAG, N: N0.getOperand(i: 0)) ||
4361 !TLI->isAllActivePredicate(DAG&: *CurDAG, N: N1.getOperand(i: 0)))
4362 return false;
4363
4364 SDValue XOR = N0.getOperand(i: 1);
4365 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(i: 1))
4366 return false;
4367
4368 APInt ShlAmt, ShrAmt;
4369 if (!ISD::isConstantSplatVector(N: N0.getOperand(i: 2).getNode(), SplatValue&: ShlAmt) ||
4370 !ISD::isConstantSplatVector(N: N1.getOperand(i: 2).getNode(), SplatValue&: ShrAmt))
4371 return false;
4372
4373 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4374 return false;
4375
4376 SDLoc DL(N);
4377 SDValue Imm =
4378 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4379
4380 SDValue Ops[] = {XOR.getOperand(i: 0), XOR.getOperand(i: 1), Imm};
4381 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4382 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4383 AArch64::XAR_ZZZI_D})) {
4384 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4385 return true;
4386 }
4387 return false;
4388 }
4389
4390 if (!Subtarget->hasSHA3())
4391 return false;
4392
4393 if (N0->getOpcode() != AArch64ISD::VSHL ||
4394 N1->getOpcode() != AArch64ISD::VLSHR)
4395 return false;
4396
4397 if (N0->getOperand(Num: 0) != N1->getOperand(Num: 0) ||
4398 N1->getOperand(Num: 0)->getOpcode() != ISD::XOR)
4399 return false;
4400
4401 SDValue XOR = N0.getOperand(i: 0);
4402 SDValue R1 = XOR.getOperand(i: 0);
4403 SDValue R2 = XOR.getOperand(i: 1);
4404
4405 unsigned HsAmt = N0.getConstantOperandVal(i: 1);
4406 unsigned ShAmt = N1.getConstantOperandVal(i: 1);
4407
4408 SDLoc DL = SDLoc(N0.getOperand(i: 1));
4409 SDValue Imm = CurDAG->getTargetConstant(
4410 Val: ShAmt, DL, VT: N0.getOperand(i: 1).getValueType(), isOpaque: false);
4411
4412 if (ShAmt + HsAmt != 64)
4413 return false;
4414
4415 SDValue Ops[] = {R1, R2, Imm};
4416 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4417
4418 return true;
4419}
4420
4421void AArch64DAGToDAGISel::Select(SDNode *Node) {
4422 // If we have a custom node, we already have selected!
4423 if (Node->isMachineOpcode()) {
4424 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4425 Node->setNodeId(-1);
4426 return;
4427 }
4428
4429   // A few custom selection cases.
4430 EVT VT = Node->getValueType(ResNo: 0);
4431
4432 switch (Node->getOpcode()) {
4433 default:
4434 break;
4435
4436 case ISD::ATOMIC_CMP_SWAP:
4437 if (SelectCMP_SWAP(N: Node))
4438 return;
4439 break;
4440
4441 case ISD::READ_REGISTER:
4442 case AArch64ISD::MRRS:
4443 if (tryReadRegister(N: Node))
4444 return;
4445 break;
4446
4447 case ISD::WRITE_REGISTER:
4448 case AArch64ISD::MSRR:
4449 if (tryWriteRegister(N: Node))
4450 return;
4451 break;
4452
4453 case ISD::LOAD: {
4454 // Try to select as an indexed load. Fall through to normal processing
4455 // if we can't.
4456 if (tryIndexedLoad(N: Node))
4457 return;
4458 break;
4459 }
4460
4461 case ISD::SRL:
4462 case ISD::AND:
4463 case ISD::SRA:
4464 case ISD::SIGN_EXTEND_INREG:
4465 if (tryBitfieldExtractOp(N: Node))
4466 return;
4467 if (tryBitfieldInsertInZeroOp(N: Node))
4468 return;
4469 [[fallthrough]];
4470 case ISD::ROTR:
4471 case ISD::SHL:
4472 if (tryShiftAmountMod(N: Node))
4473 return;
4474 break;
4475
4476 case ISD::SIGN_EXTEND:
4477 if (tryBitfieldExtractOpFromSExt(N: Node))
4478 return;
4479 break;
4480
4481 case ISD::OR:
4482 if (tryBitfieldInsertOp(N: Node))
4483 return;
4484 if (trySelectXAR(N: Node))
4485 return;
4486 break;
4487
4488 case ISD::EXTRACT_SUBVECTOR: {
4489 if (trySelectCastScalableToFixedLengthVector(N: Node))
4490 return;
4491 break;
4492 }
4493
4494 case ISD::INSERT_SUBVECTOR: {
4495 if (trySelectCastFixedLengthToScalableVector(N: Node))
4496 return;
4497 break;
4498 }
4499
4500 case ISD::Constant: {
4501 // Materialize zero constants as copies from WZR/XZR. This allows
4502 // the coalescer to propagate these into other instructions.
4503 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Val: Node);
4504 if (ConstNode->isZero()) {
4505 if (VT == MVT::i32) {
4506 SDValue New = CurDAG->getCopyFromReg(
4507 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4508 ReplaceNode(F: Node, T: New.getNode());
4509 return;
4510 } else if (VT == MVT::i64) {
4511 SDValue New = CurDAG->getCopyFromReg(
4512 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4513 ReplaceNode(F: Node, T: New.getNode());
4514 return;
4515 }
4516 }
4517 break;
4518 }
4519
4520 case ISD::FrameIndex: {
4521 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4522 int FI = cast<FrameIndexSDNode>(Val: Node)->getIndex();
4523 unsigned Shifter = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0);
4524 const TargetLowering *TLI = getTargetLowering();
4525 SDValue TFI = CurDAG->getTargetFrameIndex(
4526 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
4527 SDLoc DL(Node);
4528 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4529 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4530 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4531 return;
4532 }
4533 case ISD::INTRINSIC_W_CHAIN: {
4534 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
4535 switch (IntNo) {
4536 default:
4537 break;
4538 case Intrinsic::aarch64_ldaxp:
4539 case Intrinsic::aarch64_ldxp: {
4540 unsigned Op =
4541 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4542 SDValue MemAddr = Node->getOperand(Num: 2);
4543 SDLoc DL(Node);
4544 SDValue Chain = Node->getOperand(Num: 0);
4545
4546 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4547 MVT::Other, MemAddr, Chain);
4548
4549 // Transfer memoperands.
4550 MachineMemOperand *MemOp =
4551 cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
4552 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
4553 ReplaceNode(F: Node, T: Ld);
4554 return;
4555 }
4556 case Intrinsic::aarch64_stlxp:
4557 case Intrinsic::aarch64_stxp: {
4558 unsigned Op =
4559 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4560 SDLoc DL(Node);
4561 SDValue Chain = Node->getOperand(Num: 0);
4562 SDValue ValLo = Node->getOperand(Num: 2);
4563 SDValue ValHi = Node->getOperand(Num: 3);
4564 SDValue MemAddr = Node->getOperand(Num: 4);
4565
4566 // Place arguments in the right order.
4567 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4568
4569 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4570 // Transfer memoperands.
4571 MachineMemOperand *MemOp =
4572 cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
4573 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
4574
4575 ReplaceNode(F: Node, T: St);
4576 return;
4577 }
4578 case Intrinsic::aarch64_neon_ld1x2:
4579 if (VT == MVT::v8i8) {
4580 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4581 return;
4582 } else if (VT == MVT::v16i8) {
4583 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4584 return;
4585 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4586 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4587 return;
4588 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4589 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4590 return;
4591 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4592 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4593 return;
4594 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4595 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4596 return;
4597 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4598 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4599 return;
4600 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4601 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4602 return;
4603 }
4604 break;
4605 case Intrinsic::aarch64_neon_ld1x3:
4606 if (VT == MVT::v8i8) {
4607 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4608 return;
4609 } else if (VT == MVT::v16i8) {
4610 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4611 return;
4612 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4613 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4614 return;
4615 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4616 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4617 return;
4618 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4619 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4620 return;
4621 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4622 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4623 return;
4624 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4625 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4626 return;
4627 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4628 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4629 return;
4630 }
4631 break;
4632 case Intrinsic::aarch64_neon_ld1x4:
4633 if (VT == MVT::v8i8) {
4634 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4635 return;
4636 } else if (VT == MVT::v16i8) {
4637 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4638 return;
4639 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4640 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4641 return;
4642 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4643 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4644 return;
4645 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4646 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4647 return;
4648 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4649 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4650 return;
4651 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4652 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4653 return;
4654 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4655 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4656 return;
4657 }
4658 break;
4659 case Intrinsic::aarch64_neon_ld2:
4660 if (VT == MVT::v8i8) {
4661 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4662 return;
4663 } else if (VT == MVT::v16i8) {
4664 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4665 return;
4666 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4667 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4668 return;
4669 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4670 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4671 return;
4672 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4673 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4674 return;
4675 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4676 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4677 return;
4678 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4679 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4680 return;
4681 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4682 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4683 return;
4684 }
4685 break;
4686 case Intrinsic::aarch64_neon_ld3:
4687 if (VT == MVT::v8i8) {
4688 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4689 return;
4690 } else if (VT == MVT::v16i8) {
4691 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4692 return;
4693 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4694 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4695 return;
4696 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4697 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4698 return;
4699 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4700 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4701 return;
4702 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4703 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4704 return;
4705 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4706 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4707 return;
4708 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4709 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4710 return;
4711 }
4712 break;
4713 case Intrinsic::aarch64_neon_ld4:
4714 if (VT == MVT::v8i8) {
4715 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4716 return;
4717 } else if (VT == MVT::v16i8) {
4718 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4719 return;
4720 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4721 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4722 return;
4723 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4724 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4725 return;
4726 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4727 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4728 return;
4729 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4730 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4731 return;
4732 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4733 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4734 return;
4735 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4736 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4737 return;
4738 }
4739 break;
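    // The LDnR variants load one element and replicate it into every lane
    // of each result vector.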
4740 case Intrinsic::aarch64_neon_ld2r:
4741 if (VT == MVT::v8i8) {
4742 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4743 return;
4744 } else if (VT == MVT::v16i8) {
4745 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4746 return;
4747 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4748 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4749 return;
4750 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4751 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4752 return;
4753 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4754 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4755 return;
4756 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4757 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4758 return;
4759 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4760 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4761 return;
4762 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4763 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4764 return;
4765 }
4766 break;
4767 case Intrinsic::aarch64_neon_ld3r:
4768 if (VT == MVT::v8i8) {
4769 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4770 return;
4771 } else if (VT == MVT::v16i8) {
4772 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4773 return;
4774 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4775 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4776 return;
4777 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4778 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4779 return;
4780 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4781 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4782 return;
4783 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4784 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4785 return;
4786 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4787 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4788 return;
4789 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4790 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4791 return;
4792 }
4793 break;
4794 case Intrinsic::aarch64_neon_ld4r:
4795 if (VT == MVT::v8i8) {
4796 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4797 return;
4798 } else if (VT == MVT::v16i8) {
4799 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4800 return;
4801 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4802 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4803 return;
4804 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4805 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4806 return;
4807 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4808 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4809 return;
4810 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4811 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4812 return;
4813 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4814 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4815 return;
4816 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4817 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4818 return;
4819 }
4820 break;
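    // Lane loads depend only on the element size, so the 64-bit and 128-bit
    // vector types share an opcode.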
4821 case Intrinsic::aarch64_neon_ld2lane:
4822 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4823 SelectLoadLane(Node, 2, AArch64::LD2i8);
4824 return;
4825 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4826 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4827 SelectLoadLane(Node, 2, AArch64::LD2i16);
4828 return;
4829 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4830 VT == MVT::v2f32) {
4831 SelectLoadLane(Node, 2, AArch64::LD2i32);
4832 return;
4833 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4834 VT == MVT::v1f64) {
4835 SelectLoadLane(Node, 2, AArch64::LD2i64);
4836 return;
4837 }
4838 break;
4839 case Intrinsic::aarch64_neon_ld3lane:
4840 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4841 SelectLoadLane(Node, 3, AArch64::LD3i8);
4842 return;
4843 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4844 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4845 SelectLoadLane(Node, 3, AArch64::LD3i16);
4846 return;
4847 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4848 VT == MVT::v2f32) {
4849 SelectLoadLane(Node, 3, AArch64::LD3i32);
4850 return;
4851 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4852 VT == MVT::v1f64) {
4853 SelectLoadLane(Node, 3, AArch64::LD3i64);
4854 return;
4855 }
4856 break;
4857 case Intrinsic::aarch64_neon_ld4lane:
4858 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4859 SelectLoadLane(Node, 4, AArch64::LD4i8);
4860 return;
4861 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4862 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4863 SelectLoadLane(Node, 4, AArch64::LD4i16);
4864 return;
4865 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4866 VT == MVT::v2f32) {
4867 SelectLoadLane(Node, 4, AArch64::LD4i32);
4868 return;
4869 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4870 VT == MVT::v1f64) {
4871 SelectLoadLane(Node, 4, AArch64::LD4i64);
4872 return;
4873 }
4874 break;
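    // LD64B is the 64-byte single-copy atomic load; the eight X-register
    // results are referenced through the x8sub_* sub-register indices.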
4875 case Intrinsic::aarch64_ld64b:
4876 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4877 return;
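    // SVE structured loads returning the whole tuple ("sret" forms).
    // SelectPredicatedLoad chooses between the immediate-offset and
    // register-offset addressing forms.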
4878 case Intrinsic::aarch64_sve_ld2q_sret: {
4879 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
4880 return;
4881 }
4882 case Intrinsic::aarch64_sve_ld3q_sret: {
4883 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
4884 return;
4885 }
4886 case Intrinsic::aarch64_sve_ld4q_sret: {
4887 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
4888 return;
4889 }
4890 case Intrinsic::aarch64_sve_ld2_sret: {
4891 if (VT == MVT::nxv16i8) {
4892 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4893 true);
4894 return;
4895 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4896 VT == MVT::nxv8bf16) {
4897 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4898 true);
4899 return;
4900 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4901 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4902 true);
4903 return;
4904 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4905 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4906 true);
4907 return;
4908 }
4909 break;
4910 }
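    // Predicate-as-counter multi-vector loads: SME2 selects the pseudo
    // forms, SVE2p1 the real instructions; without either feature we fall
    // through to the default handling.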
4911 case Intrinsic::aarch64_sve_ld1_pn_x2: {
4912 if (VT == MVT::nxv16i8) {
4913 if (Subtarget->hasSME2())
4914 SelectContiguousMultiVectorLoad(
4915 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
4916 else if (Subtarget->hasSVE2p1())
4917 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
4918 AArch64::LD1B_2Z);
4919 else
4920 break;
4921 return;
4922 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4923 VT == MVT::nxv8bf16) {
4924 if (Subtarget->hasSME2())
4925 SelectContiguousMultiVectorLoad(
4926 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
4927 else if (Subtarget->hasSVE2p1())
4928 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
4929 AArch64::LD1H_2Z);
4930 else
4931 break;
4932 return;
4933 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4934 if (Subtarget->hasSME2())
4935 SelectContiguousMultiVectorLoad(
4936 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
4937 else if (Subtarget->hasSVE2p1())
4938 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
4939 AArch64::LD1W_2Z);
4940 else
4941 break;
4942 return;
4943 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4944 if (Subtarget->hasSME2())
4945 SelectContiguousMultiVectorLoad(
4946 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
4947 else if (Subtarget->hasSVE2p1())
4948 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
4949 AArch64::LD1D_2Z);
4950 else
4951 break;
4952 return;
4953 }
4954 break;
4955 }
4956 case Intrinsic::aarch64_sve_ld1_pn_x4: {
4957 if (VT == MVT::nxv16i8) {
4958 if (Subtarget->hasSME2())
4959 SelectContiguousMultiVectorLoad(
4960 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
4961 else if (Subtarget->hasSVE2p1())
4962 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
4963 AArch64::LD1B_4Z);
4964 else
4965 break;
4966 return;
4967 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4968 VT == MVT::nxv8bf16) {
4969 if (Subtarget->hasSME2())
4970 SelectContiguousMultiVectorLoad(
4971 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
4972 else if (Subtarget->hasSVE2p1())
4973 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
4974 AArch64::LD1H_4Z);
4975 else
4976 break;
4977 return;
4978 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4979 if (Subtarget->hasSME2())
4980 SelectContiguousMultiVectorLoad(
4981 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
4982 else if (Subtarget->hasSVE2p1())
4983 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
4984 AArch64::LD1W_4Z);
4985 else
4986 break;
4987 return;
4988 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4989 if (Subtarget->hasSME2())
4990 SelectContiguousMultiVectorLoad(
4991 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
4992 else if (Subtarget->hasSVE2p1())
4993 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
4994 AArch64::LD1D_4Z);
4995 else
4996 break;
4997 return;
4998 }
4999 break;
5000 }
5001 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5002 if (VT == MVT::nxv16i8) {
5003 if (Subtarget->hasSME2())
5004 SelectContiguousMultiVectorLoad(Node, 2, 0,
5005 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5006 AArch64::LDNT1B_2Z_PSEUDO);
5007 else if (Subtarget->hasSVE2p1())
5008 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5009 AArch64::LDNT1B_2Z);
5010 else
5011 break;
5012 return;
5013 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5014 VT == MVT::nxv8bf16) {
5015 if (Subtarget->hasSME2())
5016 SelectContiguousMultiVectorLoad(Node, 2, 1,
5017 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5018 AArch64::LDNT1H_2Z_PSEUDO);
5019 else if (Subtarget->hasSVE2p1())
5020 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5021 AArch64::LDNT1H_2Z);
5022 else
5023 break;
5024 return;
5025 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5026 if (Subtarget->hasSME2())
5027 SelectContiguousMultiVectorLoad(Node, 2, 2,
5028 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5029 AArch64::LDNT1W_2Z_PSEUDO);
5030 else if (Subtarget->hasSVE2p1())
5031 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5032 AArch64::LDNT1W_2Z);
5033 else
5034 break;
5035 return;
5036 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5037 if (Subtarget->hasSME2())
5038 SelectContiguousMultiVectorLoad(Node, 2, 3,
5039 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5040 AArch64::LDNT1D_2Z_PSEUDO);
5041 else if (Subtarget->hasSVE2p1())
5042 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5043 AArch64::LDNT1D_2Z);
5044 else
5045 break;
5046 return;
5047 }
5048 break;
5049 }
5050 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5051 if (VT == MVT::nxv16i8) {
5052 if (Subtarget->hasSME2())
5053 SelectContiguousMultiVectorLoad(Node, 4, 0,
5054 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5055 AArch64::LDNT1B_4Z_PSEUDO);
5056 else if (Subtarget->hasSVE2p1())
5057 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5058 AArch64::LDNT1B_4Z);
5059 else
5060 break;
5061 return;
5062 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5063 VT == MVT::nxv8bf16) {
5064 if (Subtarget->hasSME2())
5065 SelectContiguousMultiVectorLoad(Node, 4, 1,
5066 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5067 AArch64::LDNT1H_4Z_PSEUDO);
5068 else if (Subtarget->hasSVE2p1())
5069 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5070 AArch64::LDNT1H_4Z);
5071 else
5072 break;
5073 return;
5074 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5075 if (Subtarget->hasSME2())
5076 SelectContiguousMultiVectorLoad(Node, 4, 2,
5077 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5078 AArch64::LDNT1W_4Z_PSEUDO);
5079 else if (Subtarget->hasSVE2p1())
5080 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5081 AArch64::LDNT1W_4Z);
5082 else
5083 break;
5084 return;
5085 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5086 if (Subtarget->hasSME2())
5087 SelectContiguousMultiVectorLoad(Node, 4, 3,
5088 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5089 AArch64::LDNT1D_4Z_PSEUDO);
5090 else if (Subtarget->hasSVE2p1())
5091 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5092 AArch64::LDNT1D_4Z);
5093 else
5094 break;
5095 return;
5096 }
5097 break;
5098 }
5099 case Intrinsic::aarch64_sve_ld3_sret: {
5100 if (VT == MVT::nxv16i8) {
5101 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5102 true);
5103 return;
5104 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5105 VT == MVT::nxv8bf16) {
5106 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5107 true);
5108 return;
5109 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5110 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5111 true);
5112 return;
5113 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5114 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5115 true);
5116 return;
5117 }
5118 break;
5119 }
5120 case Intrinsic::aarch64_sve_ld4_sret: {
5121 if (VT == MVT::nxv16i8) {
5122 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5123 true);
5124 return;
5125 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5126 VT == MVT::nxv8bf16) {
5127 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5128 true);
5129 return;
5130 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5131 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5132 true);
5133 return;
5134 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5135 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5136 true);
5137 return;
5138 }
5139 break;
5140 }
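    // SME ZA reads into vector groups. SelectMultiVectorMove's template
    // arguments constrain the ZA slice-index immediate for the element size
    // being read.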
5141 case Intrinsic::aarch64_sme_read_hor_vg2: {
5142 if (VT == MVT::nxv16i8) {
5143 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5144 AArch64::MOVA_2ZMXI_H_B);
5145 return;
5146 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5147 VT == MVT::nxv8bf16) {
5148 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5149 AArch64::MOVA_2ZMXI_H_H);
5150 return;
5151 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5152 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5153 AArch64::MOVA_2ZMXI_H_S);
5154 return;
5155 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5156 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5157 AArch64::MOVA_2ZMXI_H_D);
5158 return;
5159 }
5160 break;
5161 }
5162 case Intrinsic::aarch64_sme_read_ver_vg2: {
5163 if (VT == MVT::nxv16i8) {
5164 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5165 AArch64::MOVA_2ZMXI_V_B);
5166 return;
5167 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5168 VT == MVT::nxv8bf16) {
5169 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5170 AArch64::MOVA_2ZMXI_V_H);
5171 return;
5172 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5173 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5174 AArch64::MOVA_2ZMXI_V_S);
5175 return;
5176 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5177 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5178 AArch64::MOVA_2ZMXI_V_D);
5179 return;
5180 }
5181 break;
5182 }
5183 case Intrinsic::aarch64_sme_read_hor_vg4: {
5184 if (VT == MVT::nxv16i8) {
5185 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5186 AArch64::MOVA_4ZMXI_H_B);
5187 return;
5188 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5189 VT == MVT::nxv8bf16) {
5190 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5191 AArch64::MOVA_4ZMXI_H_H);
5192 return;
5193 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5194 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5195 AArch64::MOVA_4ZMXI_H_S);
5196 return;
5197 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5198 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5199 AArch64::MOVA_4ZMXI_H_D);
5200 return;
5201 }
5202 break;
5203 }
5204 case Intrinsic::aarch64_sme_read_ver_vg4: {
5205 if (VT == MVT::nxv16i8) {
5206 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5207 AArch64::MOVA_4ZMXI_V_B);
5208 return;
5209 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5210 VT == MVT::nxv8bf16) {
5211 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5212 AArch64::MOVA_4ZMXI_V_H);
5213 return;
5214 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5215 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5216 AArch64::MOVA_4ZMXI_V_S);
5217 return;
5218 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5219 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5220 AArch64::MOVA_4ZMXI_V_D);
5221 return;
5222 }
5223 break;
5224 }
5225 case Intrinsic::aarch64_sme_read_vg1x2: {
5226 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5227 AArch64::MOVA_VG2_2ZMXI);
5228 return;
5229 }
5230 case Intrinsic::aarch64_sme_read_vg1x4: {
5231 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5232 AArch64::MOVA_VG4_4ZMXI);
5233 return;
5234 }
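    // The Swift async context lives at FP - 8: copy FP, subtract 8, and
    // record that the frame address is taken.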
5235 case Intrinsic::swift_async_context_addr: {
5236 SDLoc DL(Node);
5237 SDValue Chain = Node->getOperand(Num: 0);
5238 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5239 SDValue Res = SDValue(
5240 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5241 CurDAG->getTargetConstant(8, DL, MVT::i32),
5242 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5243 0);
5244 ReplaceUses(F: SDValue(Node, 0), T: Res);
5245 ReplaceUses(F: SDValue(Node, 1), T: CopyFP.getValue(R: 1));
5246 CurDAG->RemoveDeadNode(N: Node);
5247
5248 auto &MF = CurDAG->getMachineFunction();
5249 MF.getFrameInfo().setFrameAddressIsTaken(true);
5250 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5251 return;
5252 }
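    // ZT0 lookup-table intrinsics: the opcode is picked from the result
    // element type and the final argument caps the index immediate.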
5253 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5254 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5255 Node->getValueType(0),
5256 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5257 AArch64::LUTI2_4ZTZI_S}))
5258 // The second immediate operand must be <= 3:
5259 SelectMultiVectorLuti(Node, NumOutVecs: 4, Opc: Opc, MaxImm: 3);
5260 return;
5261 }
5262 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5263 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5264 Node->getValueType(0),
5265 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5266 // The second immediate operand must be <= 1:
5267 SelectMultiVectorLuti(Node, NumOutVecs: 4, Opc: Opc, MaxImm: 1);
5268 return;
5269 }
5270 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5271 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5272 Node->getValueType(0),
5273 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5274 AArch64::LUTI2_2ZTZI_S}))
5275 // The second immediate operand must be <= 7:
5276 SelectMultiVectorLuti(Node, NumOutVecs: 2, Opc: Opc, MaxImm: 7);
5277 return;
5278 }
5279 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5280 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5281 Node->getValueType(0),
5282 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5283 AArch64::LUTI4_2ZTZI_S}))
5284 // The second immediate operand must be <= 3:
5285 SelectMultiVectorLuti(Node, NumOutVecs: 2, Opc: Opc, MaxImm: 3);
5286 return;
5287 }
5288 }
5289 } break;
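  // Unchained intrinsics: the intrinsic ID is operand 0.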
5290 case ISD::INTRINSIC_WO_CHAIN: {
5291 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
5292 switch (IntNo) {
5293 default:
5294 break;
5295 case Intrinsic::aarch64_tagp:
5296 SelectTagP(N: Node);
5297 return;
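    // NEON table lookups: the opcode depends only on whether the result is
    // a 64-bit or 128-bit vector; the trailing flag selects the TBX form,
    // which also reads the destination vector.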
5298 case Intrinsic::aarch64_neon_tbl2:
5299 SelectTable(Node, 2,
5300 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5301 false);
5302 return;
5303 case Intrinsic::aarch64_neon_tbl3:
5304 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5305 : AArch64::TBLv16i8Three,
5306 false);
5307 return;
5308 case Intrinsic::aarch64_neon_tbl4:
5309 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5310 : AArch64::TBLv16i8Four,
5311 false);
5312 return;
5313 case Intrinsic::aarch64_neon_tbx2:
5314 SelectTable(Node, 2,
5315 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5316 true);
5317 return;
5318 case Intrinsic::aarch64_neon_tbx3:
5319 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5320 : AArch64::TBXv16i8Three,
5321 true);
5322 return;
5323 case Intrinsic::aarch64_neon_tbx4:
5324 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5325 : AArch64::TBXv16i8Four,
5326 true);
5327 return;
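    // For the multi-vector intrinsics below, SelectOpcodeFromVT indexes the
    // opcode table by the element size (B/H/S/D) of the result type; a zero
    // entry means that element size has no matching instruction.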
5328 case Intrinsic::aarch64_sve_srshl_single_x2:
5329 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5330 Node->getValueType(0),
5331 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5332 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5333 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5334 return;
5335 case Intrinsic::aarch64_sve_srshl_single_x4:
5336 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5337 Node->getValueType(0),
5338 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5339 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5340 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5341 return;
5342 case Intrinsic::aarch64_sve_urshl_single_x2:
5343 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5344 Node->getValueType(0),
5345 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5346 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5347 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5348 return;
5349 case Intrinsic::aarch64_sve_urshl_single_x4:
5350 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5351 Node->getValueType(0),
5352 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5353 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5354 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5355 return;
5356 case Intrinsic::aarch64_sve_srshl_x2:
5357 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5358 Node->getValueType(0),
5359 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5360 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5361 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5362 return;
5363 case Intrinsic::aarch64_sve_srshl_x4:
5364 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5365 Node->getValueType(0),
5366 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5367 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5368 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5369 return;
5370 case Intrinsic::aarch64_sve_urshl_x2:
5371 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5372 Node->getValueType(0),
5373 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5374 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5375 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5376 return;
5377 case Intrinsic::aarch64_sve_urshl_x4:
5378 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5379 Node->getValueType(0),
5380 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5381 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5382 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5383 return;
5384 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5385 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5386 Node->getValueType(0),
5387 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5388 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5389 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5390 return;
5391 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5392 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5393 Node->getValueType(0),
5394 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5395 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5396 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5397 return;
5398 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5399 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5400 Node->getValueType(0),
5401 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5402 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5403 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5404 return;
5405 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5406 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5407 Node->getValueType(0),
5408 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5409 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5410 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5411 return;
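    // While-predicate pairs: each of these produces two predicate registers
    // from a pair of scalar bounds.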
5412 case Intrinsic::aarch64_sve_whilege_x2:
5413 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5414 Node->getValueType(0),
5415 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5416 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5417 SelectWhilePair(N: Node, Opc: Op);
5418 return;
5419 case Intrinsic::aarch64_sve_whilegt_x2:
5420 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5421 Node->getValueType(0),
5422 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5423 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5424 SelectWhilePair(N: Node, Opc: Op);
5425 return;
5426 case Intrinsic::aarch64_sve_whilehi_x2:
5427 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5428 Node->getValueType(0),
5429 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5430 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5431 SelectWhilePair(N: Node, Opc: Op);
5432 return;
5433 case Intrinsic::aarch64_sve_whilehs_x2:
5434 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5435 Node->getValueType(0),
5436 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5437 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5438 SelectWhilePair(N: Node, Opc: Op);
5439 return;
5440 case Intrinsic::aarch64_sve_whilele_x2:
5441 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5442 Node->getValueType(0),
5443 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5444 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5445 SelectWhilePair(N: Node, Opc: Op);
5446 return;
5447 case Intrinsic::aarch64_sve_whilelo_x2:
5448 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5449 Node->getValueType(0),
5450 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5451 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5452 SelectWhilePair(N: Node, Opc: Op);
5453 return;
5454 case Intrinsic::aarch64_sve_whilels_x2:
5455 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5456 Node->getValueType(0),
5457 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5458 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5459 SelectWhilePair(N: Node, Opc: Op);
5460 return;
5461 case Intrinsic::aarch64_sve_whilelt_x2:
5462 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5463 Node->getValueType(0),
5464 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5465 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5466 SelectWhilePair(N: Node, Opc: Op);
5467 return;
5468 case Intrinsic::aarch64_sve_smax_single_x2:
5469 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5470 Node->getValueType(0),
5471 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5472 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5473 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5474 return;
5475 case Intrinsic::aarch64_sve_umax_single_x2:
5476 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5477 Node->getValueType(0),
5478 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5479 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5480 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5481 return;
5482 case Intrinsic::aarch64_sve_fmax_single_x2:
5483 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5484 Node->getValueType(0),
5485 {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S,
5486 AArch64::FMAX_VG2_2ZZ_D}))
5487 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5488 return;
5489 case Intrinsic::aarch64_sve_smax_single_x4:
5490 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5491 Node->getValueType(0),
5492 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5493 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5494 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5495 return;
5496 case Intrinsic::aarch64_sve_umax_single_x4:
5497 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5498 Node->getValueType(0),
5499 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5500 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5501 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5502 return;
5503 case Intrinsic::aarch64_sve_fmax_single_x4:
5504 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5505 Node->getValueType(0),
5506 {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S,
5507 AArch64::FMAX_VG4_4ZZ_D}))
5508 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5509 return;
5510 case Intrinsic::aarch64_sve_smin_single_x2:
5511 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5512 Node->getValueType(0),
5513 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5514 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5515 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5516 return;
5517 case Intrinsic::aarch64_sve_umin_single_x2:
5518 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5519 Node->getValueType(0),
5520 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5521 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5522 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5523 return;
5524 case Intrinsic::aarch64_sve_fmin_single_x2:
5525 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5526 Node->getValueType(0),
5527 {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S,
5528 AArch64::FMIN_VG2_2ZZ_D}))
5529 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5530 return;
5531 case Intrinsic::aarch64_sve_smin_single_x4:
5532 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5533 Node->getValueType(0),
5534 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5535 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5536 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5537 return;
5538 case Intrinsic::aarch64_sve_umin_single_x4:
5539 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5540 Node->getValueType(0),
5541 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5542 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5543 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5544 return;
5545 case Intrinsic::aarch64_sve_fmin_single_x4:
5546 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5547 Node->getValueType(0),
5548 {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S,
5549 AArch64::FMIN_VG4_4ZZ_D}))
5550 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5551 return;
5552 case Intrinsic::aarch64_sve_smax_x2:
5553 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5554 Node->getValueType(0),
5555 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5556 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5557 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5558 return;
5559 case Intrinsic::aarch64_sve_umax_x2:
5560 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5561 Node->getValueType(0),
5562 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5563 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5564 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5565 return;
5566 case Intrinsic::aarch64_sve_fmax_x2:
5567 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5568 Node->getValueType(0),
5569 {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S,
5570 AArch64::FMAX_VG2_2Z2Z_D}))
5571 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5572 return;
5573 case Intrinsic::aarch64_sve_smax_x4:
5574 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5575 Node->getValueType(0),
5576 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5577 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5578 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5579 return;
5580 case Intrinsic::aarch64_sve_umax_x4:
5581 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5582 Node->getValueType(0),
5583 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5584 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5585 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5586 return;
5587 case Intrinsic::aarch64_sve_fmax_x4:
5588 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5589 Node->getValueType(0),
5590 {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S,
5591 AArch64::FMAX_VG4_4Z4Z_D}))
5592 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5593 return;
5594 case Intrinsic::aarch64_sve_smin_x2:
5595 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5596 Node->getValueType(0),
5597 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5598 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5599 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5600 return;
5601 case Intrinsic::aarch64_sve_umin_x2:
5602 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5603 Node->getValueType(0),
5604 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5605 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5606 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5607 return;
5608 case Intrinsic::aarch64_sve_fmin_x2:
5609 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5610 Node->getValueType(0),
5611 {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S,
5612 AArch64::FMIN_VG2_2Z2Z_D}))
5613 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5614 return;
5615 case Intrinsic::aarch64_sve_smin_x4:
5616 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5617 Node->getValueType(0),
5618 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5619 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5620 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5621 return;
5622 case Intrinsic::aarch64_sve_umin_x4:
5623 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5624 Node->getValueType(0),
5625 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5626 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5627 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5628 return;
5629 case Intrinsic::aarch64_sve_fmin_x4:
5630 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5631 Node->getValueType(0),
5632 {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S,
5633 AArch64::FMIN_VG4_4Z4Z_D}))
5634 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5635 return;
5636 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
5637 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5638 Node->getValueType(0),
5639 {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S,
5640 AArch64::FMAXNM_VG2_2ZZ_D}))
5641 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5642 return;
5643 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
5644 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5645 Node->getValueType(0),
5646 {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S,
5647 AArch64::FMAXNM_VG4_4ZZ_D}))
5648 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5649 return;
5650 case Intrinsic::aarch64_sve_fminnm_single_x2:
5651 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5652 Node->getValueType(0),
5653 {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S,
5654 AArch64::FMINNM_VG2_2ZZ_D}))
5655 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5656 return;
5657 case Intrinsic::aarch64_sve_fminnm_single_x4:
5658 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5659 Node->getValueType(0),
5660 {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S,
5661 AArch64::FMINNM_VG4_4ZZ_D}))
5662 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5663 return;
5664 case Intrinsic::aarch64_sve_fmaxnm_x2:
5665 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5666 Node->getValueType(0),
5667 {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S,
5668 AArch64::FMAXNM_VG2_2Z2Z_D}))
5669 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5670 return;
5671 case Intrinsic::aarch64_sve_fmaxnm_x4:
5672 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5673 Node->getValueType(0),
5674 {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S,
5675 AArch64::FMAXNM_VG4_4Z4Z_D}))
5676 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5677 return;
5678 case Intrinsic::aarch64_sve_fminnm_x2:
5679 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5680 Node->getValueType(0),
5681 {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S,
5682 AArch64::FMINNM_VG2_2Z2Z_D}))
5683 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5684 return;
5685 case Intrinsic::aarch64_sve_fminnm_x4:
5686 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5687 Node->getValueType(0),
5688 {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S,
5689 AArch64::FMINNM_VG4_4Z4Z_D}))
5690 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5691 return;
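    // Only the 32-bit-element conversion forms are handled here, so no
    // type-based dispatch is needed.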
5692 case Intrinsic::aarch64_sve_fcvtzs_x2:
5693 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5694 return;
5695 case Intrinsic::aarch64_sve_scvtf_x2:
5696 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5697 return;
5698 case Intrinsic::aarch64_sve_fcvtzu_x2:
5699 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5700 return;
5701 case Intrinsic::aarch64_sve_ucvtf_x2:
5702 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5703 return;
5704 case Intrinsic::aarch64_sve_fcvtzs_x4:
5705 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5706 return;
5707 case Intrinsic::aarch64_sve_scvtf_x4:
5708 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5709 return;
5710 case Intrinsic::aarch64_sve_fcvtzu_x4:
5711 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5712 return;
5713 case Intrinsic::aarch64_sve_ucvtf_x4:
5714 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5715 return;
5716 case Intrinsic::aarch64_sve_sclamp_single_x2:
5717 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5718 Node->getValueType(0),
5719 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5720 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5721 SelectClamp(N: Node, NumVecs: 2, Op: Op);
5722 return;
5723 case Intrinsic::aarch64_sve_uclamp_single_x2:
5724 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5725 Node->getValueType(0),
5726 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5727 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5728 SelectClamp(N: Node, NumVecs: 2, Op: Op);
5729 return;
5730 case Intrinsic::aarch64_sve_fclamp_single_x2:
5731 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5732 Node->getValueType(0),
5733 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5734 AArch64::FCLAMP_VG2_2Z2Z_D}))
5735 SelectClamp(N: Node, NumVecs: 2, Op: Op);
5736 return;
5737 case Intrinsic::aarch64_sve_sclamp_single_x4:
5738 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5739 Node->getValueType(0),
5740 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5741 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5742 SelectClamp(N: Node, NumVecs: 4, Op: Op);
5743 return;
5744 case Intrinsic::aarch64_sve_uclamp_single_x4:
5745 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5746 Node->getValueType(0),
5747 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5748 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5749 SelectClamp(N: Node, NumVecs: 4, Op: Op);
5750 return;
5751 case Intrinsic::aarch64_sve_fclamp_single_x4:
5752 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5753 Node->getValueType(0),
5754 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5755 AArch64::FCLAMP_VG4_4Z4Z_D}))
5756 SelectClamp(N: Node, NumVecs: 4, Op: Op);
5757 return;
5758 case Intrinsic::aarch64_sve_add_single_x2:
5759 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5760 Node->getValueType(0),
5761 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
5762 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
5763 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5764 return;
5765 case Intrinsic::aarch64_sve_add_single_x4:
5766 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5767 Node->getValueType(0),
5768 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
5769 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
5770 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5771 return;
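    // Multi-vector ZIP/UZP permutes; the 128-bit-element (Q) variants have
    // a single opcode and skip the type dispatch.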
5772 case Intrinsic::aarch64_sve_zip_x2:
5773 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5774 Node->getValueType(0),
5775 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
5776 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
5777 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
5778 return;
5779 case Intrinsic::aarch64_sve_zipq_x2:
5780 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5781 AArch64::ZIP_VG2_2ZZZ_Q);
5782 return;
5783 case Intrinsic::aarch64_sve_zip_x4:
5784 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5785 Node->getValueType(0),
5786 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
5787 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
5788 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
5789 return;
5790 case Intrinsic::aarch64_sve_zipq_x4:
5791 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5792 AArch64::ZIP_VG4_4Z4Z_Q);
5793 return;
5794 case Intrinsic::aarch64_sve_uzp_x2:
5795 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5796 Node->getValueType(0),
5797 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
5798 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
5799 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
5800 return;
5801 case Intrinsic::aarch64_sve_uzpq_x2:
5802 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5803 AArch64::UZP_VG2_2ZZZ_Q);
5804 return;
5805 case Intrinsic::aarch64_sve_uzp_x4:
5806 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5807 Node->getValueType(0),
5808 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
5809 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
5810 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
5811 return;
5812 case Intrinsic::aarch64_sve_uzpq_x4:
5813 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5814 AArch64::UZP_VG4_4Z4Z_Q);
5815 return;
5816 case Intrinsic::aarch64_sve_sel_x2:
5817 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5818 Node->getValueType(0),
5819 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
5820 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
5821 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
5822 return;
5823 case Intrinsic::aarch64_sve_sel_x4:
5824 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5825 Node->getValueType(0),
5826 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
5827 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
5828 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
5829 return;
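    // Multi-vector round-to-integral: only the single-precision forms are
    // selected here.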
5830 case Intrinsic::aarch64_sve_frinta_x2:
5831 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
5832 return;
5833 case Intrinsic::aarch64_sve_frinta_x4:
5834 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
5835 return;
5836 case Intrinsic::aarch64_sve_frintm_x2:
5837 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
5838 return;
5839 case Intrinsic::aarch64_sve_frintm_x4:
5840 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
5841 return;
5842 case Intrinsic::aarch64_sve_frintn_x2:
5843 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
5844 return;
5845 case Intrinsic::aarch64_sve_frintn_x4:
5846 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
5847 return;
5848 case Intrinsic::aarch64_sve_frintp_x2:
5849 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
5850 return;
5851 case Intrinsic::aarch64_sve_frintp_x4:
5852 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
5853 return;
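    // The unpack intrinsics widen their elements, so slot 0 (byte elements)
    // of the opcode table is empty.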
5854 case Intrinsic::aarch64_sve_sunpk_x2:
5855 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5856 Node->getValueType(0),
5857 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
5858 AArch64::SUNPK_VG2_2ZZ_D}))
5859 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
5860 return;
5861 case Intrinsic::aarch64_sve_uunpk_x2:
5862 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5863 Node->getValueType(0),
5864 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
5865 AArch64::UUNPK_VG2_2ZZ_D}))
5866 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
5867 return;
5868 case Intrinsic::aarch64_sve_sunpk_x4:
5869 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5870 Node->getValueType(0),
5871 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
5872 AArch64::SUNPK_VG4_4Z2Z_D}))
5873 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
5874 return;
5875 case Intrinsic::aarch64_sve_uunpk_x4:
5876 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5877 Node->getValueType(0),
5878 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
5879 AArch64::UUNPK_VG4_4Z2Z_D}))
5880 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
5881 return;
5882 case Intrinsic::aarch64_sve_pext_x2: {
5883 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5884 Node->getValueType(0),
5885 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
5886 AArch64::PEXT_2PCI_D}))
5887 SelectPExtPair(N: Node, Opc: Op);
5888 return;
5889 }
5890 }
5891 break;
5892 }
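  // Void intrinsics (stores): the dispatch type is taken from the first
  // stored vector operand rather than from a result.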
5893 case ISD::INTRINSIC_VOID: {
5894 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
5895 if (Node->getNumOperands() >= 3)
5896 VT = Node->getOperand(Num: 2)->getValueType(ResNo: 0);
5897 switch (IntNo) {
5898 default:
5899 break;
5900 case Intrinsic::aarch64_neon_st1x2: {
5901 if (VT == MVT::v8i8) {
5902 SelectStore(Node, 2, AArch64::ST1Twov8b);
5903 return;
5904 } else if (VT == MVT::v16i8) {
5905 SelectStore(Node, 2, AArch64::ST1Twov16b);
5906 return;
5907 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5908 VT == MVT::v4bf16) {
5909 SelectStore(Node, 2, AArch64::ST1Twov4h);
5910 return;
5911 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5912 VT == MVT::v8bf16) {
5913 SelectStore(Node, 2, AArch64::ST1Twov8h);
5914 return;
5915 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5916 SelectStore(Node, 2, AArch64::ST1Twov2s);
5917 return;
5918 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5919 SelectStore(Node, 2, AArch64::ST1Twov4s);
5920 return;
5921 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5922 SelectStore(Node, 2, AArch64::ST1Twov2d);
5923 return;
5924 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5925 SelectStore(Node, 2, AArch64::ST1Twov1d);
5926 return;
5927 }
5928 break;
5929 }
5930 case Intrinsic::aarch64_neon_st1x3: {
5931 if (VT == MVT::v8i8) {
5932 SelectStore(Node, 3, AArch64::ST1Threev8b);
5933 return;
5934 } else if (VT == MVT::v16i8) {
5935 SelectStore(Node, 3, AArch64::ST1Threev16b);
5936 return;
5937 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5938 VT == MVT::v4bf16) {
5939 SelectStore(Node, 3, AArch64::ST1Threev4h);
5940 return;
5941 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5942 VT == MVT::v8bf16) {
5943 SelectStore(Node, 3, AArch64::ST1Threev8h);
5944 return;
5945 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5946 SelectStore(Node, 3, AArch64::ST1Threev2s);
5947 return;
5948 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5949 SelectStore(Node, 3, AArch64::ST1Threev4s);
5950 return;
5951 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5952 SelectStore(Node, 3, AArch64::ST1Threev2d);
5953 return;
5954 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5955 SelectStore(Node, 3, AArch64::ST1Threev1d);
5956 return;
5957 }
5958 break;
5959 }
5960 case Intrinsic::aarch64_neon_st1x4: {
5961 if (VT == MVT::v8i8) {
5962 SelectStore(Node, 4, AArch64::ST1Fourv8b);
5963 return;
5964 } else if (VT == MVT::v16i8) {
5965 SelectStore(Node, 4, AArch64::ST1Fourv16b);
5966 return;
5967 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5968 VT == MVT::v4bf16) {
5969 SelectStore(Node, 4, AArch64::ST1Fourv4h);
5970 return;
5971 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5972 VT == MVT::v8bf16) {
5973 SelectStore(Node, 4, AArch64::ST1Fourv8h);
5974 return;
5975 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5976 SelectStore(Node, 4, AArch64::ST1Fourv2s);
5977 return;
5978 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5979 SelectStore(Node, 4, AArch64::ST1Fourv4s);
5980 return;
5981 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5982 SelectStore(Node, 4, AArch64::ST1Fourv2d);
5983 return;
5984 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5985 SelectStore(Node, 4, AArch64::ST1Fourv1d);
5986 return;
5987 }
5988 break;
5989 }
5990 case Intrinsic::aarch64_neon_st2: {
5991 if (VT == MVT::v8i8) {
5992 SelectStore(Node, 2, AArch64::ST2Twov8b);
5993 return;
5994 } else if (VT == MVT::v16i8) {
5995 SelectStore(Node, 2, AArch64::ST2Twov16b);
5996 return;
5997 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5998 VT == MVT::v4bf16) {
5999 SelectStore(Node, 2, AArch64::ST2Twov4h);
6000 return;
6001 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6002 VT == MVT::v8bf16) {
6003 SelectStore(Node, 2, AArch64::ST2Twov8h);
6004 return;
6005 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6006 SelectStore(Node, 2, AArch64::ST2Twov2s);
6007 return;
6008 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6009 SelectStore(Node, 2, AArch64::ST2Twov4s);
6010 return;
6011 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6012 SelectStore(Node, 2, AArch64::ST2Twov2d);
6013 return;
6014 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6015 SelectStore(Node, 2, AArch64::ST1Twov1d);
6016 return;
6017 }
6018 break;
6019 }
6020 case Intrinsic::aarch64_neon_st3: {
6021 if (VT == MVT::v8i8) {
6022 SelectStore(Node, 3, AArch64::ST3Threev8b);
6023 return;
6024 } else if (VT == MVT::v16i8) {
6025 SelectStore(Node, 3, AArch64::ST3Threev16b);
6026 return;
6027 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6028 VT == MVT::v4bf16) {
6029 SelectStore(Node, 3, AArch64::ST3Threev4h);
6030 return;
6031 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6032 VT == MVT::v8bf16) {
6033 SelectStore(Node, 3, AArch64::ST3Threev8h);
6034 return;
6035 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6036 SelectStore(Node, 3, AArch64::ST3Threev2s);
6037 return;
6038 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6039 SelectStore(Node, 3, AArch64::ST3Threev4s);
6040 return;
6041 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6042 SelectStore(Node, 3, AArch64::ST3Threev2d);
6043 return;
6044 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6045 SelectStore(Node, 3, AArch64::ST1Threev1d);
6046 return;
6047 }
6048 break;
6049 }
6050 case Intrinsic::aarch64_neon_st4: {
6051 if (VT == MVT::v8i8) {
6052 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6053 return;
6054 } else if (VT == MVT::v16i8) {
6055 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6056 return;
6057 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6058 VT == MVT::v4bf16) {
6059 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6060 return;
6061 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6062 VT == MVT::v8bf16) {
6063 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6064 return;
6065 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6066 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6067 return;
6068 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6069 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6070 return;
6071 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6072 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6073 return;
6074 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6075 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6076 return;
6077 }
6078 break;
6079 }
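    // Lane-wise NEON stores: only the element size matters here (the lane
    // index operand picks the element), so the 64- and 128-bit vector types
    // share the same ST2i*/ST3i*/ST4i* opcode.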
6080 case Intrinsic::aarch64_neon_st2lane: {
6081 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6082 SelectStoreLane(Node, 2, AArch64::ST2i8);
6083 return;
6084 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6085 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6086 SelectStoreLane(Node, 2, AArch64::ST2i16);
6087 return;
6088 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6089 VT == MVT::v2f32) {
6090 SelectStoreLane(Node, 2, AArch64::ST2i32);
6091 return;
6092 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6093 VT == MVT::v1f64) {
6094 SelectStoreLane(Node, 2, AArch64::ST2i64);
6095 return;
6096 }
6097 break;
6098 }
6099 case Intrinsic::aarch64_neon_st3lane: {
6100 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6101 SelectStoreLane(Node, 3, AArch64::ST3i8);
6102 return;
6103 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6104 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6105 SelectStoreLane(Node, 3, AArch64::ST3i16);
6106 return;
6107 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6108 VT == MVT::v2f32) {
6109 SelectStoreLane(Node, 3, AArch64::ST3i32);
6110 return;
6111 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6112 VT == MVT::v1f64) {
6113 SelectStoreLane(Node, 3, AArch64::ST3i64);
6114 return;
6115 }
6116 break;
6117 }
6118 case Intrinsic::aarch64_neon_st4lane: {
6119 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6120 SelectStoreLane(Node, 4, AArch64::ST4i8);
6121 return;
6122 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6123 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6124 SelectStoreLane(Node, 4, AArch64::ST4i16);
6125 return;
6126 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6127 VT == MVT::v2f32) {
6128 SelectStoreLane(Node, 4, AArch64::ST4i32);
6129 return;
6130 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6131 VT == MVT::v1f64) {
6132 SelectStoreLane(Node, 4, AArch64::ST4i64);
6133 return;
6134 }
6135 break;
6136 }
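    // SVE structured stores: SelectPredicatedStore is given the number of
    // vectors, a scale (0 for B, 1 for H, 2 for W, 3 for D, 4 for Q, i.e. the
    // log2 of the element size in bytes), and the reg+reg / reg+imm opcode
    // pair to choose between.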
6137 case Intrinsic::aarch64_sve_st2q: {
6138 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6139 return;
6140 }
6141 case Intrinsic::aarch64_sve_st3q: {
6142 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6143 return;
6144 }
6145 case Intrinsic::aarch64_sve_st4q: {
6146 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6147 return;
6148 }
6149 case Intrinsic::aarch64_sve_st2: {
6150 if (VT == MVT::nxv16i8) {
6151 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6152 return;
6153 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6154 VT == MVT::nxv8bf16) {
6155 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6156 return;
6157 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6158 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6159 return;
6160 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6161 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6162 return;
6163 }
6164 break;
6165 }
6166 case Intrinsic::aarch64_sve_st3: {
6167 if (VT == MVT::nxv16i8) {
6168 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6169 return;
6170 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6171 VT == MVT::nxv8bf16) {
6172 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6173 return;
6174 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6175 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6176 return;
6177 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6178 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6179 return;
6180 }
6181 break;
6182 }
6183 case Intrinsic::aarch64_sve_st4: {
6184 if (VT == MVT::nxv16i8) {
6185 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6186 return;
6187 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6188 VT == MVT::nxv8bf16) {
6189 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6190 return;
6191 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6192 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6193 return;
6194 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6195 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6196 return;
6197 }
6198 break;
6199 }
6200 }
6201 break;
6202 }
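  // Post-incremented NEON structured loads: the *_POST opcodes write the
  // updated base address back, and dsub0/qsub0 is the first sub-register
  // index used to extract each 64- or 128-bit result from the register tuple.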
6203 case AArch64ISD::LD2post: {
6204 if (VT == MVT::v8i8) {
6205 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6206 return;
6207 } else if (VT == MVT::v16i8) {
6208 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6209 return;
6210 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6211 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6212 return;
6213 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6214 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6215 return;
6216 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6217 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6218 return;
6219 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6220 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6221 return;
6222 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6223 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6224 return;
6225 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6226 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6227 return;
6228 }
6229 break;
6230 }
6231 case AArch64ISD::LD3post: {
6232 if (VT == MVT::v8i8) {
6233 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6234 return;
6235 } else if (VT == MVT::v16i8) {
6236 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6237 return;
6238 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6239 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6240 return;
6241 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6242 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6243 return;
6244 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6245 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6246 return;
6247 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6248 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6249 return;
6250 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6251 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6252 return;
6253 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6254 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6255 return;
6256 }
6257 break;
6258 }
6259 case AArch64ISD::LD4post: {
6260 if (VT == MVT::v8i8) {
6261 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6262 return;
6263 } else if (VT == MVT::v16i8) {
6264 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6265 return;
6266 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6267 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6268 return;
6269 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6270 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6271 return;
6272 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6273 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6274 return;
6275 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6276 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6277 return;
6278 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6279 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6280 return;
6281 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6282 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6283 return;
6284 }
6285 break;
6286 }
6287 case AArch64ISD::LD1x2post: {
6288 if (VT == MVT::v8i8) {
6289 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6290 return;
6291 } else if (VT == MVT::v16i8) {
6292 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6293 return;
6294 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6295 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6296 return;
6297 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6298 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6299 return;
6300 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6301 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6302 return;
6303 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6304 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6305 return;
6306 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6307 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6308 return;
6309 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6310 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6311 return;
6312 }
6313 break;
6314 }
6315 case AArch64ISD::LD1x3post: {
6316 if (VT == MVT::v8i8) {
6317 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6318 return;
6319 } else if (VT == MVT::v16i8) {
6320 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6321 return;
6322 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6323 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6324 return;
6325 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6326 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6327 return;
6328 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6329 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6330 return;
6331 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6332 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6333 return;
6334 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6335 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6336 return;
6337 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6338 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6339 return;
6340 }
6341 break;
6342 }
6343 case AArch64ISD::LD1x4post: {
6344 if (VT == MVT::v8i8) {
6345 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6346 return;
6347 } else if (VT == MVT::v16i8) {
6348 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6349 return;
6350 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6351 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6352 return;
6353 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6354 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6355 return;
6356 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6357 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6358 return;
6359 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6360 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6361 return;
6362 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6363 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6364 return;
6365 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6366 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6367 return;
6368 }
6369 break;
6370 }
6371 case AArch64ISD::LD1DUPpost: {
6372 if (VT == MVT::v8i8) {
6373 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6374 return;
6375 } else if (VT == MVT::v16i8) {
6376 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6377 return;
6378 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6379 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6380 return;
6381 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6382 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6383 return;
6384 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6385 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6386 return;
6387 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6388 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6389 return;
6390 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6391 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6392 return;
6393 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6394 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6395 return;
6396 }
6397 break;
6398 }
6399 case AArch64ISD::LD2DUPpost: {
6400 if (VT == MVT::v8i8) {
6401 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6402 return;
6403 } else if (VT == MVT::v16i8) {
6404 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6405 return;
6406 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6407 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6408 return;
6409 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6410 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6411 return;
6412 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6413 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6414 return;
6415 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6416 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6417 return;
6418 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6419 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6420 return;
6421 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6422 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6423 return;
6424 }
6425 break;
6426 }
6427 case AArch64ISD::LD3DUPpost: {
6428 if (VT == MVT::v8i8) {
6429 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6430 return;
6431 } else if (VT == MVT::v16i8) {
6432 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6433 return;
6434 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6435 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6436 return;
6437 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6438 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6439 return;
6440 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6441 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6442 return;
6443 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6444 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6445 return;
6446 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6447 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6448 return;
6449 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6450 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6451 return;
6452 }
6453 break;
6454 }
6455 case AArch64ISD::LD4DUPpost: {
6456 if (VT == MVT::v8i8) {
6457 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6458 return;
6459 } else if (VT == MVT::v16i8) {
6460 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6461 return;
6462 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6463 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6464 return;
6465 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6466 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6467 return;
6468 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6469 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6470 return;
6471 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6472 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6473 return;
6474 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6475 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6476 return;
6477 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6478 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6479 return;
6480 }
6481 break;
6482 }
6483 case AArch64ISD::LD1LANEpost: {
6484 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6485 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6486 return;
6487 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6488 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6489 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6490 return;
6491 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6492 VT == MVT::v2f32) {
6493 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6494 return;
6495 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6496 VT == MVT::v1f64) {
6497 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6498 return;
6499 }
6500 break;
6501 }
6502 case AArch64ISD::LD2LANEpost: {
6503 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6504 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6505 return;
6506 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6507 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6508 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6509 return;
6510 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6511 VT == MVT::v2f32) {
6512 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6513 return;
6514 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6515 VT == MVT::v1f64) {
6516 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6517 return;
6518 }
6519 break;
6520 }
6521 case AArch64ISD::LD3LANEpost: {
6522 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6523 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6524 return;
6525 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6526 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6527 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6528 return;
6529 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6530 VT == MVT::v2f32) {
6531 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6532 return;
6533 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6534 VT == MVT::v1f64) {
6535 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6536 return;
6537 }
6538 break;
6539 }
6540 case AArch64ISD::LD4LANEpost: {
6541 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6542 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6543 return;
6544 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6545 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6546 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6547 return;
6548 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6549 VT == MVT::v2f32) {
6550 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6551 return;
6552 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6553 VT == MVT::v1f64) {
6554 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6555 return;
6556 }
6557 break;
6558 }
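  // Post-incremented NEON stores: unlike the loads above, the stored value
  // type is taken from operand 1 (the first vector operand) rather than from
  // the node's result type.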
6559 case AArch64ISD::ST2post: {
6560 VT = Node->getOperand(Num: 1).getValueType();
6561 if (VT == MVT::v8i8) {
6562 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6563 return;
6564 } else if (VT == MVT::v16i8) {
6565 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6566 return;
6567 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6568 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6569 return;
6570 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6571 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6572 return;
6573 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6574 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6575 return;
6576 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6577 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6578 return;
6579 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6580 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6581 return;
6582 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6583 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6584 return;
6585 }
6586 break;
6587 }
6588 case AArch64ISD::ST3post: {
6589 VT = Node->getOperand(Num: 1).getValueType();
6590 if (VT == MVT::v8i8) {
6591 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6592 return;
6593 } else if (VT == MVT::v16i8) {
6594 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6595 return;
6596 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6597 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6598 return;
6599 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6600 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6601 return;
6602 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6603 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6604 return;
6605 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6606 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6607 return;
6608 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6609 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6610 return;
6611 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6612 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6613 return;
6614 }
6615 break;
6616 }
6617 case AArch64ISD::ST4post: {
6618 VT = Node->getOperand(Num: 1).getValueType();
6619 if (VT == MVT::v8i8) {
6620 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6621 return;
6622 } else if (VT == MVT::v16i8) {
6623 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6624 return;
6625 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6626 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6627 return;
6628 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6629 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6630 return;
6631 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6632 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6633 return;
6634 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6635 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6636 return;
6637 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6638 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6639 return;
6640 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6641 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6642 return;
6643 }
6644 break;
6645 }
6646 case AArch64ISD::ST1x2post: {
6647 VT = Node->getOperand(Num: 1).getValueType();
6648 if (VT == MVT::v8i8) {
6649 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6650 return;
6651 } else if (VT == MVT::v16i8) {
6652 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6653 return;
6654 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6655 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6656 return;
6657 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6658 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6659 return;
6660 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6661 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6662 return;
6663 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6664 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6665 return;
6666 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6667 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6668 return;
6669 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6670 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6671 return;
6672 }
6673 break;
6674 }
6675 case AArch64ISD::ST1x3post: {
6676 VT = Node->getOperand(Num: 1).getValueType();
6677 if (VT == MVT::v8i8) {
6678 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6679 return;
6680 } else if (VT == MVT::v16i8) {
6681 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6682 return;
6683 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6684 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6685 return;
6686    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6687 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6688 return;
6689 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6690 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6691 return;
6692 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6693 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6694 return;
6695 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6696 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6697 return;
6698 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6699 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6700 return;
6701 }
6702 break;
6703 }
6704 case AArch64ISD::ST1x4post: {
6705 VT = Node->getOperand(Num: 1).getValueType();
6706 if (VT == MVT::v8i8) {
6707 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6708 return;
6709 } else if (VT == MVT::v16i8) {
6710 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6711 return;
6712 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6713 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6714 return;
6715 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6716 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6717 return;
6718 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6719 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6720 return;
6721 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6722 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6723 return;
6724 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6725 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6726 return;
6727 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6728 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6729 return;
6730 }
6731 break;
6732 }
6733 case AArch64ISD::ST2LANEpost: {
6734 VT = Node->getOperand(Num: 1).getValueType();
6735 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6736 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6737 return;
6738 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6739 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6740 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6741 return;
6742 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6743 VT == MVT::v2f32) {
6744 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6745 return;
6746 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6747 VT == MVT::v1f64) {
6748 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6749 return;
6750 }
6751 break;
6752 }
6753 case AArch64ISD::ST3LANEpost: {
6754 VT = Node->getOperand(Num: 1).getValueType();
6755 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6756 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
6757 return;
6758 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6759 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6760 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
6761 return;
6762 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6763 VT == MVT::v2f32) {
6764 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
6765 return;
6766 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6767 VT == MVT::v1f64) {
6768 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
6769 return;
6770 }
6771 break;
6772 }
6773 case AArch64ISD::ST4LANEpost: {
6774 VT = Node->getOperand(Num: 1).getValueType();
6775 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6776 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
6777 return;
6778 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6779 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6780 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
6781 return;
6782 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6783 VT == MVT::v2f32) {
6784 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
6785 return;
6786 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6787 VT == MVT::v1f64) {
6788 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
6789 return;
6790 }
6791 break;
6792 }
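  // SVE structured loads that zero inactive lanes: as with the SVE stores
  // above, the scale is the log2 of the element size, and the opcode pair
  // covers the immediate ("mul vl") and reg+reg addressing forms.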
6793 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
6794 if (VT == MVT::nxv16i8) {
6795 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
6796 return;
6797 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6798 VT == MVT::nxv8bf16) {
6799 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
6800 return;
6801 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6802 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
6803 return;
6804 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6805 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
6806 return;
6807 }
6808 break;
6809 }
6810 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
6811 if (VT == MVT::nxv16i8) {
6812 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
6813 return;
6814 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6815 VT == MVT::nxv8bf16) {
6816 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
6817 return;
6818 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6819 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
6820 return;
6821 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6822 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
6823 return;
6824 }
6825 break;
6826 }
6827 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
6828 if (VT == MVT::nxv16i8) {
6829 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
6830 return;
6831 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6832 VT == MVT::nxv8bf16) {
6833 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
6834 return;
6835 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6836 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
6837 return;
6838 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6839 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
6840 return;
6841 }
6842 break;
6843 }
6844 }
6845
6846 // Select the default instruction
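  // (i.e. fall back to the TableGen-generated SelectionDAG matcher for any
  // node the manual cases above did not handle).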
6847 SelectCode(Node);
6848}
6849
6850/// createAArch64ISelDag - This pass converts a legalized DAG into an
6851/// AArch64-specific DAG, ready for instruction scheduling.
6852FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
6853 CodeGenOptLevel OptLevel) {
6854 return new AArch64DAGToDAGISel(TM, OptLevel);
6855}
6856
6857/// When \p PredVT is a scalable vector predicate in the form
6858/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
6859/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
6860/// structured vectors (NumVec > 1), the output data type is
6861/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
6862/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
6863/// EVT.
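/// For example, nxv16i1 gives nxv16i8 for NumVec = 1, and nxv4i1 with
/// NumVec = 2 gives nxv8i32.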
6864static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
6865 unsigned NumVec) {
6866 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
6867 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
6868 return EVT();
6869
6870 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
6871 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
6872 return EVT();
6873
6874 ElementCount EC = PredVT.getVectorElementCount();
6875 EVT ScalarVT =
6876 EVT::getIntegerVT(Context&: Ctx, BitWidth: AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
6877 EVT MemVT = EVT::getVectorVT(Context&: Ctx, VT: ScalarVT, EC: EC * NumVec);
6878
6879 return MemVT;
6880}
6881
6882/// Return the EVT of the data associated with a memory operation in \p
6883/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
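/// For example, an SVE_LD2_MERGE_ZERO rooted on an nxv16i1 predicate yields
/// nxv32i8 (two packed byte vectors).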
6884static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
6885 if (isa<MemSDNode>(Val: Root))
6886 return cast<MemSDNode>(Val: Root)->getMemoryVT();
6887
6888 if (isa<MemIntrinsicSDNode>(Val: Root))
6889 return cast<MemIntrinsicSDNode>(Val: Root)->getMemoryVT();
6890
6891 const unsigned Opcode = Root->getOpcode();
6892 // For custom ISD nodes, we have to look at them individually to extract the
6893 // type of the data moved to/from memory.
6894 switch (Opcode) {
6895 case AArch64ISD::LD1_MERGE_ZERO:
6896 case AArch64ISD::LD1S_MERGE_ZERO:
6897 case AArch64ISD::LDNF1_MERGE_ZERO:
6898 case AArch64ISD::LDNF1S_MERGE_ZERO:
6899 return cast<VTSDNode>(Val: Root->getOperand(Num: 3))->getVT();
6900 case AArch64ISD::ST1_PRED:
6901 return cast<VTSDNode>(Val: Root->getOperand(Num: 4))->getVT();
6902 case AArch64ISD::SVE_LD2_MERGE_ZERO:
6903 return getPackedVectorTypeFromPredicateType(
6904 Ctx, PredVT: Root->getOperand(Num: 1)->getValueType(ResNo: 0), /*NumVec=*/2);
6905 case AArch64ISD::SVE_LD3_MERGE_ZERO:
6906 return getPackedVectorTypeFromPredicateType(
6907 Ctx, PredVT: Root->getOperand(Num: 1)->getValueType(ResNo: 0), /*NumVec=*/3);
6908 case AArch64ISD::SVE_LD4_MERGE_ZERO:
6909 return getPackedVectorTypeFromPredicateType(
6910 Ctx, PredVT: Root->getOperand(Num: 1)->getValueType(ResNo: 0), /*NumVec=*/4);
6911 default:
6912 break;
6913 }
6914
6915 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
6916 return EVT();
6917
6918 switch (Root->getConstantOperandVal(Num: 1)) {
6919 default:
6920 return EVT();
6921 case Intrinsic::aarch64_sme_ldr:
6922 case Intrinsic::aarch64_sme_str:
6923 return MVT::nxv16i8;
6924 case Intrinsic::aarch64_sve_prf:
6925 // We are using an SVE prefetch intrinsic. Type must be inferred from the
6926 // width of the predicate.
6927 return getPackedVectorTypeFromPredicateType(
6928 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/1);
6929 case Intrinsic::aarch64_sve_ld2_sret:
6930 case Intrinsic::aarch64_sve_ld2q_sret:
6931 return getPackedVectorTypeFromPredicateType(
6932 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/2);
6933 case Intrinsic::aarch64_sve_st2q:
6934 return getPackedVectorTypeFromPredicateType(
6935 Ctx, PredVT: Root->getOperand(Num: 4)->getValueType(ResNo: 0), /*NumVec=*/2);
6936 case Intrinsic::aarch64_sve_ld3_sret:
6937 case Intrinsic::aarch64_sve_ld3q_sret:
6938 return getPackedVectorTypeFromPredicateType(
6939 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/3);
6940 case Intrinsic::aarch64_sve_st3q:
6941 return getPackedVectorTypeFromPredicateType(
6942 Ctx, PredVT: Root->getOperand(Num: 5)->getValueType(ResNo: 0), /*NumVec=*/3);
6943 case Intrinsic::aarch64_sve_ld4_sret:
6944 case Intrinsic::aarch64_sve_ld4q_sret:
6945 return getPackedVectorTypeFromPredicateType(
6946 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/4);
6947 case Intrinsic::aarch64_sve_st4q:
6948 return getPackedVectorTypeFromPredicateType(
6949 Ctx, PredVT: Root->getOperand(Num: 6)->getValueType(ResNo: 0), /*NumVec=*/4);
6950 case Intrinsic::aarch64_sve_ld1udq:
6951 case Intrinsic::aarch64_sve_st1dq:
6952 return EVT(MVT::nxv1i64);
6953 case Intrinsic::aarch64_sve_ld1uwq:
6954 case Intrinsic::aarch64_sve_st1wq:
6955 return EVT(MVT::nxv1i32);
6956 }
6957}
6958
6959/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
6960/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
6961/// where Root is the memory access using N for its address.
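/// For example, with MemVT == nxv16i8 (16 bytes per vscale unit), an address
/// of the form (add Base, (vscale * 32)) folds to OffImm = 2, provided 2 lies
/// within [Min, Max].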
6962template <int64_t Min, int64_t Max>
6963bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
6964 SDValue &Base,
6965 SDValue &OffImm) {
6966 const EVT MemVT = getMemVTFromNode(Ctx&: *(CurDAG->getContext()), Root);
6967 const DataLayout &DL = CurDAG->getDataLayout();
6968 const MachineFrameInfo &MFI = MF->getFrameInfo();
6969
6970 if (N.getOpcode() == ISD::FrameIndex) {
6971 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
6972 // We can only encode VL scaled offsets, so only fold in frame indexes
6973 // referencing SVE objects.
6974 if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector) {
6975 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
6976 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
6977 return true;
6978 }
6979
6980 return false;
6981 }
6982
6983 if (MemVT == EVT())
6984 return false;
6985
6986 if (N.getOpcode() != ISD::ADD)
6987 return false;
6988
6989 SDValue VScale = N.getOperand(i: 1);
6990 if (VScale.getOpcode() != ISD::VSCALE)
6991 return false;
6992
6993 TypeSize TS = MemVT.getSizeInBits();
6994 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
6995 int64_t MulImm = cast<ConstantSDNode>(Val: VScale.getOperand(i: 0))->getSExtValue();
6996
6997 if ((MulImm % MemWidthBytes) != 0)
6998 return false;
6999
7000 int64_t Offset = MulImm / MemWidthBytes;
7001 if (Offset < Min || Offset > Max)
7002 return false;
7003
7004 Base = N.getOperand(i: 0);
7005 if (Base.getOpcode() == ISD::FrameIndex) {
7006 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
7007 // We can only encode VL scaled offsets, so only fold in frame indexes
7008 // referencing SVE objects.
7009 if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector)
7010 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
7011 }
7012
7013 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7014 return true;
7015}
7016
7017/// Select register plus register addressing mode for SVE, with scaled
7018/// offset.
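/// For example, with Scale == 2 (word elements) the pattern
/// (add x0, (shl x1, 2)) selects Base = x0 and Offset = x1, while a constant
/// addend that is a multiple of 4 is rescaled and materialized into a
/// register with MOVi64imm.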
7019bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7020 SDValue &Base,
7021 SDValue &Offset) {
7022 if (N.getOpcode() != ISD::ADD)
7023 return false;
7024
7025 // Process an ADD node.
7026 const SDValue LHS = N.getOperand(i: 0);
7027 const SDValue RHS = N.getOperand(i: 1);
7028
7029  // 8-bit data does not come with a SHL node, so it is treated
7030  // separately.
7031 if (Scale == 0) {
7032 Base = LHS;
7033 Offset = RHS;
7034 return true;
7035 }
7036
7037 if (auto C = dyn_cast<ConstantSDNode>(Val: RHS)) {
7038 int64_t ImmOff = C->getSExtValue();
7039 unsigned Size = 1 << Scale;
7040
7041 // To use the reg+reg addressing mode, the immediate must be a multiple of
7042 // the vector element's byte size.
7043 if (ImmOff % Size)
7044 return false;
7045
7046 SDLoc DL(N);
7047 Base = LHS;
7048 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7049 SDValue Ops[] = {Offset};
7050 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7051 Offset = SDValue(MI, 0);
7052 return true;
7053 }
7054
7055 // Check if the RHS is a shift node with a constant.
7056 if (RHS.getOpcode() != ISD::SHL)
7057 return false;
7058
7059 const SDValue ShiftRHS = RHS.getOperand(i: 1);
7060 if (auto *C = dyn_cast<ConstantSDNode>(Val: ShiftRHS))
7061 if (C->getZExtValue() == Scale) {
7062 Base = LHS;
7063 Offset = RHS.getOperand(i: 0);
7064 return true;
7065 }
7066
7067 return false;
7068}
7069
7070bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7071 const AArch64TargetLowering *TLI =
7072 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7073
7074 return TLI->isAllActivePredicate(DAG&: *CurDAG, N);
7075}
7076
7077bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7078 EVT VT = N.getValueType();
7079 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7080}
7081
7082bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7083 SDValue &Base, SDValue &Offset,
7084 unsigned Scale) {
7085 // Try to untangle an ADD node into a 'reg + offset'
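  // e.g. with Scale == 2, (add x0, #8) yields Base = x0 and Offset = 4,
  // provided 8 <= MaxSize; otherwise the node is matched as reg + 0 below.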
7086 if (N.getOpcode() == ISD::ADD)
7087 if (auto C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
7088 int64_t ImmOff = C->getSExtValue();
7089 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7090 Base = N.getOperand(i: 0);
7091 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7092 return true;
7093 }
7094 }
7095
7096 // By default, just match reg + 0.
7097 Base = N;
7098 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7099 return true;
7100}
7101
