1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations. This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/CodeGen/LivePhysRegs.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineConstantPool.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/CodeGen/TargetSubtargetInfo.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/Pass.h"
34#include "llvm/Support/CodeGen.h"
35#include "llvm/Support/MathExtras.h"
36#include "llvm/Target/TargetMachine.h"
37#include "llvm/TargetParser/Triple.h"
38#include <cassert>
39#include <cstdint>
40#include <iterator>
41#include <utility>
42
43using namespace llvm;
44
45#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
46
47namespace {
48
49class AArch64ExpandPseudo : public MachineFunctionPass {
50public:
51 const AArch64InstrInfo *TII;
52
53 static char ID;
54
55 AArch64ExpandPseudo() : MachineFunctionPass(ID) {
56 initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
57 }
58
59 bool runOnMachineFunction(MachineFunction &Fn) override;
60
61 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
62
63private:
64 bool expandMBB(MachineBasicBlock &MBB);
65 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66 MachineBasicBlock::iterator &NextMBBI);
67 bool expandMultiVecPseudo(MachineBasicBlock &MBB,
68 MachineBasicBlock::iterator MBBI,
69 TargetRegisterClass ContiguousClass,
70 TargetRegisterClass StridedClass,
71 unsigned ContiguousOpc, unsigned StridedOpc);
72 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
73 unsigned BitSize);
74
75 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
76 MachineBasicBlock::iterator MBBI);
77 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
78 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
79 unsigned ExtendImm, unsigned ZeroReg,
80 MachineBasicBlock::iterator &NextMBBI);
81 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
82 MachineBasicBlock::iterator MBBI,
83 MachineBasicBlock::iterator &NextMBBI);
84 bool expandSetTagLoop(MachineBasicBlock &MBB,
85 MachineBasicBlock::iterator MBBI,
86 MachineBasicBlock::iterator &NextMBBI);
87 bool expandSVESpillFill(MachineBasicBlock &MBB,
88 MachineBasicBlock::iterator MBBI, unsigned Opc,
89 unsigned N);
90 bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
91 MachineBasicBlock::iterator MBBI);
92 bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
93 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
94 MachineBasicBlock::iterator MBBI);
95 MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
96 MachineBasicBlock::iterator MBBI);
97 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
98 MachineBasicBlock::iterator MBBI);
99};
100
101} // end anonymous namespace
102
103char AArch64ExpandPseudo::ID = 0;
104
105INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
106 AARCH64_EXPAND_PSEUDO_NAME, false, false)
107
108/// Transfer implicit operands on the pseudo instruction to the
109/// instructions created from the expansion.
110static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
111 MachineInstrBuilder &DefMI) {
112 const MCInstrDesc &Desc = OldMI.getDesc();
113 for (const MachineOperand &MO :
114 llvm::drop_begin(RangeOrContainer: OldMI.operands(), N: Desc.getNumOperands())) {
115 assert(MO.isReg() && MO.getReg());
116 if (MO.isUse())
117 UseMI.add(MO);
118 else
119 DefMI.add(MO);
120 }
121}
122
123/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
124/// real move-immediate instructions to synthesize the immediate.
125bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
126 MachineBasicBlock::iterator MBBI,
127 unsigned BitSize) {
128 MachineInstr &MI = *MBBI;
129 Register DstReg = MI.getOperand(i: 0).getReg();
130 uint64_t RenamableState =
131 MI.getOperand(i: 0).isRenamable() ? RegState::Renamable : 0;
132 uint64_t Imm = MI.getOperand(i: 1).getImm();
133
134 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
135 // Useless def, and we don't want to risk creating an invalid ORR (which
136 // would really write to sp).
137 MI.eraseFromParent();
138 return true;
139 }
140
141 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
142 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
143 assert(Insn.size() != 0);
144
145 SmallVector<MachineInstrBuilder, 4> MIBS;
146 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
147 bool LastItem = std::next(x: I) == E;
148 switch (I->Opcode)
149 {
150 default: llvm_unreachable("unhandled!"); break;
151
152 case AArch64::ORRWri:
153 case AArch64::ORRXri:
154 if (I->Op1 == 0) {
155 MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
156 .add(MI.getOperand(i: 0))
157 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
158 .addImm(I->Op2));
159 } else {
160 Register DstReg = MI.getOperand(i: 0).getReg();
161 bool DstIsDead = MI.getOperand(i: 0).isDead();
162 MIBS.push_back(
163 Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
164 .addReg(DstReg, RegState::Define |
165 getDeadRegState(B: DstIsDead && LastItem) |
166 RenamableState)
167 .addReg(DstReg)
168 .addImm(I->Op2));
169 }
170 break;
171 case AArch64::ANDXri:
172 case AArch64::EORXri:
173 if (I->Op1 == 0) {
174 MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
175 .add(MI.getOperand(i: 0))
176 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
177 .addImm(I->Op2));
178 } else {
179 Register DstReg = MI.getOperand(i: 0).getReg();
180 bool DstIsDead = MI.getOperand(i: 0).isDead();
181 MIBS.push_back(
182 Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
183 .addReg(DstReg, RegState::Define |
184 getDeadRegState(B: DstIsDead && LastItem) |
185 RenamableState)
186 .addReg(DstReg)
187 .addImm(I->Op2));
188 }
189 break;
190 case AArch64::MOVNWi:
191 case AArch64::MOVNXi:
192 case AArch64::MOVZWi:
193 case AArch64::MOVZXi: {
194 bool DstIsDead = MI.getOperand(i: 0).isDead();
195 MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
196 .addReg(DstReg, RegState::Define |
197 getDeadRegState(B: DstIsDead && LastItem) |
198 RenamableState)
199 .addImm(I->Op1)
200 .addImm(I->Op2));
201 } break;
202 case AArch64::MOVKWi:
203 case AArch64::MOVKXi: {
204 Register DstReg = MI.getOperand(i: 0).getReg();
205 bool DstIsDead = MI.getOperand(i: 0).isDead();
206 MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
207 .addReg(DstReg,
208 RegState::Define |
209 getDeadRegState(B: DstIsDead && LastItem) |
210 RenamableState)
211 .addReg(DstReg)
212 .addImm(I->Op1)
213 .addImm(I->Op2));
214 } break;
215 }
216 }
217 transferImpOps(OldMI&: MI, UseMI&: MIBS.front(), DefMI&: MIBS.back());
218 MI.eraseFromParent();
219 return true;
220}
221
222bool AArch64ExpandPseudo::expandCMP_SWAP(
223 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
224 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
225 MachineBasicBlock::iterator &NextMBBI) {
226 MachineInstr &MI = *MBBI;
227 MIMetadata MIMD(MI);
228 const MachineOperand &Dest = MI.getOperand(i: 0);
229 Register StatusReg = MI.getOperand(i: 1).getReg();
230 bool StatusDead = MI.getOperand(i: 1).isDead();
231 // Duplicating undef operands into 2 instructions does not guarantee the same
232 // value on both; However undef should be replaced by xzr anyway.
233 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
234 Register AddrReg = MI.getOperand(i: 2).getReg();
235 Register DesiredReg = MI.getOperand(i: 3).getReg();
236 Register NewReg = MI.getOperand(i: 4).getReg();
237
238 MachineFunction *MF = MBB.getParent();
239 auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
240 auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
241 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
242
243 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
244 MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
245 MF->insert(MBBI: ++StoreBB->getIterator(), MBB: DoneBB);
246
247 // .Lloadcmp:
248 // mov wStatus, 0
249 // ldaxr xDest, [xAddr]
250 // cmp xDest, xDesired
251 // b.ne .Ldone
252 if (!StatusDead)
253 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
254 .addImm(0).addImm(0);
255 BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
256 .addReg(AddrReg);
257 BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
258 .addReg(Dest.getReg(), getKillRegState(B: Dest.isDead()))
259 .addReg(DesiredReg)
260 .addImm(ExtendImm);
261 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
262 .addImm(AArch64CC::NE)
263 .addMBB(DoneBB)
264 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
265 LoadCmpBB->addSuccessor(Succ: DoneBB);
266 LoadCmpBB->addSuccessor(Succ: StoreBB);
267
268 // .Lstore:
269 // stlxr wStatus, xNew, [xAddr]
270 // cbnz wStatus, .Lloadcmp
271 BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
272 .addReg(NewReg)
273 .addReg(AddrReg);
274 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
275 .addReg(StatusReg, getKillRegState(B: StatusDead))
276 .addMBB(LoadCmpBB);
277 StoreBB->addSuccessor(Succ: LoadCmpBB);
278 StoreBB->addSuccessor(Succ: DoneBB);
279
280 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
281 DoneBB->transferSuccessors(FromMBB: &MBB);
282
283 MBB.addSuccessor(Succ: LoadCmpBB);
284
285 NextMBBI = MBB.end();
286 MI.eraseFromParent();
287
288 // Recompute livein lists.
289 LivePhysRegs LiveRegs;
290 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
291 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
292 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
293 // Do an extra pass around the loop to get loop carried registers right.
294 StoreBB->clearLiveIns();
295 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
296 LoadCmpBB->clearLiveIns();
297 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
298
299 return true;
300}
301
302bool AArch64ExpandPseudo::expandCMP_SWAP_128(
303 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
304 MachineBasicBlock::iterator &NextMBBI) {
305 MachineInstr &MI = *MBBI;
306 MIMetadata MIMD(MI);
307 MachineOperand &DestLo = MI.getOperand(i: 0);
308 MachineOperand &DestHi = MI.getOperand(i: 1);
309 Register StatusReg = MI.getOperand(i: 2).getReg();
310 bool StatusDead = MI.getOperand(i: 2).isDead();
311 // Duplicating undef operands into 2 instructions does not guarantee the same
312 // value on both; However undef should be replaced by xzr anyway.
313 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
314 Register AddrReg = MI.getOperand(i: 3).getReg();
315 Register DesiredLoReg = MI.getOperand(i: 4).getReg();
316 Register DesiredHiReg = MI.getOperand(i: 5).getReg();
317 Register NewLoReg = MI.getOperand(i: 6).getReg();
318 Register NewHiReg = MI.getOperand(i: 7).getReg();
319
320 unsigned LdxpOp, StxpOp;
321
322 switch (MI.getOpcode()) {
323 case AArch64::CMP_SWAP_128_MONOTONIC:
324 LdxpOp = AArch64::LDXPX;
325 StxpOp = AArch64::STXPX;
326 break;
327 case AArch64::CMP_SWAP_128_RELEASE:
328 LdxpOp = AArch64::LDXPX;
329 StxpOp = AArch64::STLXPX;
330 break;
331 case AArch64::CMP_SWAP_128_ACQUIRE:
332 LdxpOp = AArch64::LDAXPX;
333 StxpOp = AArch64::STXPX;
334 break;
335 case AArch64::CMP_SWAP_128:
336 LdxpOp = AArch64::LDAXPX;
337 StxpOp = AArch64::STLXPX;
338 break;
339 default:
340 llvm_unreachable("Unexpected opcode");
341 }
342
343 MachineFunction *MF = MBB.getParent();
344 auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
345 auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
346 auto FailBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
347 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
348
349 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
350 MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
351 MF->insert(MBBI: ++StoreBB->getIterator(), MBB: FailBB);
352 MF->insert(MBBI: ++FailBB->getIterator(), MBB: DoneBB);
353
354 // .Lloadcmp:
355 // ldaxp xDestLo, xDestHi, [xAddr]
356 // cmp xDestLo, xDesiredLo
357 // sbcs xDestHi, xDesiredHi
358 // b.ne .Ldone
359 BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
360 .addReg(DestLo.getReg(), RegState::Define)
361 .addReg(DestHi.getReg(), RegState::Define)
362 .addReg(AddrReg);
363 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
364 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
365 .addReg(DesiredLoReg)
366 .addImm(0);
367 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
368 .addUse(AArch64::WZR)
369 .addUse(AArch64::WZR)
370 .addImm(AArch64CC::EQ);
371 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
372 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
373 .addReg(DesiredHiReg)
374 .addImm(0);
375 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
376 .addUse(StatusReg, RegState::Kill)
377 .addUse(StatusReg, RegState::Kill)
378 .addImm(AArch64CC::EQ);
379 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
380 .addUse(StatusReg, getKillRegState(StatusDead))
381 .addMBB(FailBB);
382 LoadCmpBB->addSuccessor(Succ: FailBB);
383 LoadCmpBB->addSuccessor(Succ: StoreBB);
384
385 // .Lstore:
386 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
387 // cbnz wStatus, .Lloadcmp
388 BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
389 .addReg(NewLoReg)
390 .addReg(NewHiReg)
391 .addReg(AddrReg);
392 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
393 .addReg(StatusReg, getKillRegState(StatusDead))
394 .addMBB(LoadCmpBB);
395 BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
396 StoreBB->addSuccessor(Succ: LoadCmpBB);
397 StoreBB->addSuccessor(Succ: DoneBB);
398
399 // .Lfail:
400 // stlxp wStatus, xDestLo, xDestHi, [xAddr]
401 // cbnz wStatus, .Lloadcmp
402 BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
403 .addReg(DestLo.getReg())
404 .addReg(DestHi.getReg())
405 .addReg(AddrReg);
406 BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
407 .addReg(StatusReg, getKillRegState(StatusDead))
408 .addMBB(LoadCmpBB);
409 FailBB->addSuccessor(Succ: LoadCmpBB);
410 FailBB->addSuccessor(Succ: DoneBB);
411
412 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
413 DoneBB->transferSuccessors(FromMBB: &MBB);
414
415 MBB.addSuccessor(Succ: LoadCmpBB);
416
417 NextMBBI = MBB.end();
418 MI.eraseFromParent();
419
420 // Recompute liveness bottom up.
421 LivePhysRegs LiveRegs;
422 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
423 computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
424 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
425 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
426
427 // Do an extra pass in the loop to get the loop carried dependencies right.
428 FailBB->clearLiveIns();
429 computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
430 StoreBB->clearLiveIns();
431 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
432 LoadCmpBB->clearLiveIns();
433 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
434
435 return true;
436}
437
438/// \brief Expand Pseudos to Instructions with destructive operands.
439///
440/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
441/// or for fixing relaxed register allocation conditions to comply with
442/// the instructions register constraints. The latter case may be cheaper
443/// than setting the register constraints in the register allocator,
444/// since that will insert regular MOV instructions rather than MOVPRFX.
445///
446/// Example (after register allocation):
447///
448/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
449///
450/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
451/// * We cannot map directly to FSUB_ZPmZ_B because the register
452/// constraints of the instruction are not met.
453/// * Also the _ZERO specifies the false lanes need to be zeroed.
454///
455/// We first try to see if the destructive operand == result operand,
456/// if not, we try to swap the operands, e.g.
457///
458/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
459///
460/// But because FSUB_ZPmZ is not commutative, this is semantically
461/// different, so we need a reverse instruction:
462///
463/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
464///
465/// Then we implement the zeroing of the false lanes of Z0 by adding
466/// a zeroing MOVPRFX instruction:
467///
468/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
469/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
470///
471/// Note that this can only be done for _ZERO or _UNDEF variants where
472/// we can guarantee the false lanes to be zeroed (by implementing this)
473/// or that they are undef (don't care / not used), otherwise the
474/// swapping of operands is illegal because the operation is not
475/// (or cannot be emulated to be) fully commutative.
476bool AArch64ExpandPseudo::expand_DestructiveOp(
477 MachineInstr &MI,
478 MachineBasicBlock &MBB,
479 MachineBasicBlock::iterator MBBI) {
480 unsigned Opcode = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
481 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
482 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
483 bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
484 Register DstReg = MI.getOperand(i: 0).getReg();
485 bool DstIsDead = MI.getOperand(i: 0).isDead();
486 bool UseRev = false;
487 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
488
489 switch (DType) {
490 case AArch64::DestructiveBinaryComm:
491 case AArch64::DestructiveBinaryCommWithRev:
492 if (DstReg == MI.getOperand(i: 3).getReg()) {
493 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
494 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 3, args: 2);
495 UseRev = true;
496 break;
497 }
498 [[fallthrough]];
499 case AArch64::DestructiveBinary:
500 case AArch64::DestructiveBinaryImm:
501 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 2, args: 3);
502 break;
503 case AArch64::DestructiveUnaryPassthru:
504 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 2, args: 3, args: 3);
505 break;
506 case AArch64::DestructiveTernaryCommWithRev:
507 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 2, args: 3, args: 4);
508 if (DstReg == MI.getOperand(i: 3).getReg()) {
509 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
510 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 3, args: 4, args: 2);
511 UseRev = true;
512 } else if (DstReg == MI.getOperand(i: 4).getReg()) {
513 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
514 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 4, args: 3, args: 2);
515 UseRev = true;
516 }
517 break;
518 default:
519 llvm_unreachable("Unsupported Destructive Operand type");
520 }
521
522 // MOVPRFX can only be used if the destination operand
523 // is the destructive operand, not as any other operand,
524 // so the Destructive Operand must be unique.
525 bool DOPRegIsUnique = false;
526 switch (DType) {
527 case AArch64::DestructiveBinary:
528 DOPRegIsUnique = DstReg != MI.getOperand(i: SrcIdx).getReg();
529 break;
530 case AArch64::DestructiveBinaryComm:
531 case AArch64::DestructiveBinaryCommWithRev:
532 DOPRegIsUnique =
533 DstReg != MI.getOperand(i: DOPIdx).getReg() ||
534 MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg();
535 break;
536 case AArch64::DestructiveUnaryPassthru:
537 case AArch64::DestructiveBinaryImm:
538 DOPRegIsUnique = true;
539 break;
540 case AArch64::DestructiveTernaryCommWithRev:
541 DOPRegIsUnique =
542 DstReg != MI.getOperand(i: DOPIdx).getReg() ||
543 (MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg() &&
544 MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: Src2Idx).getReg());
545 break;
546 }
547
548 // Resolve the reverse opcode
549 if (UseRev) {
550 int NewOpcode;
551 // e.g. DIV -> DIVR
552 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
553 Opcode = NewOpcode;
554 // e.g. DIVR -> DIV
555 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
556 Opcode = NewOpcode;
557 }
558
559 // Get the right MOVPRFX
560 uint64_t ElementSize = TII->getElementSizeForOpcode(Opc: Opcode);
561 unsigned MovPrfx, LSLZero, MovPrfxZero;
562 switch (ElementSize) {
563 case AArch64::ElementSizeNone:
564 case AArch64::ElementSizeB:
565 MovPrfx = AArch64::MOVPRFX_ZZ;
566 LSLZero = AArch64::LSL_ZPmI_B;
567 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
568 break;
569 case AArch64::ElementSizeH:
570 MovPrfx = AArch64::MOVPRFX_ZZ;
571 LSLZero = AArch64::LSL_ZPmI_H;
572 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
573 break;
574 case AArch64::ElementSizeS:
575 MovPrfx = AArch64::MOVPRFX_ZZ;
576 LSLZero = AArch64::LSL_ZPmI_S;
577 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
578 break;
579 case AArch64::ElementSizeD:
580 MovPrfx = AArch64::MOVPRFX_ZZ;
581 LSLZero = AArch64::LSL_ZPmI_D;
582 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
583 break;
584 default:
585 llvm_unreachable("Unsupported ElementSize");
586 }
587
588 //
589 // Create the destructive operation (if required)
590 //
591 MachineInstrBuilder PRFX, DOP;
592 if (FalseZero) {
593 // If we cannot prefix the requested instruction we'll instead emit a
594 // prefixed_zeroing_mov for DestructiveBinary.
595 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
596 DType == AArch64::DestructiveBinaryComm ||
597 DType == AArch64::DestructiveBinaryCommWithRev) &&
598 "The destructive operand should be unique");
599 assert(ElementSize != AArch64::ElementSizeNone &&
600 "This instruction is unpredicated");
601
602 // Merge source operand into destination register
603 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
604 .addReg(DstReg, RegState::Define)
605 .addReg(MI.getOperand(i: PredIdx).getReg())
606 .addReg(MI.getOperand(i: DOPIdx).getReg());
607
608 // After the movprfx, the destructive operand is same as Dst
609 DOPIdx = 0;
610
611 // Create the additional LSL to zero the lanes when the DstReg is not
612 // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
613 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
614 if ((DType == AArch64::DestructiveBinary ||
615 DType == AArch64::DestructiveBinaryComm ||
616 DType == AArch64::DestructiveBinaryCommWithRev) &&
617 !DOPRegIsUnique) {
618 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
619 .addReg(DstReg, RegState::Define)
620 .add(MI.getOperand(i: PredIdx))
621 .addReg(DstReg)
622 .addImm(0);
623 }
624 } else if (DstReg != MI.getOperand(i: DOPIdx).getReg()) {
625 assert(DOPRegIsUnique && "The destructive operand should be unique");
626 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
627 .addReg(DstReg, RegState::Define)
628 .addReg(MI.getOperand(i: DOPIdx).getReg());
629 DOPIdx = 0;
630 }
631
632 //
633 // Create the destructive operation
634 //
635 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
636 .addReg(DstReg, RegState::Define | getDeadRegState(B: DstIsDead));
637
638 switch (DType) {
639 case AArch64::DestructiveUnaryPassthru:
640 DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill)
641 .add(MO: MI.getOperand(i: PredIdx))
642 .add(MO: MI.getOperand(i: SrcIdx));
643 break;
644 case AArch64::DestructiveBinary:
645 case AArch64::DestructiveBinaryImm:
646 case AArch64::DestructiveBinaryComm:
647 case AArch64::DestructiveBinaryCommWithRev:
648 DOP.add(MO: MI.getOperand(i: PredIdx))
649 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill)
650 .add(MO: MI.getOperand(i: SrcIdx));
651 break;
652 case AArch64::DestructiveTernaryCommWithRev:
653 DOP.add(MO: MI.getOperand(i: PredIdx))
654 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill)
655 .add(MO: MI.getOperand(i: SrcIdx))
656 .add(MO: MI.getOperand(i: Src2Idx));
657 break;
658 }
659
660 if (PRFX) {
661 finalizeBundle(MBB, FirstMI: PRFX->getIterator(), LastMI: MBBI->getIterator());
662 transferImpOps(OldMI&: MI, UseMI&: PRFX, DefMI&: DOP);
663 } else
664 transferImpOps(OldMI&: MI, UseMI&: DOP, DefMI&: DOP);
665
666 MI.eraseFromParent();
667 return true;
668}
669
670bool AArch64ExpandPseudo::expandSetTagLoop(
671 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
672 MachineBasicBlock::iterator &NextMBBI) {
673 MachineInstr &MI = *MBBI;
674 DebugLoc DL = MI.getDebugLoc();
675 Register SizeReg = MI.getOperand(i: 0).getReg();
676 Register AddressReg = MI.getOperand(i: 1).getReg();
677
678 MachineFunction *MF = MBB.getParent();
679
680 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
681 const unsigned OpCode1 =
682 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
683 const unsigned OpCode2 =
684 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
685
686 unsigned Size = MI.getOperand(i: 2).getImm();
687 assert(Size > 0 && Size % 16 == 0);
688 if (Size % (16 * 2) != 0) {
689 BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
690 .addReg(AddressReg)
691 .addReg(AddressReg)
692 .addImm(1);
693 Size -= 16;
694 }
695 MachineBasicBlock::iterator I =
696 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
697 .addImm(Size);
698 expandMOVImm(MBB, MBBI: I, BitSize: 64);
699
700 auto LoopBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
701 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
702
703 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopBB);
704 MF->insert(MBBI: ++LoopBB->getIterator(), MBB: DoneBB);
705
706 BuildMI(LoopBB, DL, TII->get(OpCode2))
707 .addDef(AddressReg)
708 .addReg(AddressReg)
709 .addReg(AddressReg)
710 .addImm(2)
711 .cloneMemRefs(MI)
712 .setMIFlags(MI.getFlags());
713 BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
714 .addDef(SizeReg)
715 .addReg(SizeReg)
716 .addImm(16 * 2)
717 .addImm(0);
718 BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
719 .addImm(AArch64CC::NE)
720 .addMBB(LoopBB)
721 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
722
723 LoopBB->addSuccessor(Succ: LoopBB);
724 LoopBB->addSuccessor(Succ: DoneBB);
725
726 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
727 DoneBB->transferSuccessors(FromMBB: &MBB);
728
729 MBB.addSuccessor(Succ: LoopBB);
730
731 NextMBBI = MBB.end();
732 MI.eraseFromParent();
733 // Recompute liveness bottom up.
734 LivePhysRegs LiveRegs;
735 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
736 computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
737 // Do an extra pass in the loop to get the loop carried dependencies right.
738 // FIXME: is this necessary?
739 LoopBB->clearLiveIns();
740 computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
741 DoneBB->clearLiveIns();
742 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
743
744 return true;
745}
746
747bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
748 MachineBasicBlock::iterator MBBI,
749 unsigned Opc, unsigned N) {
750 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
751 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
752 "Unexpected opcode");
753 unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
754 ? RegState::Define
755 : 0;
756 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
757 ? AArch64::zsub0
758 : AArch64::psub0;
759 const TargetRegisterInfo *TRI =
760 MBB.getParent()->getSubtarget().getRegisterInfo();
761 MachineInstr &MI = *MBBI;
762 for (unsigned Offset = 0; Offset < N; ++Offset) {
763 int ImmOffset = MI.getOperand(i: 2).getImm() + Offset;
764 bool Kill = (Offset + 1 == N) ? MI.getOperand(i: 1).isKill() : false;
765 assert(ImmOffset >= -256 && ImmOffset < 256 &&
766 "Immediate spill offset out of range");
767 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
768 .addReg(TRI->getSubReg(Reg: MI.getOperand(i: 0).getReg(), Idx: sub0 + Offset),
769 RState)
770 .addReg(MI.getOperand(i: 1).getReg(), getKillRegState(B: Kill))
771 .addImm(ImmOffset);
772 }
773 MI.eraseFromParent();
774 return true;
775}
776
777// Create a call to CallTarget, copying over all the operands from *MBBI,
778// starting at the regmask.
779static MachineInstr *createCall(MachineBasicBlock &MBB,
780 MachineBasicBlock::iterator MBBI,
781 const AArch64InstrInfo *TII,
782 MachineOperand &CallTarget,
783 unsigned RegMaskStartIdx) {
784 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
785 MachineInstr *Call =
786 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opc)).getInstr();
787
788 assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
789 "invalid operand for regular call");
790 Call->addOperand(Op: CallTarget);
791
792 // Register arguments are added during ISel, but cannot be added as explicit
793 // operands of the branch as it expects to be B <target> which is only one
794 // operand. Instead they are implicit operands used by the branch.
795 while (!MBBI->getOperand(i: RegMaskStartIdx).isRegMask()) {
796 auto MOP = MBBI->getOperand(i: RegMaskStartIdx);
797 assert(MOP.isReg() && "can only add register operands");
798 Call->addOperand(Op: MachineOperand::CreateReg(
799 Reg: MOP.getReg(), /*Def=*/isDef: false, /*Implicit=*/isImp: true, /*isKill=*/false,
800 /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
801 RegMaskStartIdx++;
802 }
803 for (const MachineOperand &MO :
804 llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: RegMaskStartIdx))
805 Call->addOperand(Op: MO);
806
807 return Call;
808}
809
810bool AArch64ExpandPseudo::expandCALL_RVMARKER(
811 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
812 // Expand CALL_RVMARKER pseudo to:
813 // - a branch to the call target, followed by
814 // - the special `mov x29, x29` marker, and
815 // - another branch, to the runtime function
816 // Mark the sequence as bundle, to avoid passes moving other code in between.
817 MachineInstr &MI = *MBBI;
818 MachineOperand &RVTarget = MI.getOperand(i: 0);
819 assert(RVTarget.isGlobal() && "invalid operand for attached call");
820 MachineInstr *OriginalCall =
821 createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 1),
822 // Regmask starts after the RV and call targets.
823 /*RegMaskStartIdx=*/2);
824
825 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
826 .addReg(AArch64::FP, RegState::Define)
827 .addReg(AArch64::XZR)
828 .addReg(AArch64::FP)
829 .addImm(0);
830
831 auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
832 .add(RVTarget)
833 .getInstr();
834
835 if (MI.shouldUpdateCallSiteInfo())
836 MBB.getParent()->moveCallSiteInfo(Old: &MI, New: OriginalCall);
837
838 MI.eraseFromParent();
839 finalizeBundle(MBB, OriginalCall->getIterator(),
840 std::next(RVCall->getIterator()));
841 return true;
842}
843
844bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
845 MachineBasicBlock::iterator MBBI) {
846 // Expand CALL_BTI pseudo to:
847 // - a branch to the call target
848 // - a BTI instruction
849 // Mark the sequence as a bundle, to avoid passes moving other code in
850 // between.
851 MachineInstr &MI = *MBBI;
852 MachineInstr *Call = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 0),
853 // Regmask starts after the call target.
854 /*RegMaskStartIdx=*/1);
855
856 Call->setCFIType(MF&: *MBB.getParent(), Type: MI.getCFIType());
857
858 MachineInstr *BTI =
859 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
860 // BTI J so that setjmp can to BR to this.
861 .addImm(36)
862 .getInstr();
863
864 if (MI.shouldUpdateCallSiteInfo())
865 MBB.getParent()->moveCallSiteInfo(Old: &MI, New: Call);
866
867 MI.eraseFromParent();
868 finalizeBundle(MBB, FirstMI: Call->getIterator(), LastMI: std::next(x: BTI->getIterator()));
869 return true;
870}
871
872bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
873 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
874 Register CtxReg = MBBI->getOperand(i: 0).getReg();
875 Register BaseReg = MBBI->getOperand(i: 1).getReg();
876 int Offset = MBBI->getOperand(i: 2).getImm();
877 DebugLoc DL(MBBI->getDebugLoc());
878 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
879
880 if (STI.getTargetTriple().getArchName() != "arm64e") {
881 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
882 .addUse(CtxReg)
883 .addUse(BaseReg)
884 .addImm(Offset / 8)
885 .setMIFlag(MachineInstr::FrameSetup);
886 MBBI->eraseFromParent();
887 return true;
888 }
889
890 // We need to sign the context in an address-discriminated way. 0xc31a is a
891 // fixed random value, chosen as part of the ABI.
892 // add x16, xBase, #Offset
893 // movk x16, #0xc31a, lsl #48
894 // mov x17, x22/xzr
895 // pacdb x17, x16
896 // str x17, [xBase, #Offset]
897 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
898 BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
899 .addUse(BaseReg)
900 .addImm(abs(Offset))
901 .addImm(0)
902 .setMIFlag(MachineInstr::FrameSetup);
903 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
904 .addUse(AArch64::X16)
905 .addImm(0xc31a)
906 .addImm(48)
907 .setMIFlag(MachineInstr::FrameSetup);
908 // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
909 // move it somewhere before signing.
910 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
911 .addUse(AArch64::XZR)
912 .addUse(CtxReg)
913 .addImm(0)
914 .setMIFlag(MachineInstr::FrameSetup);
915 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
916 .addUse(AArch64::X17)
917 .addUse(AArch64::X16)
918 .setMIFlag(MachineInstr::FrameSetup);
919 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
920 .addUse(AArch64::X17)
921 .addUse(BaseReg)
922 .addImm(Offset / 8)
923 .setMIFlag(MachineInstr::FrameSetup);
924
925 MBBI->eraseFromParent();
926 return true;
927}
928
929MachineBasicBlock *
930AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
931 MachineBasicBlock::iterator MBBI) {
932 MachineInstr &MI = *MBBI;
933 assert((std::next(MBBI) != MBB.end() ||
934 MI.getParent()->successors().begin() !=
935 MI.getParent()->successors().end()) &&
936 "Unexpected unreachable in block that restores ZA");
937
938 // Compare TPIDR2_EL0 value against 0.
939 DebugLoc DL = MI.getDebugLoc();
940 MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
941 .add(MI.getOperand(0));
942
943 // Split MBB and create two new blocks:
944 // - MBB now contains all instructions before RestoreZAPseudo.
945 // - SMBB contains the RestoreZAPseudo instruction only.
946 // - EndBB contains all instructions after RestoreZAPseudo.
947 MachineInstr &PrevMI = *std::prev(x: MBBI);
948 MachineBasicBlock *SMBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true);
949 MachineBasicBlock *EndBB = std::next(x: MI.getIterator()) == SMBB->end()
950 ? *SMBB->successors().begin()
951 : SMBB->splitAt(SplitInst&: MI, /*UpdateLiveIns*/ true);
952
953 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
954 Cbz.addMBB(MBB: SMBB);
955 BuildMI(&MBB, DL, TII->get(AArch64::B))
956 .addMBB(EndBB);
957 MBB.addSuccessor(Succ: EndBB);
958
959 // Replace the pseudo with a call (BL).
960 MachineInstrBuilder MIB =
961 BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
962 MIB.addReg(RegNo: MI.getOperand(i: 1).getReg(), flags: RegState::Implicit);
963 for (unsigned I = 2; I < MI.getNumOperands(); ++I)
964 MIB.add(MO: MI.getOperand(i: I));
965 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
966
967 MI.eraseFromParent();
968 return EndBB;
969}
970
971MachineBasicBlock *
972AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
973 MachineBasicBlock::iterator MBBI) {
974 MachineInstr &MI = *MBBI;
975 // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
976 // Exception handling code generated by Clang may introduce unreachables and it
977 // seems unnecessary to restore pstate.sm when that happens. Note that it is
978 // not just an optimisation, the code below expects a successor instruction/block
979 // in order to split the block at MBBI.
980 if (std::next(x: MBBI) == MBB.end() &&
981 MI.getParent()->successors().begin() ==
982 MI.getParent()->successors().end()) {
983 MI.eraseFromParent();
984 return &MBB;
985 }
986
987 // Expand the pseudo into smstart or smstop instruction. The pseudo has the
988 // following operands:
989 //
990 // MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
991 //
992 // The pseudo is expanded into a conditional smstart/smstop, with a
993 // check if pstate.sm (register) equals the expected value, and if not,
994 // invokes the smstart/smstop.
995 //
996 // As an example, the following block contains a normal call from a
997 // streaming-compatible function:
998 //
999 // OrigBB:
1000 // MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
1001 // bl @normal_callee
1002 // MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
1003 //
1004 // ...which will be transformed into:
1005 //
1006 // OrigBB:
1007 // TBNZx %0:gpr64, 0, SMBB
1008 // b EndBB
1009 //
1010 // SMBB:
1011 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
1012 //
1013 // EndBB:
1014 // bl @normal_callee
1015 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
1016 //
1017 DebugLoc DL = MI.getDebugLoc();
1018
1019 // Create the conditional branch based on the third operand of the
1020 // instruction, which tells us if we are wrapping a normal or streaming
1021 // function.
1022 // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1023 // expected value for the callee (0 for a normal callee and 1 for a streaming
1024 // callee).
1025 unsigned Opc;
1026 switch (MI.getOperand(i: 2).getImm()) {
1027 case AArch64SME::Always:
1028 llvm_unreachable("Should have matched to instruction directly");
1029 case AArch64SME::IfCallerIsStreaming:
1030 Opc = AArch64::TBNZW;
1031 break;
1032 case AArch64SME::IfCallerIsNonStreaming:
1033 Opc = AArch64::TBZW;
1034 break;
1035 }
1036 auto PStateSM = MI.getOperand(i: 3).getReg();
1037 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1038 unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
1039 MachineInstrBuilder Tbx =
1040 BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1041
1042 // Split MBB and create two new blocks:
1043 // - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
1044 // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
1045 // - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
1046 MachineInstr &PrevMI = *std::prev(x: MBBI);
1047 MachineBasicBlock *SMBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true);
1048 MachineBasicBlock *EndBB = std::next(x: MI.getIterator()) == SMBB->end()
1049 ? *SMBB->successors().begin()
1050 : SMBB->splitAt(SplitInst&: MI, /*UpdateLiveIns*/ true);
1051
1052 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1053 Tbx.addMBB(MBB: SMBB);
1054 BuildMI(&MBB, DL, TII->get(AArch64::B))
1055 .addMBB(EndBB);
1056 MBB.addSuccessor(Succ: EndBB);
1057
1058 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1059 MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1060 TII->get(AArch64::MSRpstatesvcrImm1));
1061 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1062 // these contain the CopyFromReg for the first argument and the flag to
1063 // indicate whether the callee is streaming or normal).
1064 MIB.add(MO: MI.getOperand(i: 0));
1065 MIB.add(MO: MI.getOperand(i: 1));
1066 for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1067 MIB.add(MO: MI.getOperand(i));
1068
1069 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1070
1071 MI.eraseFromParent();
1072 return EndBB;
1073}
1074
1075bool AArch64ExpandPseudo::expandMultiVecPseudo(
1076 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1077 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1078 unsigned ContiguousOp, unsigned StridedOpc) {
1079 MachineInstr &MI = *MBBI;
1080 Register Tuple = MI.getOperand(i: 0).getReg();
1081
1082 auto ContiguousRange = ContiguousClass.getRegisters();
1083 auto StridedRange = StridedClass.getRegisters();
1084 unsigned Opc;
1085 if (llvm::is_contained(Range&: ContiguousRange, Element: Tuple.asMCReg())) {
1086 Opc = ContiguousOp;
1087 } else if (llvm::is_contained(Range&: StridedRange, Element: Tuple.asMCReg())) {
1088 Opc = StridedOpc;
1089 } else
1090 llvm_unreachable("Cannot expand Multi-Vector pseudo");
1091
1092 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1093 .add(MI.getOperand(i: 0))
1094 .add(MI.getOperand(i: 1))
1095 .add(MI.getOperand(i: 2))
1096 .add(MI.getOperand(i: 3));
1097 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1098 MI.eraseFromParent();
1099 return true;
1100}
1101
1102/// If MBBI references a pseudo instruction that should be expanded here,
1103/// do the expansion and return true. Otherwise return false.
1104bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1105 MachineBasicBlock::iterator MBBI,
1106 MachineBasicBlock::iterator &NextMBBI) {
1107 MachineInstr &MI = *MBBI;
1108 unsigned Opcode = MI.getOpcode();
1109
1110 // Check if we can expand the destructive op
1111 int OrigInstr = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
1112 if (OrigInstr != -1) {
1113 auto &Orig = TII->get(OrigInstr);
1114 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1115 AArch64::NotDestructive) {
1116 return expand_DestructiveOp(MI, MBB, MBBI);
1117 }
1118 }
1119
1120 switch (Opcode) {
1121 default:
1122 break;
1123
1124 case AArch64::BSPv8i8:
1125 case AArch64::BSPv16i8: {
1126 Register DstReg = MI.getOperand(i: 0).getReg();
1127 if (DstReg == MI.getOperand(i: 3).getReg()) {
1128 // Expand to BIT
1129 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1130 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1131 : AArch64::BITv16i8))
1132 .add(MI.getOperand(0))
1133 .add(MI.getOperand(3))
1134 .add(MI.getOperand(2))
1135 .add(MI.getOperand(1));
1136 } else if (DstReg == MI.getOperand(i: 2).getReg()) {
1137 // Expand to BIF
1138 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1139 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1140 : AArch64::BIFv16i8))
1141 .add(MI.getOperand(0))
1142 .add(MI.getOperand(2))
1143 .add(MI.getOperand(3))
1144 .add(MI.getOperand(1));
1145 } else {
1146 // Expand to BSL, use additional move if required
1147 if (DstReg == MI.getOperand(i: 1).getReg()) {
1148 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1149 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1150 : AArch64::BSLv16i8))
1151 .add(MI.getOperand(0))
1152 .add(MI.getOperand(1))
1153 .add(MI.getOperand(2))
1154 .add(MI.getOperand(3));
1155 } else {
1156 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1157 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1158 : AArch64::ORRv16i8))
1159 .addReg(DstReg,
1160 RegState::Define |
1161 getRenamableRegState(MI.getOperand(0).isRenamable()))
1162 .add(MI.getOperand(1))
1163 .add(MI.getOperand(1));
1164 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1165 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1166 : AArch64::BSLv16i8))
1167 .add(MI.getOperand(0))
1168 .addReg(DstReg,
1169 RegState::Kill |
1170 getRenamableRegState(MI.getOperand(0).isRenamable()))
1171 .add(MI.getOperand(2))
1172 .add(MI.getOperand(3));
1173 }
1174 }
1175 MI.eraseFromParent();
1176 return true;
1177 }
1178
1179 case AArch64::ADDWrr:
1180 case AArch64::SUBWrr:
1181 case AArch64::ADDXrr:
1182 case AArch64::SUBXrr:
1183 case AArch64::ADDSWrr:
1184 case AArch64::SUBSWrr:
1185 case AArch64::ADDSXrr:
1186 case AArch64::SUBSXrr:
1187 case AArch64::ANDWrr:
1188 case AArch64::ANDXrr:
1189 case AArch64::BICWrr:
1190 case AArch64::BICXrr:
1191 case AArch64::ANDSWrr:
1192 case AArch64::ANDSXrr:
1193 case AArch64::BICSWrr:
1194 case AArch64::BICSXrr:
1195 case AArch64::EONWrr:
1196 case AArch64::EONXrr:
1197 case AArch64::EORWrr:
1198 case AArch64::EORXrr:
1199 case AArch64::ORNWrr:
1200 case AArch64::ORNXrr:
1201 case AArch64::ORRWrr:
1202 case AArch64::ORRXrr: {
1203 unsigned Opcode;
1204 switch (MI.getOpcode()) {
1205 default:
1206 return false;
1207 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
1208 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
1209 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
1210 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
1211 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
1212 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
1213 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
1214 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
1215 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
1216 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
1217 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
1218 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
1219 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
1220 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
1221 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
1222 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
1223 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
1224 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
1225 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
1226 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
1227 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
1228 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
1229 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
1230 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
1231 }
1232 MachineFunction &MF = *MBB.getParent();
1233 // Try to create new inst without implicit operands added.
1234 MachineInstr *NewMI = MF.CreateMachineInstr(
1235 MCID: TII->get(Opcode), DL: MI.getDebugLoc(), /*NoImplicit=*/true);
1236 MBB.insert(I: MBBI, MI: NewMI);
1237 MachineInstrBuilder MIB1(MF, NewMI);
1238 MIB1->setPCSections(MF, MD: MI.getPCSections());
1239 MIB1.addReg(RegNo: MI.getOperand(i: 0).getReg(), flags: RegState::Define)
1240 .add(MO: MI.getOperand(i: 1))
1241 .add(MO: MI.getOperand(i: 2))
1242 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
1243 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB1);
1244 if (auto DebugNumber = MI.peekDebugInstrNum())
1245 NewMI->setDebugInstrNum(DebugNumber);
1246 MI.eraseFromParent();
1247 return true;
1248 }
1249
1250 case AArch64::LOADgot: {
1251 MachineFunction *MF = MBB.getParent();
1252 Register DstReg = MI.getOperand(i: 0).getReg();
1253 const MachineOperand &MO1 = MI.getOperand(i: 1);
1254 unsigned Flags = MO1.getTargetFlags();
1255
1256 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1257 // Tiny codemodel expand to LDR
1258 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1259 TII->get(AArch64::LDRXl), DstReg);
1260
1261 if (MO1.isGlobal()) {
1262 MIB.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags);
1263 } else if (MO1.isSymbol()) {
1264 MIB.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags);
1265 } else {
1266 assert(MO1.isCPI() &&
1267 "Only expect globals, externalsymbols, or constant pools");
1268 MIB.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), TargetFlags: Flags);
1269 }
1270 } else {
1271 // Small codemodel expand into ADRP + LDR.
1272 MachineFunction &MF = *MI.getParent()->getParent();
1273 DebugLoc DL = MI.getDebugLoc();
1274 MachineInstrBuilder MIB1 =
1275 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1276
1277 MachineInstrBuilder MIB2;
1278 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1279 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1280 unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1281 unsigned DstFlags = MI.getOperand(i: 0).getTargetFlags();
1282 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1283 .addDef(Reg32)
1284 .addReg(DstReg, RegState::Kill)
1285 .addReg(DstReg, DstFlags | RegState::Implicit);
1286 } else {
1287 Register DstReg = MI.getOperand(i: 0).getReg();
1288 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1289 .add(MI.getOperand(0))
1290 .addUse(DstReg, RegState::Kill);
1291 }
1292
1293 if (MO1.isGlobal()) {
1294 MIB1.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags | AArch64II::MO_PAGE);
1295 MIB2.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0,
1296 TargetFlags: Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1297 } else if (MO1.isSymbol()) {
1298 MIB1.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | AArch64II::MO_PAGE);
1299 MIB2.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags |
1300 AArch64II::MO_PAGEOFF |
1301 AArch64II::MO_NC);
1302 } else {
1303 assert(MO1.isCPI() &&
1304 "Only expect globals, externalsymbols, or constant pools");
1305 MIB1.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1306 TargetFlags: Flags | AArch64II::MO_PAGE);
1307 MIB2.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1308 TargetFlags: Flags | AArch64II::MO_PAGEOFF |
1309 AArch64II::MO_NC);
1310 }
1311
1312 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2);
1313 }
1314 MI.eraseFromParent();
1315 return true;
1316 }
1317 case AArch64::MOVaddrBA: {
1318 MachineFunction &MF = *MI.getParent()->getParent();
1319 if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1320 // blockaddress expressions have to come from a constant pool because the
1321 // largest addend (and hence offset within a function) allowed for ADRP is
1322 // only 8MB.
1323 const BlockAddress *BA = MI.getOperand(i: 1).getBlockAddress();
1324 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1325
1326 MachineConstantPool *MCP = MF.getConstantPool();
1327 unsigned CPIdx = MCP->getConstantPoolIndex(C: BA, Alignment: Align(8));
1328
1329 Register DstReg = MI.getOperand(i: 0).getReg();
1330 auto MIB1 =
1331 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1332 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1333 auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1334 TII->get(AArch64::LDRXui), DstReg)
1335 .addUse(DstReg)
1336 .addConstantPoolIndex(
1337 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1338 transferImpOps(MI, MIB1, MIB2);
1339 MI.eraseFromParent();
1340 return true;
1341 }
1342 }
1343 [[fallthrough]];
1344 case AArch64::MOVaddr:
1345 case AArch64::MOVaddrJT:
1346 case AArch64::MOVaddrCP:
1347 case AArch64::MOVaddrTLS:
1348 case AArch64::MOVaddrEXT: {
1349 // Expand into ADRP + ADD.
1350 Register DstReg = MI.getOperand(i: 0).getReg();
1351 assert(DstReg != AArch64::XZR);
1352 MachineInstrBuilder MIB1 =
1353 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1354 .add(MI.getOperand(1));
1355
1356 if (MI.getOperand(i: 1).getTargetFlags() & AArch64II::MO_TAGGED) {
1357 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1358 // We do so by creating a MOVK that sets bits 48-63 of the register to
1359 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1360 // the small code model so we can assume a binary size of <= 4GB, which
1361 // makes the untagged PC relative offset positive. The binary must also be
1362 // loaded into address range [0, 2^48). Both of these properties need to
1363 // be ensured at runtime when using tagged addresses.
1364 auto Tag = MI.getOperand(i: 1);
1365 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1366 Tag.setOffset(0x100000000);
1367 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1368 .addReg(DstReg)
1369 .add(Tag)
1370 .addImm(48);
1371 }
1372
1373 MachineInstrBuilder MIB2 =
1374 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1375 .add(MI.getOperand(0))
1376 .addReg(DstReg)
1377 .add(MI.getOperand(2))
1378 .addImm(0);
1379
1380 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2);
1381 MI.eraseFromParent();
1382 return true;
1383 }
1384 case AArch64::ADDlowTLS:
1385 // Produce a plain ADD
1386 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1387 .add(MI.getOperand(0))
1388 .add(MI.getOperand(1))
1389 .add(MI.getOperand(2))
1390 .addImm(0);
1391 MI.eraseFromParent();
1392 return true;
1393
1394 case AArch64::MOVbaseTLS: {
1395 Register DstReg = MI.getOperand(i: 0).getReg();
1396 auto SysReg = AArch64SysReg::TPIDR_EL0;
1397 MachineFunction *MF = MBB.getParent();
1398 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1399 SysReg = AArch64SysReg::TPIDR_EL3;
1400 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1401 SysReg = AArch64SysReg::TPIDR_EL2;
1402 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1403 SysReg = AArch64SysReg::TPIDR_EL1;
1404 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1405 SysReg = AArch64SysReg::TPIDRRO_EL0;
1406 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1407 .addImm(SysReg);
1408 MI.eraseFromParent();
1409 return true;
1410 }
1411
1412 case AArch64::MOVi32imm:
1413 return expandMOVImm(MBB, MBBI, BitSize: 32);
1414 case AArch64::MOVi64imm:
1415 return expandMOVImm(MBB, MBBI, BitSize: 64);
1416 case AArch64::RET_ReallyLR: {
1417 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1418 // function and missing live-ins. We are fine in practice because callee
1419 // saved register handling ensures the register value is restored before
1420 // RET, but we need the undef flag here to appease the MachineVerifier
1421 // liveness checks.
1422 MachineInstrBuilder MIB =
1423 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1424 .addReg(AArch64::LR, RegState::Undef);
1425 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1426 MI.eraseFromParent();
1427 return true;
1428 }
1429 case AArch64::CMP_SWAP_8:
1430 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1431 AArch64::SUBSWrx,
1432 AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1433 AArch64::WZR, NextMBBI);
1434 case AArch64::CMP_SWAP_16:
1435 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1436 AArch64::SUBSWrx,
1437 AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1438 AArch64::WZR, NextMBBI);
1439 case AArch64::CMP_SWAP_32:
1440 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1441 AArch64::SUBSWrs,
1442 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1443 AArch64::WZR, NextMBBI);
1444 case AArch64::CMP_SWAP_64:
1445 return expandCMP_SWAP(MBB, MBBI,
1446 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1447 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1448 AArch64::XZR, NextMBBI);
1449 case AArch64::CMP_SWAP_128:
1450 case AArch64::CMP_SWAP_128_RELEASE:
1451 case AArch64::CMP_SWAP_128_ACQUIRE:
1452 case AArch64::CMP_SWAP_128_MONOTONIC:
1453 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1454
1455 case AArch64::AESMCrrTied:
1456 case AArch64::AESIMCrrTied: {
1457 MachineInstrBuilder MIB =
1458 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1459 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1460 AArch64::AESIMCrr))
1461 .add(MI.getOperand(0))
1462 .add(MI.getOperand(1));
1463 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1464 MI.eraseFromParent();
1465 return true;
1466 }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
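  // TAGPstack computes the address of a tagged stack slot. ADDG/SUBG only
  // accept an unsigned scaled offset, so the sign of the frame offset selects
  // the opcode and its magnitude becomes the immediate.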
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
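  // Only the writeback set-tag loops survive to this point and are expanded
  // into an explicit tag-store loop here; the non-writeback variants must have
  // been lowered by the prologue/epilogue inserter.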
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
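  // Spills and fills of SVE register tuples are split into N single-register
  // STR_ZXI/LDR_ZXI (or STR_PXI/LDR_PXI for predicates) at consecutive
  // vector-scaled offsets from the same base.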
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
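  // The SME ZA-restore and conditional streaming-mode-toggle pseudos may split
  // the current block into new control flow; when that happens NextMBBI no
  // longer points into this block and must be reset.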
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
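  // Coalescer barriers only constrain register coalescing earlier in the
  // pipeline; by this point they carry no semantics and are simply deleted.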
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case AArch64::COALESCER_BARRIER_FPR128:
    MI.eraseFromParent();
    return true;
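  // SME2 multi-vector load pseudos: each lowers to either the contiguous or
  // the strided form of the real instruction, chosen by whichever tuple
  // register class the destination was actually allocated in.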
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
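  // Four-vector (4Z) forms: the same contiguous-vs-strided selection, but over
  // the ZPR4 tuple classes.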
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}
