1 | //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that expands pseudo instructions into target |
10 | // instructions to allow proper scheduling and other late optimizations. This |
11 | // pass should be run after register allocation but before the post-regalloc |
12 | // scheduling pass. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "AArch64ExpandImm.h" |
17 | #include "AArch64InstrInfo.h" |
18 | #include "AArch64MachineFunctionInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "MCTargetDesc/AArch64AddressingModes.h" |
21 | #include "Utils/AArch64BaseInfo.h" |
22 | #include "llvm/CodeGen/LivePhysRegs.h" |
23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
24 | #include "llvm/CodeGen/MachineConstantPool.h" |
25 | #include "llvm/CodeGen/MachineFunction.h" |
26 | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | #include "llvm/CodeGen/MachineInstr.h" |
28 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
29 | #include "llvm/CodeGen/MachineOperand.h" |
30 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
31 | #include "llvm/IR/DebugLoc.h" |
32 | #include "llvm/MC/MCInstrDesc.h" |
33 | #include "llvm/Pass.h" |
34 | #include "llvm/Support/CodeGen.h" |
35 | #include "llvm/Support/MathExtras.h" |
36 | #include "llvm/Target/TargetMachine.h" |
37 | #include "llvm/TargetParser/Triple.h" |
38 | #include <cassert> |
39 | #include <cstdint> |
40 | #include <iterator> |
41 | #include <utility> |
42 | |
43 | using namespace llvm; |
44 | |
45 | #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass" |
46 | |
47 | namespace { |
48 | |
49 | class AArch64ExpandPseudo : public MachineFunctionPass { |
50 | public: |
51 | const AArch64InstrInfo *TII; |
52 | |
53 | static char ID; |
54 | |
55 | AArch64ExpandPseudo() : MachineFunctionPass(ID) { |
56 | initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry()); |
57 | } |
58 | |
59 | bool runOnMachineFunction(MachineFunction &Fn) override; |
60 | |
61 | StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; } |
62 | |
63 | private: |
64 | bool expandMBB(MachineBasicBlock &MBB); |
65 | bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
66 | MachineBasicBlock::iterator &NextMBBI); |
67 | bool expandMultiVecPseudo(MachineBasicBlock &MBB, |
68 | MachineBasicBlock::iterator MBBI, |
69 | TargetRegisterClass ContiguousClass, |
70 | TargetRegisterClass StridedClass, |
71 | unsigned ContiguousOpc, unsigned StridedOpc); |
72 | bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
73 | unsigned BitSize); |
74 | |
75 | bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB, |
76 | MachineBasicBlock::iterator MBBI); |
77 | bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
78 | unsigned LdarOp, unsigned StlrOp, unsigned CmpOp, |
79 | unsigned ExtendImm, unsigned ZeroReg, |
80 | MachineBasicBlock::iterator &NextMBBI); |
81 | bool expandCMP_SWAP_128(MachineBasicBlock &MBB, |
82 | MachineBasicBlock::iterator MBBI, |
83 | MachineBasicBlock::iterator &NextMBBI); |
84 | bool expandSetTagLoop(MachineBasicBlock &MBB, |
85 | MachineBasicBlock::iterator MBBI, |
86 | MachineBasicBlock::iterator &NextMBBI); |
87 | bool expandSVESpillFill(MachineBasicBlock &MBB, |
88 | MachineBasicBlock::iterator MBBI, unsigned Opc, |
89 | unsigned N); |
90 | bool expandCALL_RVMARKER(MachineBasicBlock &MBB, |
91 | MachineBasicBlock::iterator MBBI); |
92 | bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); |
93 | bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB, |
94 | MachineBasicBlock::iterator MBBI); |
95 | MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB, |
96 | MachineBasicBlock::iterator MBBI); |
97 | MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB, |
98 | MachineBasicBlock::iterator MBBI); |
99 | }; |
100 | |
101 | } // end anonymous namespace |
102 | |
103 | char AArch64ExpandPseudo::ID = 0; |
104 | |
105 | INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo" , |
106 | AARCH64_EXPAND_PSEUDO_NAME, false, false) |
107 | |
108 | /// Transfer implicit operands on the pseudo instruction to the |
109 | /// instructions created from the expansion. |
110 | static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, |
111 | MachineInstrBuilder &DefMI) { |
112 | const MCInstrDesc &Desc = OldMI.getDesc(); |
113 | for (const MachineOperand &MO : |
114 | llvm::drop_begin(RangeOrContainer: OldMI.operands(), N: Desc.getNumOperands())) { |
115 | assert(MO.isReg() && MO.getReg()); |
116 | if (MO.isUse()) |
117 | UseMI.add(MO); |
118 | else |
119 | DefMI.add(MO); |
120 | } |
121 | } |
122 | |
123 | /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more |
124 | /// real move-immediate instructions to synthesize the immediate. |
125 | bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, |
126 | MachineBasicBlock::iterator MBBI, |
127 | unsigned BitSize) { |
128 | MachineInstr &MI = *MBBI; |
129 | Register DstReg = MI.getOperand(i: 0).getReg(); |
130 | uint64_t RenamableState = |
131 | MI.getOperand(i: 0).isRenamable() ? RegState::Renamable : 0; |
132 | uint64_t Imm = MI.getOperand(i: 1).getImm(); |
133 | |
134 | if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) { |
135 | // Useless def, and we don't want to risk creating an invalid ORR (which |
136 | // would really write to sp). |
137 | MI.eraseFromParent(); |
138 | return true; |
139 | } |
140 | |
141 | SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; |
142 | AArch64_IMM::expandMOVImm(Imm, BitSize, Insn); |
143 | assert(Insn.size() != 0); |
144 | |
145 | SmallVector<MachineInstrBuilder, 4> MIBS; |
146 | for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) { |
147 | bool LastItem = std::next(x: I) == E; |
148 | switch (I->Opcode) |
149 | { |
150 | default: llvm_unreachable("unhandled!" ); break; |
151 | |
152 | case AArch64::ORRWri: |
153 | case AArch64::ORRXri: |
154 | if (I->Op1 == 0) { |
155 | MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) |
156 | .add(MI.getOperand(i: 0)) |
157 | .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) |
158 | .addImm(I->Op2)); |
159 | } else { |
160 | Register DstReg = MI.getOperand(i: 0).getReg(); |
161 | bool DstIsDead = MI.getOperand(i: 0).isDead(); |
162 | MIBS.push_back( |
163 | Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) |
164 | .addReg(DstReg, RegState::Define | |
165 | getDeadRegState(B: DstIsDead && LastItem) | |
166 | RenamableState) |
167 | .addReg(DstReg) |
168 | .addImm(I->Op2)); |
169 | } |
170 | break; |
171 | case AArch64::ANDXri: |
172 | case AArch64::EORXri: |
173 | if (I->Op1 == 0) { |
174 | MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) |
175 | .add(MI.getOperand(i: 0)) |
176 | .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) |
177 | .addImm(I->Op2)); |
178 | } else { |
179 | Register DstReg = MI.getOperand(i: 0).getReg(); |
180 | bool DstIsDead = MI.getOperand(i: 0).isDead(); |
181 | MIBS.push_back( |
182 | Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) |
183 | .addReg(DstReg, RegState::Define | |
184 | getDeadRegState(B: DstIsDead && LastItem) | |
185 | RenamableState) |
186 | .addReg(DstReg) |
187 | .addImm(I->Op2)); |
188 | } |
189 | break; |
190 | case AArch64::MOVNWi: |
191 | case AArch64::MOVNXi: |
192 | case AArch64::MOVZWi: |
193 | case AArch64::MOVZXi: { |
194 | bool DstIsDead = MI.getOperand(i: 0).isDead(); |
195 | MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) |
196 | .addReg(DstReg, RegState::Define | |
197 | getDeadRegState(B: DstIsDead && LastItem) | |
198 | RenamableState) |
199 | .addImm(I->Op1) |
200 | .addImm(I->Op2)); |
201 | } break; |
202 | case AArch64::MOVKWi: |
203 | case AArch64::MOVKXi: { |
204 | Register DstReg = MI.getOperand(i: 0).getReg(); |
205 | bool DstIsDead = MI.getOperand(i: 0).isDead(); |
206 | MIBS.push_back(Elt: BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) |
207 | .addReg(DstReg, |
208 | RegState::Define | |
209 | getDeadRegState(B: DstIsDead && LastItem) | |
210 | RenamableState) |
211 | .addReg(DstReg) |
212 | .addImm(I->Op1) |
213 | .addImm(I->Op2)); |
214 | } break; |
215 | } |
216 | } |
217 | transferImpOps(OldMI&: MI, UseMI&: MIBS.front(), DefMI&: MIBS.back()); |
218 | MI.eraseFromParent(); |
219 | return true; |
220 | } |
221 | |
222 | bool AArch64ExpandPseudo::expandCMP_SWAP( |
223 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp, |
224 | unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg, |
225 | MachineBasicBlock::iterator &NextMBBI) { |
226 | MachineInstr &MI = *MBBI; |
227 | MIMetadata MIMD(MI); |
228 | const MachineOperand &Dest = MI.getOperand(i: 0); |
229 | Register StatusReg = MI.getOperand(i: 1).getReg(); |
230 | bool StatusDead = MI.getOperand(i: 1).isDead(); |
231 | // Duplicating undef operands into 2 instructions does not guarantee the same |
232 | // value on both; However undef should be replaced by xzr anyway. |
233 | assert(!MI.getOperand(2).isUndef() && "cannot handle undef" ); |
234 | Register AddrReg = MI.getOperand(i: 2).getReg(); |
235 | Register DesiredReg = MI.getOperand(i: 3).getReg(); |
236 | Register NewReg = MI.getOperand(i: 4).getReg(); |
237 | |
238 | MachineFunction *MF = MBB.getParent(); |
239 | auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
240 | auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
241 | auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
242 | |
243 | MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB); |
244 | MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB); |
245 | MF->insert(MBBI: ++StoreBB->getIterator(), MBB: DoneBB); |
246 | |
247 | // .Lloadcmp: |
248 | // mov wStatus, 0 |
249 | // ldaxr xDest, [xAddr] |
250 | // cmp xDest, xDesired |
251 | // b.ne .Ldone |
252 | if (!StatusDead) |
253 | BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg) |
254 | .addImm(0).addImm(0); |
255 | BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg()) |
256 | .addReg(AddrReg); |
257 | BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg) |
258 | .addReg(Dest.getReg(), getKillRegState(B: Dest.isDead())) |
259 | .addReg(DesiredReg) |
260 | .addImm(ExtendImm); |
261 | BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc)) |
262 | .addImm(AArch64CC::NE) |
263 | .addMBB(DoneBB) |
264 | .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill); |
265 | LoadCmpBB->addSuccessor(Succ: DoneBB); |
266 | LoadCmpBB->addSuccessor(Succ: StoreBB); |
267 | |
268 | // .Lstore: |
269 | // stlxr wStatus, xNew, [xAddr] |
270 | // cbnz wStatus, .Lloadcmp |
271 | BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg) |
272 | .addReg(NewReg) |
273 | .addReg(AddrReg); |
274 | BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW)) |
275 | .addReg(StatusReg, getKillRegState(B: StatusDead)) |
276 | .addMBB(LoadCmpBB); |
277 | StoreBB->addSuccessor(Succ: LoadCmpBB); |
278 | StoreBB->addSuccessor(Succ: DoneBB); |
279 | |
280 | DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end()); |
281 | DoneBB->transferSuccessors(FromMBB: &MBB); |
282 | |
283 | MBB.addSuccessor(Succ: LoadCmpBB); |
284 | |
285 | NextMBBI = MBB.end(); |
286 | MI.eraseFromParent(); |
287 | |
288 | // Recompute livein lists. |
289 | LivePhysRegs LiveRegs; |
290 | computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB); |
291 | computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB); |
292 | computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB); |
293 | // Do an extra pass around the loop to get loop carried registers right. |
294 | StoreBB->clearLiveIns(); |
295 | computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB); |
296 | LoadCmpBB->clearLiveIns(); |
297 | computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB); |
298 | |
299 | return true; |
300 | } |
301 | |
302 | bool AArch64ExpandPseudo::expandCMP_SWAP_128( |
303 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
304 | MachineBasicBlock::iterator &NextMBBI) { |
305 | MachineInstr &MI = *MBBI; |
306 | MIMetadata MIMD(MI); |
307 | MachineOperand &DestLo = MI.getOperand(i: 0); |
308 | MachineOperand &DestHi = MI.getOperand(i: 1); |
309 | Register StatusReg = MI.getOperand(i: 2).getReg(); |
310 | bool StatusDead = MI.getOperand(i: 2).isDead(); |
311 | // Duplicating undef operands into 2 instructions does not guarantee the same |
312 | // value on both; However undef should be replaced by xzr anyway. |
313 | assert(!MI.getOperand(3).isUndef() && "cannot handle undef" ); |
314 | Register AddrReg = MI.getOperand(i: 3).getReg(); |
315 | Register DesiredLoReg = MI.getOperand(i: 4).getReg(); |
316 | Register DesiredHiReg = MI.getOperand(i: 5).getReg(); |
317 | Register NewLoReg = MI.getOperand(i: 6).getReg(); |
318 | Register NewHiReg = MI.getOperand(i: 7).getReg(); |
319 | |
320 | unsigned LdxpOp, StxpOp; |
321 | |
322 | switch (MI.getOpcode()) { |
323 | case AArch64::CMP_SWAP_128_MONOTONIC: |
324 | LdxpOp = AArch64::LDXPX; |
325 | StxpOp = AArch64::STXPX; |
326 | break; |
327 | case AArch64::CMP_SWAP_128_RELEASE: |
328 | LdxpOp = AArch64::LDXPX; |
329 | StxpOp = AArch64::STLXPX; |
330 | break; |
331 | case AArch64::CMP_SWAP_128_ACQUIRE: |
332 | LdxpOp = AArch64::LDAXPX; |
333 | StxpOp = AArch64::STXPX; |
334 | break; |
335 | case AArch64::CMP_SWAP_128: |
336 | LdxpOp = AArch64::LDAXPX; |
337 | StxpOp = AArch64::STLXPX; |
338 | break; |
339 | default: |
340 | llvm_unreachable("Unexpected opcode" ); |
341 | } |
342 | |
343 | MachineFunction *MF = MBB.getParent(); |
344 | auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
345 | auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
346 | auto FailBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
347 | auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
348 | |
349 | MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB); |
350 | MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB); |
351 | MF->insert(MBBI: ++StoreBB->getIterator(), MBB: FailBB); |
352 | MF->insert(MBBI: ++FailBB->getIterator(), MBB: DoneBB); |
353 | |
354 | // .Lloadcmp: |
355 | // ldaxp xDestLo, xDestHi, [xAddr] |
356 | // cmp xDestLo, xDesiredLo |
357 | // sbcs xDestHi, xDesiredHi |
358 | // b.ne .Ldone |
359 | BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp)) |
360 | .addReg(DestLo.getReg(), RegState::Define) |
361 | .addReg(DestHi.getReg(), RegState::Define) |
362 | .addReg(AddrReg); |
363 | BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR) |
364 | .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead())) |
365 | .addReg(DesiredLoReg) |
366 | .addImm(0); |
367 | BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg) |
368 | .addUse(AArch64::WZR) |
369 | .addUse(AArch64::WZR) |
370 | .addImm(AArch64CC::EQ); |
371 | BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR) |
372 | .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead())) |
373 | .addReg(DesiredHiReg) |
374 | .addImm(0); |
375 | BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg) |
376 | .addUse(StatusReg, RegState::Kill) |
377 | .addUse(StatusReg, RegState::Kill) |
378 | .addImm(AArch64CC::EQ); |
379 | BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW)) |
380 | .addUse(StatusReg, getKillRegState(StatusDead)) |
381 | .addMBB(FailBB); |
382 | LoadCmpBB->addSuccessor(Succ: FailBB); |
383 | LoadCmpBB->addSuccessor(Succ: StoreBB); |
384 | |
385 | // .Lstore: |
386 | // stlxp wStatus, xNewLo, xNewHi, [xAddr] |
387 | // cbnz wStatus, .Lloadcmp |
388 | BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg) |
389 | .addReg(NewLoReg) |
390 | .addReg(NewHiReg) |
391 | .addReg(AddrReg); |
392 | BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW)) |
393 | .addReg(StatusReg, getKillRegState(StatusDead)) |
394 | .addMBB(LoadCmpBB); |
395 | BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB); |
396 | StoreBB->addSuccessor(Succ: LoadCmpBB); |
397 | StoreBB->addSuccessor(Succ: DoneBB); |
398 | |
399 | // .Lfail: |
400 | // stlxp wStatus, xDestLo, xDestHi, [xAddr] |
401 | // cbnz wStatus, .Lloadcmp |
402 | BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg) |
403 | .addReg(DestLo.getReg()) |
404 | .addReg(DestHi.getReg()) |
405 | .addReg(AddrReg); |
406 | BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW)) |
407 | .addReg(StatusReg, getKillRegState(StatusDead)) |
408 | .addMBB(LoadCmpBB); |
409 | FailBB->addSuccessor(Succ: LoadCmpBB); |
410 | FailBB->addSuccessor(Succ: DoneBB); |
411 | |
412 | DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end()); |
413 | DoneBB->transferSuccessors(FromMBB: &MBB); |
414 | |
415 | MBB.addSuccessor(Succ: LoadCmpBB); |
416 | |
417 | NextMBBI = MBB.end(); |
418 | MI.eraseFromParent(); |
419 | |
420 | // Recompute liveness bottom up. |
421 | LivePhysRegs LiveRegs; |
422 | computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB); |
423 | computeAndAddLiveIns(LiveRegs, MBB&: *FailBB); |
424 | computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB); |
425 | computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB); |
426 | |
427 | // Do an extra pass in the loop to get the loop carried dependencies right. |
428 | FailBB->clearLiveIns(); |
429 | computeAndAddLiveIns(LiveRegs, MBB&: *FailBB); |
430 | StoreBB->clearLiveIns(); |
431 | computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB); |
432 | LoadCmpBB->clearLiveIns(); |
433 | computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB); |
434 | |
435 | return true; |
436 | } |
437 | |
438 | /// \brief Expand Pseudos to Instructions with destructive operands. |
439 | /// |
440 | /// This mechanism uses MOVPRFX instructions for zeroing the false lanes |
441 | /// or for fixing relaxed register allocation conditions to comply with |
442 | /// the instructions register constraints. The latter case may be cheaper |
443 | /// than setting the register constraints in the register allocator, |
444 | /// since that will insert regular MOV instructions rather than MOVPRFX. |
445 | /// |
446 | /// Example (after register allocation): |
447 | /// |
448 | /// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0 |
449 | /// |
450 | /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B. |
451 | /// * We cannot map directly to FSUB_ZPmZ_B because the register |
452 | /// constraints of the instruction are not met. |
453 | /// * Also the _ZERO specifies the false lanes need to be zeroed. |
454 | /// |
455 | /// We first try to see if the destructive operand == result operand, |
456 | /// if not, we try to swap the operands, e.g. |
457 | /// |
458 | /// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1 |
459 | /// |
460 | /// But because FSUB_ZPmZ is not commutative, this is semantically |
461 | /// different, so we need a reverse instruction: |
462 | /// |
463 | /// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1 |
464 | /// |
465 | /// Then we implement the zeroing of the false lanes of Z0 by adding |
466 | /// a zeroing MOVPRFX instruction: |
467 | /// |
468 | /// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0 |
469 | /// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1 |
470 | /// |
471 | /// Note that this can only be done for _ZERO or _UNDEF variants where |
472 | /// we can guarantee the false lanes to be zeroed (by implementing this) |
473 | /// or that they are undef (don't care / not used), otherwise the |
474 | /// swapping of operands is illegal because the operation is not |
475 | /// (or cannot be emulated to be) fully commutative. |
476 | bool AArch64ExpandPseudo::expand_DestructiveOp( |
477 | MachineInstr &MI, |
478 | MachineBasicBlock &MBB, |
479 | MachineBasicBlock::iterator MBBI) { |
480 | unsigned Opcode = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode()); |
481 | uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask; |
482 | uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask; |
483 | bool FalseZero = FalseLanes == AArch64::FalseLanesZero; |
484 | Register DstReg = MI.getOperand(i: 0).getReg(); |
485 | bool DstIsDead = MI.getOperand(i: 0).isDead(); |
486 | bool UseRev = false; |
487 | unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx; |
488 | |
489 | switch (DType) { |
490 | case AArch64::DestructiveBinaryComm: |
491 | case AArch64::DestructiveBinaryCommWithRev: |
492 | if (DstReg == MI.getOperand(i: 3).getReg()) { |
493 | // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1 |
494 | std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 3, args: 2); |
495 | UseRev = true; |
496 | break; |
497 | } |
498 | [[fallthrough]]; |
499 | case AArch64::DestructiveBinary: |
500 | case AArch64::DestructiveBinaryImm: |
501 | std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 2, args: 3); |
502 | break; |
503 | case AArch64::DestructiveUnaryPassthru: |
504 | std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 2, args: 3, args: 3); |
505 | break; |
506 | case AArch64::DestructiveTernaryCommWithRev: |
507 | std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 2, args: 3, args: 4); |
508 | if (DstReg == MI.getOperand(i: 3).getReg()) { |
509 | // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za |
510 | std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 3, args: 4, args: 2); |
511 | UseRev = true; |
512 | } else if (DstReg == MI.getOperand(i: 4).getReg()) { |
513 | // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za |
514 | std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 4, args: 3, args: 2); |
515 | UseRev = true; |
516 | } |
517 | break; |
518 | default: |
519 | llvm_unreachable("Unsupported Destructive Operand type" ); |
520 | } |
521 | |
522 | // MOVPRFX can only be used if the destination operand |
523 | // is the destructive operand, not as any other operand, |
524 | // so the Destructive Operand must be unique. |
525 | bool DOPRegIsUnique = false; |
526 | switch (DType) { |
527 | case AArch64::DestructiveBinary: |
528 | DOPRegIsUnique = DstReg != MI.getOperand(i: SrcIdx).getReg(); |
529 | break; |
530 | case AArch64::DestructiveBinaryComm: |
531 | case AArch64::DestructiveBinaryCommWithRev: |
532 | DOPRegIsUnique = |
533 | DstReg != MI.getOperand(i: DOPIdx).getReg() || |
534 | MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg(); |
535 | break; |
536 | case AArch64::DestructiveUnaryPassthru: |
537 | case AArch64::DestructiveBinaryImm: |
538 | DOPRegIsUnique = true; |
539 | break; |
540 | case AArch64::DestructiveTernaryCommWithRev: |
541 | DOPRegIsUnique = |
542 | DstReg != MI.getOperand(i: DOPIdx).getReg() || |
543 | (MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg() && |
544 | MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: Src2Idx).getReg()); |
545 | break; |
546 | } |
547 | |
548 | // Resolve the reverse opcode |
549 | if (UseRev) { |
550 | int NewOpcode; |
551 | // e.g. DIV -> DIVR |
552 | if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1) |
553 | Opcode = NewOpcode; |
554 | // e.g. DIVR -> DIV |
555 | else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1) |
556 | Opcode = NewOpcode; |
557 | } |
558 | |
559 | // Get the right MOVPRFX |
560 | uint64_t ElementSize = TII->getElementSizeForOpcode(Opc: Opcode); |
561 | unsigned MovPrfx, LSLZero, MovPrfxZero; |
562 | switch (ElementSize) { |
563 | case AArch64::ElementSizeNone: |
564 | case AArch64::ElementSizeB: |
565 | MovPrfx = AArch64::MOVPRFX_ZZ; |
566 | LSLZero = AArch64::LSL_ZPmI_B; |
567 | MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B; |
568 | break; |
569 | case AArch64::ElementSizeH: |
570 | MovPrfx = AArch64::MOVPRFX_ZZ; |
571 | LSLZero = AArch64::LSL_ZPmI_H; |
572 | MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H; |
573 | break; |
574 | case AArch64::ElementSizeS: |
575 | MovPrfx = AArch64::MOVPRFX_ZZ; |
576 | LSLZero = AArch64::LSL_ZPmI_S; |
577 | MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S; |
578 | break; |
579 | case AArch64::ElementSizeD: |
580 | MovPrfx = AArch64::MOVPRFX_ZZ; |
581 | LSLZero = AArch64::LSL_ZPmI_D; |
582 | MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D; |
583 | break; |
584 | default: |
585 | llvm_unreachable("Unsupported ElementSize" ); |
586 | } |
587 | |
588 | // |
589 | // Create the destructive operation (if required) |
590 | // |
591 | MachineInstrBuilder PRFX, DOP; |
592 | if (FalseZero) { |
593 | // If we cannot prefix the requested instruction we'll instead emit a |
594 | // prefixed_zeroing_mov for DestructiveBinary. |
595 | assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary || |
596 | DType == AArch64::DestructiveBinaryComm || |
597 | DType == AArch64::DestructiveBinaryCommWithRev) && |
598 | "The destructive operand should be unique" ); |
599 | assert(ElementSize != AArch64::ElementSizeNone && |
600 | "This instruction is unpredicated" ); |
601 | |
602 | // Merge source operand into destination register |
603 | PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero)) |
604 | .addReg(DstReg, RegState::Define) |
605 | .addReg(MI.getOperand(i: PredIdx).getReg()) |
606 | .addReg(MI.getOperand(i: DOPIdx).getReg()); |
607 | |
608 | // After the movprfx, the destructive operand is same as Dst |
609 | DOPIdx = 0; |
610 | |
611 | // Create the additional LSL to zero the lanes when the DstReg is not |
612 | // unique. Zeros the lanes in z0 that aren't active in p0 with sequence |
613 | // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0; |
614 | if ((DType == AArch64::DestructiveBinary || |
615 | DType == AArch64::DestructiveBinaryComm || |
616 | DType == AArch64::DestructiveBinaryCommWithRev) && |
617 | !DOPRegIsUnique) { |
618 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero)) |
619 | .addReg(DstReg, RegState::Define) |
620 | .add(MI.getOperand(i: PredIdx)) |
621 | .addReg(DstReg) |
622 | .addImm(0); |
623 | } |
624 | } else if (DstReg != MI.getOperand(i: DOPIdx).getReg()) { |
625 | assert(DOPRegIsUnique && "The destructive operand should be unique" ); |
626 | PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx)) |
627 | .addReg(DstReg, RegState::Define) |
628 | .addReg(MI.getOperand(i: DOPIdx).getReg()); |
629 | DOPIdx = 0; |
630 | } |
631 | |
632 | // |
633 | // Create the destructive operation |
634 | // |
635 | DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode)) |
636 | .addReg(DstReg, RegState::Define | getDeadRegState(B: DstIsDead)); |
637 | |
638 | switch (DType) { |
639 | case AArch64::DestructiveUnaryPassthru: |
640 | DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill) |
641 | .add(MO: MI.getOperand(i: PredIdx)) |
642 | .add(MO: MI.getOperand(i: SrcIdx)); |
643 | break; |
644 | case AArch64::DestructiveBinary: |
645 | case AArch64::DestructiveBinaryImm: |
646 | case AArch64::DestructiveBinaryComm: |
647 | case AArch64::DestructiveBinaryCommWithRev: |
648 | DOP.add(MO: MI.getOperand(i: PredIdx)) |
649 | .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill) |
650 | .add(MO: MI.getOperand(i: SrcIdx)); |
651 | break; |
652 | case AArch64::DestructiveTernaryCommWithRev: |
653 | DOP.add(MO: MI.getOperand(i: PredIdx)) |
654 | .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill) |
655 | .add(MO: MI.getOperand(i: SrcIdx)) |
656 | .add(MO: MI.getOperand(i: Src2Idx)); |
657 | break; |
658 | } |
659 | |
660 | if (PRFX) { |
661 | finalizeBundle(MBB, FirstMI: PRFX->getIterator(), LastMI: MBBI->getIterator()); |
662 | transferImpOps(OldMI&: MI, UseMI&: PRFX, DefMI&: DOP); |
663 | } else |
664 | transferImpOps(OldMI&: MI, UseMI&: DOP, DefMI&: DOP); |
665 | |
666 | MI.eraseFromParent(); |
667 | return true; |
668 | } |
669 | |
670 | bool AArch64ExpandPseudo::expandSetTagLoop( |
671 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
672 | MachineBasicBlock::iterator &NextMBBI) { |
673 | MachineInstr &MI = *MBBI; |
674 | DebugLoc DL = MI.getDebugLoc(); |
675 | Register SizeReg = MI.getOperand(i: 0).getReg(); |
676 | Register AddressReg = MI.getOperand(i: 1).getReg(); |
677 | |
678 | MachineFunction *MF = MBB.getParent(); |
679 | |
680 | bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback; |
681 | const unsigned OpCode1 = |
682 | ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex; |
683 | const unsigned OpCode2 = |
684 | ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex; |
685 | |
686 | unsigned Size = MI.getOperand(i: 2).getImm(); |
687 | assert(Size > 0 && Size % 16 == 0); |
688 | if (Size % (16 * 2) != 0) { |
689 | BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg) |
690 | .addReg(AddressReg) |
691 | .addReg(AddressReg) |
692 | .addImm(1); |
693 | Size -= 16; |
694 | } |
695 | MachineBasicBlock::iterator I = |
696 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg) |
697 | .addImm(Size); |
698 | expandMOVImm(MBB, MBBI: I, BitSize: 64); |
699 | |
700 | auto LoopBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
701 | auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
702 | |
703 | MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopBB); |
704 | MF->insert(MBBI: ++LoopBB->getIterator(), MBB: DoneBB); |
705 | |
706 | BuildMI(LoopBB, DL, TII->get(OpCode2)) |
707 | .addDef(AddressReg) |
708 | .addReg(AddressReg) |
709 | .addReg(AddressReg) |
710 | .addImm(2) |
711 | .cloneMemRefs(MI) |
712 | .setMIFlags(MI.getFlags()); |
713 | BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri)) |
714 | .addDef(SizeReg) |
715 | .addReg(SizeReg) |
716 | .addImm(16 * 2) |
717 | .addImm(0); |
718 | BuildMI(LoopBB, DL, TII->get(AArch64::Bcc)) |
719 | .addImm(AArch64CC::NE) |
720 | .addMBB(LoopBB) |
721 | .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill); |
722 | |
723 | LoopBB->addSuccessor(Succ: LoopBB); |
724 | LoopBB->addSuccessor(Succ: DoneBB); |
725 | |
726 | DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end()); |
727 | DoneBB->transferSuccessors(FromMBB: &MBB); |
728 | |
729 | MBB.addSuccessor(Succ: LoopBB); |
730 | |
731 | NextMBBI = MBB.end(); |
732 | MI.eraseFromParent(); |
733 | // Recompute liveness bottom up. |
734 | LivePhysRegs LiveRegs; |
735 | computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB); |
736 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB); |
737 | // Do an extra pass in the loop to get the loop carried dependencies right. |
738 | // FIXME: is this necessary? |
739 | LoopBB->clearLiveIns(); |
740 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB); |
741 | DoneBB->clearLiveIns(); |
742 | computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB); |
743 | |
744 | return true; |
745 | } |
746 | |
747 | bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, |
748 | MachineBasicBlock::iterator MBBI, |
749 | unsigned Opc, unsigned N) { |
750 | assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI || |
751 | Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) && |
752 | "Unexpected opcode" ); |
753 | unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI) |
754 | ? RegState::Define |
755 | : 0; |
756 | unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) |
757 | ? AArch64::zsub0 |
758 | : AArch64::psub0; |
759 | const TargetRegisterInfo *TRI = |
760 | MBB.getParent()->getSubtarget().getRegisterInfo(); |
761 | MachineInstr &MI = *MBBI; |
762 | for (unsigned Offset = 0; Offset < N; ++Offset) { |
763 | int ImmOffset = MI.getOperand(i: 2).getImm() + Offset; |
764 | bool Kill = (Offset + 1 == N) ? MI.getOperand(i: 1).isKill() : false; |
765 | assert(ImmOffset >= -256 && ImmOffset < 256 && |
766 | "Immediate spill offset out of range" ); |
767 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) |
768 | .addReg(TRI->getSubReg(Reg: MI.getOperand(i: 0).getReg(), Idx: sub0 + Offset), |
769 | RState) |
770 | .addReg(MI.getOperand(i: 1).getReg(), getKillRegState(B: Kill)) |
771 | .addImm(ImmOffset); |
772 | } |
773 | MI.eraseFromParent(); |
774 | return true; |
775 | } |
776 | |
777 | // Create a call to CallTarget, copying over all the operands from *MBBI, |
778 | // starting at the regmask. |
779 | static MachineInstr *createCall(MachineBasicBlock &MBB, |
780 | MachineBasicBlock::iterator MBBI, |
781 | const AArch64InstrInfo *TII, |
782 | MachineOperand &CallTarget, |
783 | unsigned RegMaskStartIdx) { |
784 | unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; |
785 | MachineInstr *Call = |
786 | BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opc)).getInstr(); |
787 | |
788 | assert((CallTarget.isGlobal() || CallTarget.isReg()) && |
789 | "invalid operand for regular call" ); |
790 | Call->addOperand(Op: CallTarget); |
791 | |
792 | // Register arguments are added during ISel, but cannot be added as explicit |
793 | // operands of the branch as it expects to be B <target> which is only one |
794 | // operand. Instead they are implicit operands used by the branch. |
795 | while (!MBBI->getOperand(i: RegMaskStartIdx).isRegMask()) { |
796 | auto MOP = MBBI->getOperand(i: RegMaskStartIdx); |
797 | assert(MOP.isReg() && "can only add register operands" ); |
798 | Call->addOperand(Op: MachineOperand::CreateReg( |
799 | Reg: MOP.getReg(), /*Def=*/isDef: false, /*Implicit=*/isImp: true, /*isKill=*/false, |
800 | /*isDead=*/false, /*isUndef=*/MOP.isUndef())); |
801 | RegMaskStartIdx++; |
802 | } |
803 | for (const MachineOperand &MO : |
804 | llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: RegMaskStartIdx)) |
805 | Call->addOperand(Op: MO); |
806 | |
807 | return Call; |
808 | } |
809 | |
810 | bool AArch64ExpandPseudo::expandCALL_RVMARKER( |
811 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { |
812 | // Expand CALL_RVMARKER pseudo to: |
813 | // - a branch to the call target, followed by |
814 | // - the special `mov x29, x29` marker, and |
815 | // - another branch, to the runtime function |
816 | // Mark the sequence as bundle, to avoid passes moving other code in between. |
817 | MachineInstr &MI = *MBBI; |
818 | MachineOperand &RVTarget = MI.getOperand(i: 0); |
819 | assert(RVTarget.isGlobal() && "invalid operand for attached call" ); |
820 | MachineInstr *OriginalCall = |
821 | createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 1), |
822 | // Regmask starts after the RV and call targets. |
823 | /*RegMaskStartIdx=*/2); |
824 | |
825 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) |
826 | .addReg(AArch64::FP, RegState::Define) |
827 | .addReg(AArch64::XZR) |
828 | .addReg(AArch64::FP) |
829 | .addImm(0); |
830 | |
831 | auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL)) |
832 | .add(RVTarget) |
833 | .getInstr(); |
834 | |
835 | if (MI.shouldUpdateCallSiteInfo()) |
836 | MBB.getParent()->moveCallSiteInfo(Old: &MI, New: OriginalCall); |
837 | |
838 | MI.eraseFromParent(); |
839 | finalizeBundle(MBB, OriginalCall->getIterator(), |
840 | std::next(RVCall->getIterator())); |
841 | return true; |
842 | } |
843 | |
844 | bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB, |
845 | MachineBasicBlock::iterator MBBI) { |
846 | // Expand CALL_BTI pseudo to: |
847 | // - a branch to the call target |
848 | // - a BTI instruction |
849 | // Mark the sequence as a bundle, to avoid passes moving other code in |
850 | // between. |
851 | MachineInstr &MI = *MBBI; |
852 | MachineInstr *Call = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 0), |
853 | // Regmask starts after the call target. |
854 | /*RegMaskStartIdx=*/1); |
855 | |
856 | Call->setCFIType(MF&: *MBB.getParent(), Type: MI.getCFIType()); |
857 | |
858 | MachineInstr *BTI = |
859 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT)) |
860 | // BTI J so that setjmp can to BR to this. |
861 | .addImm(36) |
862 | .getInstr(); |
863 | |
864 | if (MI.shouldUpdateCallSiteInfo()) |
865 | MBB.getParent()->moveCallSiteInfo(Old: &MI, New: Call); |
866 | |
867 | MI.eraseFromParent(); |
868 | finalizeBundle(MBB, FirstMI: Call->getIterator(), LastMI: std::next(x: BTI->getIterator())); |
869 | return true; |
870 | } |
871 | |
872 | bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext( |
873 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { |
874 | Register CtxReg = MBBI->getOperand(i: 0).getReg(); |
875 | Register BaseReg = MBBI->getOperand(i: 1).getReg(); |
876 | int Offset = MBBI->getOperand(i: 2).getImm(); |
877 | DebugLoc DL(MBBI->getDebugLoc()); |
878 | auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>(); |
879 | |
880 | if (STI.getTargetTriple().getArchName() != "arm64e" ) { |
881 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui)) |
882 | .addUse(CtxReg) |
883 | .addUse(BaseReg) |
884 | .addImm(Offset / 8) |
885 | .setMIFlag(MachineInstr::FrameSetup); |
886 | MBBI->eraseFromParent(); |
887 | return true; |
888 | } |
889 | |
890 | // We need to sign the context in an address-discriminated way. 0xc31a is a |
891 | // fixed random value, chosen as part of the ABI. |
892 | // add x16, xBase, #Offset |
893 | // movk x16, #0xc31a, lsl #48 |
894 | // mov x17, x22/xzr |
895 | // pacdb x17, x16 |
896 | // str x17, [xBase, #Offset] |
897 | unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri; |
898 | BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16) |
899 | .addUse(BaseReg) |
900 | .addImm(abs(Offset)) |
901 | .addImm(0) |
902 | .setMIFlag(MachineInstr::FrameSetup); |
903 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16) |
904 | .addUse(AArch64::X16) |
905 | .addImm(0xc31a) |
906 | .addImm(48) |
907 | .setMIFlag(MachineInstr::FrameSetup); |
908 | // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so |
909 | // move it somewhere before signing. |
910 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17) |
911 | .addUse(AArch64::XZR) |
912 | .addUse(CtxReg) |
913 | .addImm(0) |
914 | .setMIFlag(MachineInstr::FrameSetup); |
915 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17) |
916 | .addUse(AArch64::X17) |
917 | .addUse(AArch64::X16) |
918 | .setMIFlag(MachineInstr::FrameSetup); |
919 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui)) |
920 | .addUse(AArch64::X17) |
921 | .addUse(BaseReg) |
922 | .addImm(Offset / 8) |
923 | .setMIFlag(MachineInstr::FrameSetup); |
924 | |
925 | MBBI->eraseFromParent(); |
926 | return true; |
927 | } |
928 | |
929 | MachineBasicBlock * |
930 | AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB, |
931 | MachineBasicBlock::iterator MBBI) { |
932 | MachineInstr &MI = *MBBI; |
933 | assert((std::next(MBBI) != MBB.end() || |
934 | MI.getParent()->successors().begin() != |
935 | MI.getParent()->successors().end()) && |
936 | "Unexpected unreachable in block that restores ZA" ); |
937 | |
938 | // Compare TPIDR2_EL0 value against 0. |
939 | DebugLoc DL = MI.getDebugLoc(); |
940 | MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)) |
941 | .add(MI.getOperand(0)); |
942 | |
943 | // Split MBB and create two new blocks: |
944 | // - MBB now contains all instructions before RestoreZAPseudo. |
945 | // - SMBB contains the RestoreZAPseudo instruction only. |
946 | // - EndBB contains all instructions after RestoreZAPseudo. |
947 | MachineInstr &PrevMI = *std::prev(x: MBBI); |
948 | MachineBasicBlock *SMBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true); |
949 | MachineBasicBlock *EndBB = std::next(x: MI.getIterator()) == SMBB->end() |
950 | ? *SMBB->successors().begin() |
951 | : SMBB->splitAt(SplitInst&: MI, /*UpdateLiveIns*/ true); |
952 | |
953 | // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB. |
954 | Cbz.addMBB(MBB: SMBB); |
955 | BuildMI(&MBB, DL, TII->get(AArch64::B)) |
956 | .addMBB(EndBB); |
957 | MBB.addSuccessor(Succ: EndBB); |
958 | |
959 | // Replace the pseudo with a call (BL). |
960 | MachineInstrBuilder MIB = |
961 | BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL)); |
962 | MIB.addReg(RegNo: MI.getOperand(i: 1).getReg(), flags: RegState::Implicit); |
963 | for (unsigned I = 2; I < MI.getNumOperands(); ++I) |
964 | MIB.add(MO: MI.getOperand(i: I)); |
965 | BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); |
966 | |
967 | MI.eraseFromParent(); |
968 | return EndBB; |
969 | } |
970 | |
971 | MachineBasicBlock * |
972 | AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB, |
973 | MachineBasicBlock::iterator MBBI) { |
974 | MachineInstr &MI = *MBBI; |
975 | // In the case of a smstart/smstop before a unreachable, just remove the pseudo. |
976 | // Exception handling code generated by Clang may introduce unreachables and it |
977 | // seems unnecessary to restore pstate.sm when that happens. Note that it is |
978 | // not just an optimisation, the code below expects a successor instruction/block |
979 | // in order to split the block at MBBI. |
980 | if (std::next(x: MBBI) == MBB.end() && |
981 | MI.getParent()->successors().begin() == |
982 | MI.getParent()->successors().end()) { |
983 | MI.eraseFromParent(); |
984 | return &MBB; |
985 | } |
986 | |
987 | // Expand the pseudo into smstart or smstop instruction. The pseudo has the |
988 | // following operands: |
989 | // |
990 | // MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask> |
991 | // |
992 | // The pseudo is expanded into a conditional smstart/smstop, with a |
993 | // check if pstate.sm (register) equals the expected value, and if not, |
994 | // invokes the smstart/smstop. |
995 | // |
996 | // As an example, the following block contains a normal call from a |
997 | // streaming-compatible function: |
998 | // |
999 | // OrigBB: |
1000 | // MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP |
1001 | // bl @normal_callee |
1002 | // MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART |
1003 | // |
1004 | // ...which will be transformed into: |
1005 | // |
1006 | // OrigBB: |
1007 | // TBNZx %0:gpr64, 0, SMBB |
1008 | // b EndBB |
1009 | // |
1010 | // SMBB: |
1011 | // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP |
1012 | // |
1013 | // EndBB: |
1014 | // bl @normal_callee |
1015 | // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART |
1016 | // |
1017 | DebugLoc DL = MI.getDebugLoc(); |
1018 | |
1019 | // Create the conditional branch based on the third operand of the |
1020 | // instruction, which tells us if we are wrapping a normal or streaming |
1021 | // function. |
1022 | // We test the live value of pstate.sm and toggle pstate.sm if this is not the |
1023 | // expected value for the callee (0 for a normal callee and 1 for a streaming |
1024 | // callee). |
1025 | unsigned Opc; |
1026 | switch (MI.getOperand(i: 2).getImm()) { |
1027 | case AArch64SME::Always: |
1028 | llvm_unreachable("Should have matched to instruction directly" ); |
1029 | case AArch64SME::IfCallerIsStreaming: |
1030 | Opc = AArch64::TBNZW; |
1031 | break; |
1032 | case AArch64SME::IfCallerIsNonStreaming: |
1033 | Opc = AArch64::TBZW; |
1034 | break; |
1035 | } |
1036 | auto PStateSM = MI.getOperand(i: 3).getReg(); |
1037 | auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); |
1038 | unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32); |
1039 | MachineInstrBuilder Tbx = |
1040 | BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0); |
1041 | |
1042 | // Split MBB and create two new blocks: |
1043 | // - MBB now contains all instructions before MSRcond_pstatesvcrImm1. |
1044 | // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only. |
1045 | // - EndBB contains all instructions after MSRcond_pstatesvcrImm1. |
1046 | MachineInstr &PrevMI = *std::prev(x: MBBI); |
1047 | MachineBasicBlock *SMBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true); |
1048 | MachineBasicBlock *EndBB = std::next(x: MI.getIterator()) == SMBB->end() |
1049 | ? *SMBB->successors().begin() |
1050 | : SMBB->splitAt(SplitInst&: MI, /*UpdateLiveIns*/ true); |
1051 | |
1052 | // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB. |
1053 | Tbx.addMBB(MBB: SMBB); |
1054 | BuildMI(&MBB, DL, TII->get(AArch64::B)) |
1055 | .addMBB(EndBB); |
1056 | MBB.addSuccessor(Succ: EndBB); |
1057 | |
1058 | // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB. |
1059 | MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(), |
1060 | TII->get(AArch64::MSRpstatesvcrImm1)); |
1061 | // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as |
1062 | // these contain the CopyFromReg for the first argument and the flag to |
1063 | // indicate whether the callee is streaming or normal). |
1064 | MIB.add(MO: MI.getOperand(i: 0)); |
1065 | MIB.add(MO: MI.getOperand(i: 1)); |
1066 | for (unsigned i = 4; i < MI.getNumOperands(); ++i) |
1067 | MIB.add(MO: MI.getOperand(i)); |
1068 | |
1069 | BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); |
1070 | |
1071 | MI.eraseFromParent(); |
1072 | return EndBB; |
1073 | } |
1074 | |
1075 | bool AArch64ExpandPseudo::expandMultiVecPseudo( |
1076 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
1077 | TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass, |
1078 | unsigned ContiguousOp, unsigned StridedOpc) { |
1079 | MachineInstr &MI = *MBBI; |
1080 | Register Tuple = MI.getOperand(i: 0).getReg(); |
1081 | |
1082 | auto ContiguousRange = ContiguousClass.getRegisters(); |
1083 | auto StridedRange = StridedClass.getRegisters(); |
1084 | unsigned Opc; |
1085 | if (llvm::is_contained(Range&: ContiguousRange, Element: Tuple.asMCReg())) { |
1086 | Opc = ContiguousOp; |
1087 | } else if (llvm::is_contained(Range&: StridedRange, Element: Tuple.asMCReg())) { |
1088 | Opc = StridedOpc; |
1089 | } else |
1090 | llvm_unreachable("Cannot expand Multi-Vector pseudo" ); |
1091 | |
1092 | MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) |
1093 | .add(MI.getOperand(i: 0)) |
1094 | .add(MI.getOperand(i: 1)) |
1095 | .add(MI.getOperand(i: 2)) |
1096 | .add(MI.getOperand(i: 3)); |
1097 | transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB); |
1098 | MI.eraseFromParent(); |
1099 | return true; |
1100 | } |
1101 | |
1102 | /// If MBBI references a pseudo instruction that should be expanded here, |
1103 | /// do the expansion and return true. Otherwise return false. |
1104 | bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, |
1105 | MachineBasicBlock::iterator MBBI, |
1106 | MachineBasicBlock::iterator &NextMBBI) { |
1107 | MachineInstr &MI = *MBBI; |
1108 | unsigned Opcode = MI.getOpcode(); |
1109 | |
1110 | // Check if we can expand the destructive op |
1111 | int OrigInstr = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode()); |
1112 | if (OrigInstr != -1) { |
1113 | auto &Orig = TII->get(OrigInstr); |
1114 | if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) != |
1115 | AArch64::NotDestructive) { |
1116 | return expand_DestructiveOp(MI, MBB, MBBI); |
1117 | } |
1118 | } |
1119 | |
1120 | switch (Opcode) { |
1121 | default: |
1122 | break; |
1123 | |
1124 | case AArch64::BSPv8i8: |
1125 | case AArch64::BSPv16i8: { |
1126 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1127 | if (DstReg == MI.getOperand(i: 3).getReg()) { |
1128 | // Expand to BIT |
1129 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1130 | TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8 |
1131 | : AArch64::BITv16i8)) |
1132 | .add(MI.getOperand(0)) |
1133 | .add(MI.getOperand(3)) |
1134 | .add(MI.getOperand(2)) |
1135 | .add(MI.getOperand(1)); |
1136 | } else if (DstReg == MI.getOperand(i: 2).getReg()) { |
1137 | // Expand to BIF |
1138 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1139 | TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8 |
1140 | : AArch64::BIFv16i8)) |
1141 | .add(MI.getOperand(0)) |
1142 | .add(MI.getOperand(2)) |
1143 | .add(MI.getOperand(3)) |
1144 | .add(MI.getOperand(1)); |
1145 | } else { |
1146 | // Expand to BSL, use additional move if required |
1147 | if (DstReg == MI.getOperand(i: 1).getReg()) { |
1148 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1149 | TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8 |
1150 | : AArch64::BSLv16i8)) |
1151 | .add(MI.getOperand(0)) |
1152 | .add(MI.getOperand(1)) |
1153 | .add(MI.getOperand(2)) |
1154 | .add(MI.getOperand(3)); |
1155 | } else { |
1156 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1157 | TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8 |
1158 | : AArch64::ORRv16i8)) |
1159 | .addReg(DstReg, |
1160 | RegState::Define | |
1161 | getRenamableRegState(MI.getOperand(0).isRenamable())) |
1162 | .add(MI.getOperand(1)) |
1163 | .add(MI.getOperand(1)); |
1164 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1165 | TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8 |
1166 | : AArch64::BSLv16i8)) |
1167 | .add(MI.getOperand(0)) |
1168 | .addReg(DstReg, |
1169 | RegState::Kill | |
1170 | getRenamableRegState(MI.getOperand(0).isRenamable())) |
1171 | .add(MI.getOperand(2)) |
1172 | .add(MI.getOperand(3)); |
1173 | } |
1174 | } |
1175 | MI.eraseFromParent(); |
1176 | return true; |
1177 | } |
1178 | |
1179 | case AArch64::ADDWrr: |
1180 | case AArch64::SUBWrr: |
1181 | case AArch64::ADDXrr: |
1182 | case AArch64::SUBXrr: |
1183 | case AArch64::ADDSWrr: |
1184 | case AArch64::SUBSWrr: |
1185 | case AArch64::ADDSXrr: |
1186 | case AArch64::SUBSXrr: |
1187 | case AArch64::ANDWrr: |
1188 | case AArch64::ANDXrr: |
1189 | case AArch64::BICWrr: |
1190 | case AArch64::BICXrr: |
1191 | case AArch64::ANDSWrr: |
1192 | case AArch64::ANDSXrr: |
1193 | case AArch64::BICSWrr: |
1194 | case AArch64::BICSXrr: |
1195 | case AArch64::EONWrr: |
1196 | case AArch64::EONXrr: |
1197 | case AArch64::EORWrr: |
1198 | case AArch64::EORXrr: |
1199 | case AArch64::ORNWrr: |
1200 | case AArch64::ORNXrr: |
1201 | case AArch64::ORRWrr: |
1202 | case AArch64::ORRXrr: { |
1203 | unsigned Opcode; |
1204 | switch (MI.getOpcode()) { |
1205 | default: |
1206 | return false; |
1207 | case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; |
1208 | case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; |
1209 | case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; |
1210 | case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; |
1211 | case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; |
1212 | case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; |
1213 | case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; |
1214 | case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; |
1215 | case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; |
1216 | case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; |
1217 | case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; |
1218 | case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; |
1219 | case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; |
1220 | case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; |
1221 | case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; |
1222 | case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break; |
1223 | case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; |
1224 | case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; |
1225 | case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; |
1226 | case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; |
1227 | case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; |
1228 | case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; |
1229 | case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; |
1230 | case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; |
1231 | } |
1232 | MachineFunction &MF = *MBB.getParent(); |
1233 | // Try to create new inst without implicit operands added. |
1234 | MachineInstr *NewMI = MF.CreateMachineInstr( |
1235 | MCID: TII->get(Opcode), DL: MI.getDebugLoc(), /*NoImplicit=*/true); |
1236 | MBB.insert(I: MBBI, MI: NewMI); |
1237 | MachineInstrBuilder MIB1(MF, NewMI); |
1238 | MIB1->setPCSections(MF, MD: MI.getPCSections()); |
1239 | MIB1.addReg(RegNo: MI.getOperand(i: 0).getReg(), flags: RegState::Define) |
1240 | .add(MO: MI.getOperand(i: 1)) |
1241 | .add(MO: MI.getOperand(i: 2)) |
1242 | .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0)); |
1243 | transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB1); |
1244 | if (auto DebugNumber = MI.peekDebugInstrNum()) |
1245 | NewMI->setDebugInstrNum(DebugNumber); |
1246 | MI.eraseFromParent(); |
1247 | return true; |
1248 | } |
1249 | |
1250 | case AArch64::LOADgot: { |
1251 | MachineFunction *MF = MBB.getParent(); |
1252 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1253 | const MachineOperand &MO1 = MI.getOperand(i: 1); |
1254 | unsigned Flags = MO1.getTargetFlags(); |
1255 | |
1256 | if (MF->getTarget().getCodeModel() == CodeModel::Tiny) { |
1257 | // Tiny codemodel expand to LDR |
1258 | MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1259 | TII->get(AArch64::LDRXl), DstReg); |
1260 | |
1261 | if (MO1.isGlobal()) { |
1262 | MIB.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags); |
1263 | } else if (MO1.isSymbol()) { |
1264 | MIB.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags); |
1265 | } else { |
1266 | assert(MO1.isCPI() && |
1267 | "Only expect globals, externalsymbols, or constant pools" ); |
1268 | MIB.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), TargetFlags: Flags); |
1269 | } |
1270 | } else { |
1271 | // Small codemodel expand into ADRP + LDR. |
1272 | MachineFunction &MF = *MI.getParent()->getParent(); |
1273 | DebugLoc DL = MI.getDebugLoc(); |
1274 | MachineInstrBuilder MIB1 = |
1275 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); |
1276 | |
1277 | MachineInstrBuilder MIB2; |
1278 | if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) { |
1279 | auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); |
1280 | unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32); |
1281 | unsigned DstFlags = MI.getOperand(i: 0).getTargetFlags(); |
1282 | MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui)) |
1283 | .addDef(Reg32) |
1284 | .addReg(DstReg, RegState::Kill) |
1285 | .addReg(DstReg, DstFlags | RegState::Implicit); |
1286 | } else { |
1287 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1288 | MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui)) |
1289 | .add(MI.getOperand(0)) |
1290 | .addUse(DstReg, RegState::Kill); |
1291 | } |
1292 | |
1293 | if (MO1.isGlobal()) { |
1294 | MIB1.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags | AArch64II::MO_PAGE); |
1295 | MIB2.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, |
1296 | TargetFlags: Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
1297 | } else if (MO1.isSymbol()) { |
1298 | MIB1.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | AArch64II::MO_PAGE); |
1299 | MIB2.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | |
1300 | AArch64II::MO_PAGEOFF | |
1301 | AArch64II::MO_NC); |
1302 | } else { |
1303 | assert(MO1.isCPI() && |
1304 | "Only expect globals, externalsymbols, or constant pools" ); |
1305 | MIB1.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), |
1306 | TargetFlags: Flags | AArch64II::MO_PAGE); |
1307 | MIB2.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), |
1308 | TargetFlags: Flags | AArch64II::MO_PAGEOFF | |
1309 | AArch64II::MO_NC); |
1310 | } |
1311 | |
1312 | transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2); |
1313 | } |
1314 | MI.eraseFromParent(); |
1315 | return true; |
1316 | } |
1317 | case AArch64::MOVaddrBA: { |
1318 | MachineFunction &MF = *MI.getParent()->getParent(); |
1319 | if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) { |
1320 | // blockaddress expressions have to come from a constant pool because the |
1321 | // largest addend (and hence offset within a function) allowed for ADRP is |
1322 | // only 8MB. |
1323 | const BlockAddress *BA = MI.getOperand(i: 1).getBlockAddress(); |
1324 | assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset" ); |
1325 | |
1326 | MachineConstantPool *MCP = MF.getConstantPool(); |
1327 | unsigned CPIdx = MCP->getConstantPoolIndex(C: BA, Alignment: Align(8)); |
1328 | |
1329 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1330 | auto MIB1 = |
1331 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) |
1332 | .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); |
1333 | auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1334 | TII->get(AArch64::LDRXui), DstReg) |
1335 | .addUse(DstReg) |
1336 | .addConstantPoolIndex( |
1337 | CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
1338 | transferImpOps(MI, MIB1, MIB2); |
1339 | MI.eraseFromParent(); |
1340 | return true; |
1341 | } |
1342 | } |
1343 | [[fallthrough]]; |
1344 | case AArch64::MOVaddr: |
1345 | case AArch64::MOVaddrJT: |
1346 | case AArch64::MOVaddrCP: |
1347 | case AArch64::MOVaddrTLS: |
1348 | case AArch64::MOVaddrEXT: { |
1349 | // Expand into ADRP + ADD. |
1350 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1351 | assert(DstReg != AArch64::XZR); |
1352 | MachineInstrBuilder MIB1 = |
1353 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) |
1354 | .add(MI.getOperand(1)); |
1355 | |
1356 | if (MI.getOperand(i: 1).getTargetFlags() & AArch64II::MO_TAGGED) { |
1357 | // MO_TAGGED on the page indicates a tagged address. Set the tag now. |
1358 | // We do so by creating a MOVK that sets bits 48-63 of the register to |
1359 | // (global address + 0x100000000 - PC) >> 48. This assumes that we're in |
1360 | // the small code model so we can assume a binary size of <= 4GB, which |
1361 | // makes the untagged PC relative offset positive. The binary must also be |
1362 | // loaded into address range [0, 2^48). Both of these properties need to |
1363 | // be ensured at runtime when using tagged addresses. |
1364 | auto Tag = MI.getOperand(i: 1); |
1365 | Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3); |
1366 | Tag.setOffset(0x100000000); |
1367 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg) |
1368 | .addReg(DstReg) |
1369 | .add(Tag) |
1370 | .addImm(48); |
1371 | } |
1372 | |
1373 | MachineInstrBuilder MIB2 = |
1374 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) |
1375 | .add(MI.getOperand(0)) |
1376 | .addReg(DstReg) |
1377 | .add(MI.getOperand(2)) |
1378 | .addImm(0); |
1379 | |
1380 | transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2); |
1381 | MI.eraseFromParent(); |
1382 | return true; |
1383 | } |
1384 | case AArch64::ADDlowTLS: |
1385 | // Produce a plain ADD |
1386 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) |
1387 | .add(MI.getOperand(0)) |
1388 | .add(MI.getOperand(1)) |
1389 | .add(MI.getOperand(2)) |
1390 | .addImm(0); |
1391 | MI.eraseFromParent(); |
1392 | return true; |
1393 | |
1394 | case AArch64::MOVbaseTLS: { |
1395 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1396 | auto SysReg = AArch64SysReg::TPIDR_EL0; |
1397 | MachineFunction *MF = MBB.getParent(); |
1398 | if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP()) |
1399 | SysReg = AArch64SysReg::TPIDR_EL3; |
1400 | else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP()) |
1401 | SysReg = AArch64SysReg::TPIDR_EL2; |
1402 | else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP()) |
1403 | SysReg = AArch64SysReg::TPIDR_EL1; |
1404 | else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP()) |
1405 | SysReg = AArch64SysReg::TPIDRRO_EL0; |
1406 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg) |
1407 | .addImm(SysReg); |
1408 | MI.eraseFromParent(); |
1409 | return true; |
1410 | } |
1411 | |
1412 | case AArch64::MOVi32imm: |
1413 | return expandMOVImm(MBB, MBBI, BitSize: 32); |
1414 | case AArch64::MOVi64imm: |
1415 | return expandMOVImm(MBB, MBBI, BitSize: 64); |
1416 | case AArch64::RET_ReallyLR: { |
1417 | // Hiding the LR use with RET_ReallyLR may lead to extra kills in the |
1418 | // function and missing live-ins. We are fine in practice because callee |
1419 | // saved register handling ensures the register value is restored before |
1420 | // RET, but we need the undef flag here to appease the MachineVerifier |
1421 | // liveness checks. |
1422 | MachineInstrBuilder MIB = |
1423 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) |
1424 | .addReg(AArch64::LR, RegState::Undef); |
1425 | transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB); |
1426 | MI.eraseFromParent(); |
1427 | return true; |
1428 | } |
1429 | case AArch64::CMP_SWAP_8: |
1430 | return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB, |
1431 | AArch64::SUBSWrx, |
1432 | AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0), |
1433 | AArch64::WZR, NextMBBI); |
1434 | case AArch64::CMP_SWAP_16: |
1435 | return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH, |
1436 | AArch64::SUBSWrx, |
1437 | AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0), |
1438 | AArch64::WZR, NextMBBI); |
1439 | case AArch64::CMP_SWAP_32: |
1440 | return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW, |
1441 | AArch64::SUBSWrs, |
1442 | AArch64_AM::getShifterImm(AArch64_AM::LSL, 0), |
1443 | AArch64::WZR, NextMBBI); |
1444 | case AArch64::CMP_SWAP_64: |
1445 | return expandCMP_SWAP(MBB, MBBI, |
1446 | AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs, |
1447 | AArch64_AM::getShifterImm(AArch64_AM::LSL, 0), |
1448 | AArch64::XZR, NextMBBI); |
1449 | case AArch64::CMP_SWAP_128: |
1450 | case AArch64::CMP_SWAP_128_RELEASE: |
1451 | case AArch64::CMP_SWAP_128_ACQUIRE: |
1452 | case AArch64::CMP_SWAP_128_MONOTONIC: |
1453 | return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); |
1454 | |
1455 | case AArch64::AESMCrrTied: |
1456 | case AArch64::AESIMCrrTied: { |
1457 | MachineInstrBuilder MIB = |
1458 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1459 | TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr : |
1460 | AArch64::AESIMCrr)) |
1461 | .add(MI.getOperand(0)) |
1462 | .add(MI.getOperand(1)); |
1463 | transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB); |
1464 | MI.eraseFromParent(); |
1465 | return true; |
1466 | } |
1467 | case AArch64::IRGstack: { |
1468 | MachineFunction &MF = *MBB.getParent(); |
1469 | const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); |
1470 | const AArch64FrameLowering *TFI = |
1471 | MF.getSubtarget<AArch64Subtarget>().getFrameLowering(); |
1472 | |
1473 | // IRG does not allow immediate offset. getTaggedBasePointerOffset should |
1474 | // almost always point to SP-after-prologue; if not, emit a longer |
1475 | // instruction sequence. |
1476 | int BaseOffset = -AFI->getTaggedBasePointerOffset(); |
1477 | Register FrameReg; |
1478 | StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference( |
1479 | MF, ObjectOffset: BaseOffset, isFixed: false /*isFixed*/, isSVE: false /*isSVE*/, FrameReg, |
1480 | /*PreferFP=*/false, |
1481 | /*ForSimm=*/true); |
1482 | Register SrcReg = FrameReg; |
1483 | if (FrameRegOffset) { |
1484 | // Use output register as temporary. |
1485 | SrcReg = MI.getOperand(i: 0).getReg(); |
1486 | emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg, |
1487 | FrameRegOffset, TII); |
1488 | } |
1489 | BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG)) |
1490 | .add(MI.getOperand(0)) |
1491 | .addUse(SrcReg) |
1492 | .add(MI.getOperand(2)); |
1493 | MI.eraseFromParent(); |
1494 | return true; |
1495 | } |
1496 | case AArch64::TAGPstack: { |
1497 | int64_t Offset = MI.getOperand(i: 2).getImm(); |
1498 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
1499 | TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG)) |
1500 | .add(MI.getOperand(0)) |
1501 | .add(MI.getOperand(1)) |
1502 | .addImm(std::abs(Offset)) |
1503 | .add(MI.getOperand(4)); |
1504 | MI.eraseFromParent(); |
1505 | return true; |
1506 | } |
1507 | case AArch64::STGloop_wback: |
1508 | case AArch64::STZGloop_wback: |
1509 | return expandSetTagLoop(MBB, MBBI, NextMBBI); |
1510 | case AArch64::STGloop: |
1511 | case AArch64::STZGloop: |
1512 | report_fatal_error( |
1513 | reason: "Non-writeback variants of STGloop / STZGloop should not " |
1514 | "survive past PrologEpilogInserter." ); |
1515 | case AArch64::STR_ZZZZXI: |
1516 | return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4); |
1517 | case AArch64::STR_ZZZXI: |
1518 | return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); |
1519 | case AArch64::STR_ZZXI: |
1520 | return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); |
1521 | case AArch64::STR_PPXI: |
1522 | return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2); |
1523 | case AArch64::LDR_ZZZZXI: |
1524 | return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); |
1525 | case AArch64::LDR_ZZZXI: |
1526 | return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); |
1527 | case AArch64::LDR_ZZXI: |
1528 | return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); |
1529 | case AArch64::LDR_PPXI: |
1530 | return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2); |
1531 | case AArch64::BLR_RVMARKER: |
1532 | return expandCALL_RVMARKER(MBB, MBBI); |
1533 | case AArch64::BLR_BTI: |
1534 | return expandCALL_BTI(MBB, MBBI); |
1535 | case AArch64::StoreSwiftAsyncContext: |
1536 | return expandStoreSwiftAsyncContext(MBB, MBBI); |
1537 | case AArch64::RestoreZAPseudo: { |
1538 | auto *NewMBB = expandRestoreZA(MBB, MBBI); |
1539 | if (NewMBB != &MBB) |
1540 | NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. |
1541 | return true; |
1542 | } |
1543 | case AArch64::MSRpstatePseudo: { |
1544 | auto *NewMBB = expandCondSMToggle(MBB, MBBI); |
1545 | if (NewMBB != &MBB) |
1546 | NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. |
1547 | return true; |
1548 | } |
1549 | case AArch64::COALESCER_BARRIER_FPR16: |
1550 | case AArch64::COALESCER_BARRIER_FPR32: |
1551 | case AArch64::COALESCER_BARRIER_FPR64: |
1552 | case AArch64::COALESCER_BARRIER_FPR128: |
1553 | MI.eraseFromParent(); |
1554 | return true; |
1555 | case AArch64::LD1B_2Z_IMM_PSEUDO: |
1556 | return expandMultiVecPseudo( |
1557 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1558 | AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM); |
1559 | case AArch64::LD1H_2Z_IMM_PSEUDO: |
1560 | return expandMultiVecPseudo( |
1561 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1562 | AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM); |
1563 | case AArch64::LD1W_2Z_IMM_PSEUDO: |
1564 | return expandMultiVecPseudo( |
1565 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1566 | AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM); |
1567 | case AArch64::LD1D_2Z_IMM_PSEUDO: |
1568 | return expandMultiVecPseudo( |
1569 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1570 | AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM); |
1571 | case AArch64::LDNT1B_2Z_IMM_PSEUDO: |
1572 | return expandMultiVecPseudo( |
1573 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1574 | AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM); |
1575 | case AArch64::LDNT1H_2Z_IMM_PSEUDO: |
1576 | return expandMultiVecPseudo( |
1577 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1578 | AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM); |
1579 | case AArch64::LDNT1W_2Z_IMM_PSEUDO: |
1580 | return expandMultiVecPseudo( |
1581 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1582 | AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM); |
1583 | case AArch64::LDNT1D_2Z_IMM_PSEUDO: |
1584 | return expandMultiVecPseudo( |
1585 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1586 | AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM); |
1587 | case AArch64::LD1B_2Z_PSEUDO: |
1588 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, |
1589 | AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z, |
1590 | AArch64::LD1B_2Z_STRIDED); |
1591 | case AArch64::LD1H_2Z_PSEUDO: |
1592 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, |
1593 | AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z, |
1594 | AArch64::LD1H_2Z_STRIDED); |
1595 | case AArch64::LD1W_2Z_PSEUDO: |
1596 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, |
1597 | AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z, |
1598 | AArch64::LD1W_2Z_STRIDED); |
1599 | case AArch64::LD1D_2Z_PSEUDO: |
1600 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, |
1601 | AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z, |
1602 | AArch64::LD1D_2Z_STRIDED); |
1603 | case AArch64::LDNT1B_2Z_PSEUDO: |
1604 | return expandMultiVecPseudo( |
1605 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1606 | AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED); |
1607 | case AArch64::LDNT1H_2Z_PSEUDO: |
1608 | return expandMultiVecPseudo( |
1609 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1610 | AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED); |
1611 | case AArch64::LDNT1W_2Z_PSEUDO: |
1612 | return expandMultiVecPseudo( |
1613 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1614 | AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED); |
1615 | case AArch64::LDNT1D_2Z_PSEUDO: |
1616 | return expandMultiVecPseudo( |
1617 | MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, |
1618 | AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED); |
1619 | case AArch64::LD1B_4Z_IMM_PSEUDO: |
1620 | return expandMultiVecPseudo( |
1621 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1622 | AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM); |
1623 | case AArch64::LD1H_4Z_IMM_PSEUDO: |
1624 | return expandMultiVecPseudo( |
1625 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1626 | AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM); |
1627 | case AArch64::LD1W_4Z_IMM_PSEUDO: |
1628 | return expandMultiVecPseudo( |
1629 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1630 | AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM); |
1631 | case AArch64::LD1D_4Z_IMM_PSEUDO: |
1632 | return expandMultiVecPseudo( |
1633 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1634 | AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM); |
1635 | case AArch64::LDNT1B_4Z_IMM_PSEUDO: |
1636 | return expandMultiVecPseudo( |
1637 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1638 | AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM); |
1639 | case AArch64::LDNT1H_4Z_IMM_PSEUDO: |
1640 | return expandMultiVecPseudo( |
1641 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1642 | AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM); |
1643 | case AArch64::LDNT1W_4Z_IMM_PSEUDO: |
1644 | return expandMultiVecPseudo( |
1645 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1646 | AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM); |
1647 | case AArch64::LDNT1D_4Z_IMM_PSEUDO: |
1648 | return expandMultiVecPseudo( |
1649 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1650 | AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM); |
1651 | case AArch64::LD1B_4Z_PSEUDO: |
1652 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, |
1653 | AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z, |
1654 | AArch64::LD1B_4Z_STRIDED); |
1655 | case AArch64::LD1H_4Z_PSEUDO: |
1656 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, |
1657 | AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z, |
1658 | AArch64::LD1H_4Z_STRIDED); |
1659 | case AArch64::LD1W_4Z_PSEUDO: |
1660 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, |
1661 | AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z, |
1662 | AArch64::LD1W_4Z_STRIDED); |
1663 | case AArch64::LD1D_4Z_PSEUDO: |
1664 | return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, |
1665 | AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z, |
1666 | AArch64::LD1D_4Z_STRIDED); |
1667 | case AArch64::LDNT1B_4Z_PSEUDO: |
1668 | return expandMultiVecPseudo( |
1669 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1670 | AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED); |
1671 | case AArch64::LDNT1H_4Z_PSEUDO: |
1672 | return expandMultiVecPseudo( |
1673 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1674 | AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED); |
1675 | case AArch64::LDNT1W_4Z_PSEUDO: |
1676 | return expandMultiVecPseudo( |
1677 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1678 | AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED); |
1679 | case AArch64::LDNT1D_4Z_PSEUDO: |
1680 | return expandMultiVecPseudo( |
1681 | MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, |
1682 | AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED); |
1683 | } |
1684 | return false; |
1685 | } |
1686 | |
1687 | /// Iterate over the instructions in basic block MBB and expand any |
1688 | /// pseudo instructions. Return true if anything was modified. |
1689 | bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { |
1690 | bool Modified = false; |
1691 | |
1692 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1693 | while (MBBI != E) { |
1694 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
1695 | Modified |= expandMI(MBB, MBBI, NextMBBI&: NMBBI); |
1696 | MBBI = NMBBI; |
1697 | } |
1698 | |
1699 | return Modified; |
1700 | } |
1701 | |
1702 | bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { |
1703 | TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); |
1704 | |
1705 | bool Modified = false; |
1706 | for (auto &MBB : MF) |
1707 | Modified |= expandMBB(MBB); |
1708 | return Modified; |
1709 | } |
1710 | |
1711 | /// Returns an instance of the pseudo instruction expansion pass. |
1712 | FunctionPass *llvm::createAArch64ExpandPseudoPass() { |
1713 | return new AArch64ExpandPseudo(); |
1714 | } |
1715 | |