1 | //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that lowers homogeneous prolog/epilog instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "AArch64InstrInfo.h" |
14 | #include "AArch64Subtarget.h" |
15 | #include "MCTargetDesc/AArch64InstPrinter.h" |
16 | #include "Utils/AArch64BaseInfo.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFunction.h" |
19 | #include "llvm/CodeGen/MachineFunctionPass.h" |
20 | #include "llvm/CodeGen/MachineInstr.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineModuleInfo.h" |
23 | #include "llvm/CodeGen/MachineOperand.h" |
24 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
25 | #include "llvm/IR/DebugLoc.h" |
26 | #include "llvm/IR/IRBuilder.h" |
27 | #include "llvm/Pass.h" |
28 | #include "llvm/Support/raw_ostream.h" |
29 | #include <optional> |
30 | #include <sstream> |
31 | |
32 | using namespace llvm; |
33 | |
34 | #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ |
35 | "AArch64 homogeneous prolog/epilog lowering pass" |
36 | |
37 | cl::opt<int> FrameHelperSizeThreshold( |
38 | "frame-helper-size-threshold" , cl::init(Val: 2), cl::Hidden, |
39 | cl::desc("The minimum number of instructions that are outlined in a frame " |
40 | "helper (default = 2)" )); |
41 | |
42 | namespace { |
43 | |
44 | class AArch64LowerHomogeneousPE { |
45 | public: |
46 | const AArch64InstrInfo *TII; |
47 | |
48 | AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) |
49 | : M(M), MMI(MMI) {} |
50 | |
51 | bool run(); |
52 | bool runOnMachineFunction(MachineFunction &Fn); |
53 | |
54 | private: |
55 | Module *M; |
56 | MachineModuleInfo *MMI; |
57 | |
58 | bool runOnMBB(MachineBasicBlock &MBB); |
59 | bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
60 | MachineBasicBlock::iterator &NextMBBI); |
61 | |
62 | /// Lower a HOM_Prolog pseudo instruction into a helper call |
63 | /// or a sequence of homogeneous stores. |
64 | /// When a fp setup follows, it can be optimized. |
65 | bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
66 | MachineBasicBlock::iterator &NextMBBI); |
67 | /// Lower a HOM_Epilog pseudo instruction into a helper call |
68 | /// or a sequence of homogeneous loads. |
69 | /// When a return follow, it can be optimized. |
70 | bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
71 | MachineBasicBlock::iterator &NextMBBI); |
72 | }; |
73 | |
74 | class AArch64LowerHomogeneousPrologEpilog : public ModulePass { |
75 | public: |
76 | static char ID; |
77 | |
78 | AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) { |
79 | initializeAArch64LowerHomogeneousPrologEpilogPass( |
80 | *PassRegistry::getPassRegistry()); |
81 | } |
82 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
83 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
84 | AU.addPreserved<MachineModuleInfoWrapperPass>(); |
85 | AU.setPreservesAll(); |
86 | ModulePass::getAnalysisUsage(AU); |
87 | } |
88 | bool runOnModule(Module &M) override; |
89 | |
90 | StringRef getPassName() const override { |
91 | return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; |
92 | } |
93 | }; |
94 | |
95 | } // end anonymous namespace |
96 | |
97 | char AArch64LowerHomogeneousPrologEpilog::ID = 0; |
98 | |
99 | INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, |
100 | "aarch64-lower-homogeneous-prolog-epilog" , |
101 | AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) |
102 | |
103 | bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { |
104 | if (skipModule(M)) |
105 | return false; |
106 | |
107 | MachineModuleInfo *MMI = |
108 | &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); |
109 | return AArch64LowerHomogeneousPE(&M, MMI).run(); |
110 | } |
111 | |
112 | bool AArch64LowerHomogeneousPE::run() { |
113 | bool Changed = false; |
114 | for (auto &F : *M) { |
115 | if (F.empty()) |
116 | continue; |
117 | |
118 | MachineFunction *MF = MMI->getMachineFunction(F); |
119 | if (!MF) |
120 | continue; |
121 | Changed |= runOnMachineFunction(Fn&: *MF); |
122 | } |
123 | |
124 | return Changed; |
125 | } |
/// The kinds of outlined frame helpers this pass can emit.
enum FrameHelperType {
  Prolog,      ///< Saves callee-saved registers.
  PrologFrame, ///< Saves callee-saved registers and sets up the frame pointer.
  Epilog,      ///< Restores callee-saved registers and returns to the caller.
  EpilogTail   ///< Restores callee-saved registers and performs the return.
};
127 | |
128 | /// Return a frame helper name with the given CSRs and the helper type. |
129 | /// For instance, a prolog helper that saves x19 and x20 is named as |
130 | /// OUTLINED_FUNCTION_PROLOG_x19x20. |
131 | static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, |
132 | FrameHelperType Type, unsigned FpOffset) { |
133 | std::ostringstream RegStream; |
134 | switch (Type) { |
135 | case FrameHelperType::Prolog: |
136 | RegStream << "OUTLINED_FUNCTION_PROLOG_" ; |
137 | break; |
138 | case FrameHelperType::PrologFrame: |
139 | RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_" ; |
140 | break; |
141 | case FrameHelperType::Epilog: |
142 | RegStream << "OUTLINED_FUNCTION_EPILOG_" ; |
143 | break; |
144 | case FrameHelperType::EpilogTail: |
145 | RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_" ; |
146 | break; |
147 | } |
148 | |
149 | for (auto Reg : Regs) { |
150 | if (Reg == AArch64::NoRegister) |
151 | continue; |
152 | RegStream << AArch64InstPrinter::getRegisterName(Reg); |
153 | } |
154 | |
155 | return RegStream.str(); |
156 | } |
157 | |
158 | /// Create a Function for the unique frame helper with the given name. |
159 | /// Return a newly created MachineFunction with an empty MachineBasicBlock. |
160 | static MachineFunction &createFrameHelperMachineFunction(Module *M, |
161 | MachineModuleInfo *MMI, |
162 | StringRef Name) { |
163 | LLVMContext &C = M->getContext(); |
164 | Function *F = M->getFunction(Name); |
165 | assert(F == nullptr && "Function has been created before" ); |
166 | F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false), |
167 | Linkage: Function::ExternalLinkage, N: Name, M); |
168 | assert(F && "Function was null!" ); |
169 | |
170 | // Use ODR linkage to avoid duplication. |
171 | F->setLinkage(GlobalValue::LinkOnceODRLinkage); |
172 | F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); |
173 | |
174 | // Set no-opt/minsize, so we don't insert padding between outlined |
175 | // functions. |
176 | F->addFnAttr(Attribute::OptimizeNone); |
177 | F->addFnAttr(Attribute::NoInline); |
178 | F->addFnAttr(Attribute::MinSize); |
179 | F->addFnAttr(Attribute::Kind: Naked); |
180 | |
181 | MachineFunction &MF = MMI->getOrCreateMachineFunction(F&: *F); |
182 | // Remove unnecessary register liveness and set NoVRegs. |
183 | MF.getProperties().reset(P: MachineFunctionProperties::Property::TracksLiveness); |
184 | MF.getProperties().reset(P: MachineFunctionProperties::Property::IsSSA); |
185 | MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); |
186 | MF.getRegInfo().freezeReservedRegs(); |
187 | |
188 | // Create entry block. |
189 | BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry" , Parent: F); |
190 | IRBuilder<> Builder(EntryBB); |
191 | Builder.CreateRetVoid(); |
192 | |
193 | // Insert the new block into the function. |
194 | MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); |
195 | MF.insert(MBBI: MF.begin(), MBB); |
196 | |
197 | return MF; |
198 | } |
199 | |
200 | /// Emit a store-pair instruction for frame-setup. |
201 | /// If Reg2 is AArch64::NoRegister, emit STR instead. |
202 | static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, |
203 | MachineBasicBlock::iterator Pos, |
204 | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
205 | int Offset, bool IsPreDec) { |
206 | assert(Reg1 != AArch64::NoRegister); |
207 | const bool IsPaired = Reg2 != AArch64::NoRegister; |
208 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); |
209 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); |
210 | unsigned Opc; |
211 | if (IsPreDec) { |
212 | if (IsFloat) |
213 | Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; |
214 | else |
215 | Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; |
216 | } else { |
217 | if (IsFloat) |
218 | Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; |
219 | else |
220 | Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; |
221 | } |
222 | // The implicit scale for Offset is 8. |
223 | TypeSize Scale(0U, false), Width(0U, false); |
224 | int64_t MinOffset, MaxOffset; |
225 | [[maybe_unused]] bool Success = |
226 | AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset); |
227 | assert(Success && "Invalid Opcode" ); |
228 | Offset *= (8 / (int)Scale); |
229 | |
230 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc)); |
231 | if (IsPreDec) |
232 | MIB.addDef(AArch64::RegNo: SP); |
233 | if (IsPaired) |
234 | MIB.addReg(RegNo: Reg2); |
235 | MIB.addReg(RegNo: Reg1) |
236 | .addReg(AArch64::RegNo: SP) |
237 | .addImm(Offset) |
238 | .setMIFlag(MachineInstr::FrameSetup); |
239 | } |
240 | |
241 | /// Emit a load-pair instruction for frame-destroy. |
242 | /// If Reg2 is AArch64::NoRegister, emit LDR instead. |
243 | static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, |
244 | MachineBasicBlock::iterator Pos, |
245 | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
246 | int Offset, bool IsPostDec) { |
247 | assert(Reg1 != AArch64::NoRegister); |
248 | const bool IsPaired = Reg2 != AArch64::NoRegister; |
249 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); |
250 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); |
251 | unsigned Opc; |
252 | if (IsPostDec) { |
253 | if (IsFloat) |
254 | Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; |
255 | else |
256 | Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; |
257 | } else { |
258 | if (IsFloat) |
259 | Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; |
260 | else |
261 | Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; |
262 | } |
263 | // The implicit scale for Offset is 8. |
264 | TypeSize Scale(0U, false), Width(0U, false); |
265 | int64_t MinOffset, MaxOffset; |
266 | [[maybe_unused]] bool Success = |
267 | AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset); |
268 | assert(Success && "Invalid Opcode" ); |
269 | Offset *= (8 / (int)Scale); |
270 | |
271 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc)); |
272 | if (IsPostDec) |
273 | MIB.addDef(AArch64::RegNo: SP); |
274 | if (IsPaired) |
275 | MIB.addReg(RegNo: Reg2, flags: getDefRegState(B: true)); |
276 | MIB.addReg(RegNo: Reg1, flags: getDefRegState(B: true)) |
277 | .addReg(AArch64::RegNo: SP) |
278 | .addImm(Offset) |
279 | .setMIFlag(MachineInstr::FrameDestroy); |
280 | } |
281 | |
282 | /// Return a unique function if a helper can be formed with the given Regs |
283 | /// and frame type. |
284 | /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: |
285 | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
286 | /// stp x20, x19, [sp, #16] |
287 | /// ret |
288 | /// |
289 | /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: |
290 | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
291 | /// stp x20, x19, [sp, #16] |
292 | /// add fp, sp, #32 |
293 | /// ret |
294 | /// |
295 | /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: |
296 | /// mov x16, x30 |
297 | /// ldp x29, x30, [sp, #32] |
298 | /// ldp x20, x19, [sp, #16] |
299 | /// ldp x22, x21, [sp], #48 |
300 | /// ret x16 |
301 | /// |
302 | /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: |
303 | /// ldp x29, x30, [sp, #32] |
304 | /// ldp x20, x19, [sp, #16] |
305 | /// ldp x22, x21, [sp], #48 |
306 | /// ret |
307 | /// @param M module |
308 | /// @param MMI machine module info |
309 | /// @param Regs callee save regs that the helper will handle |
310 | /// @param Type frame helper type |
311 | /// @return a helper function |
312 | static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, |
313 | SmallVectorImpl<unsigned> &Regs, |
314 | FrameHelperType Type, |
315 | unsigned FpOffset = 0) { |
316 | assert(Regs.size() >= 2); |
317 | auto Name = getFrameHelperName(Regs, Type, FpOffset); |
318 | auto *F = M->getFunction(Name); |
319 | if (F) |
320 | return F; |
321 | |
322 | auto &MF = createFrameHelperMachineFunction(M, MMI, Name); |
323 | MachineBasicBlock &MBB = *MF.begin(); |
324 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
325 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
326 | |
327 | int Size = (int)Regs.size(); |
328 | switch (Type) { |
329 | case FrameHelperType::Prolog: |
330 | case FrameHelperType::PrologFrame: { |
331 | // Compute the remaining SP adjust beyond FP/LR. |
332 | auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR)); |
333 | |
334 | // If the register stored to the lowest address is not LR, we must subtract |
335 | // more from SP here. |
336 | if (LRIdx != Size - 2) { |
337 | assert(Regs[Size - 2] != AArch64::LR); |
338 | emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], |
339 | LRIdx - Size + 2, true); |
340 | } |
341 | |
342 | // Store CSRs in the reverse order. |
343 | for (int I = Size - 3; I >= 0; I -= 2) { |
344 | // FP/LR has been stored at call-site. |
345 | if (Regs[I - 1] == AArch64::LR) |
346 | continue; |
347 | emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1, |
348 | IsPreDec: false); |
349 | } |
350 | if (Type == FrameHelperType::PrologFrame) |
351 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::Opcode: ADDXri)) |
352 | .addDef(AArch64::FP) |
353 | .addUse(AArch64::SP) |
354 | .addImm(FpOffset) |
355 | .addImm(0) |
356 | .setMIFlag(MachineInstr::FrameSetup); |
357 | |
358 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::Opcode: RET)) |
359 | .addReg(AArch64::LR); |
360 | break; |
361 | } |
362 | case FrameHelperType::Epilog: |
363 | case FrameHelperType::EpilogTail: |
364 | if (Type == FrameHelperType::Epilog) |
365 | // Stash LR to X16 |
366 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) |
367 | .addDef(AArch64::X16) |
368 | .addReg(AArch64::XZR) |
369 | .addUse(AArch64::LR) |
370 | .addImm(0); |
371 | |
372 | for (int I = 0; I < Size - 2; I += 2) |
373 | emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2, |
374 | IsPostDec: false); |
375 | // Restore the last CSR with post-increment of SP. |
376 | emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size, |
377 | IsPostDec: true); |
378 | |
379 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) |
380 | .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); |
381 | break; |
382 | } |
383 | |
384 | return M->getFunction(Name); |
385 | } |
386 | |
387 | /// This function checks if a frame helper should be used for |
388 | /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. |
389 | /// @param MBB machine basic block |
390 | /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog |
391 | /// @param Regs callee save registers that are saved or restored. |
392 | /// @param Type frame helper type |
393 | /// @return True if a use of helper is qualified. |
394 | static bool shouldUseFrameHelper(MachineBasicBlock &MBB, |
395 | MachineBasicBlock::iterator &NextMBBI, |
396 | SmallVectorImpl<unsigned> &Regs, |
397 | FrameHelperType Type) { |
398 | const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); |
399 | auto RegCount = Regs.size(); |
400 | assert(RegCount > 0 && (RegCount % 2 == 0)); |
401 | // # of instructions that will be outlined. |
402 | int InstCount = RegCount / 2; |
403 | |
404 | // Do not use a helper call when not saving LR. |
405 | if (!llvm::is_contained(Regs, AArch64::LR)) |
406 | return false; |
407 | |
408 | switch (Type) { |
409 | case FrameHelperType::Prolog: |
410 | // Prolog helper cannot save FP/LR. |
411 | InstCount--; |
412 | break; |
413 | case FrameHelperType::PrologFrame: { |
414 | // Effecitvely no change in InstCount since FpAdjusment is included. |
415 | break; |
416 | } |
417 | case FrameHelperType::Epilog: |
418 | // Bail-out if X16 is live across the epilog helper because it is used in |
419 | // the helper to handle X30. |
420 | for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { |
421 | if (NextMI->readsRegister(AArch64::W16, TRI)) |
422 | return false; |
423 | } |
424 | // Epilog may not be in the last block. Check the liveness in successors. |
425 | for (const MachineBasicBlock *SuccMBB : MBB.successors()) { |
426 | if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) |
427 | return false; |
428 | } |
429 | // No change in InstCount for the regular epilog case. |
430 | break; |
431 | case FrameHelperType::EpilogTail: { |
432 | // EpilogTail helper includes the caller's return. |
433 | if (NextMBBI == MBB.end()) |
434 | return false; |
435 | if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) |
436 | return false; |
437 | InstCount++; |
438 | break; |
439 | } |
440 | } |
441 | |
442 | return InstCount >= FrameHelperSizeThreshold; |
443 | } |
444 | |
445 | /// Lower a HOM_Epilog pseudo instruction into a helper call while |
446 | /// creating the helper on demand. Or emit a sequence of loads in place when not |
447 | /// using a helper call. |
448 | /// |
449 | /// 1. With a helper including ret |
450 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI |
451 | /// ret ; NextMBBI |
452 | /// => |
453 | /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 |
454 | /// ... ; NextMBBI |
455 | /// |
456 | /// 2. With a helper |
457 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
458 | /// => |
459 | /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 |
460 | /// |
461 | /// 3. Without a helper |
462 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
463 | /// => |
464 | /// ldp x29, x30, [sp, #32] |
465 | /// ldp x20, x19, [sp, #16] |
466 | /// ldp x22, x21, [sp], #48 |
467 | bool AArch64LowerHomogeneousPE::lowerEpilog( |
468 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
469 | MachineBasicBlock::iterator &NextMBBI) { |
470 | auto &MF = *MBB.getParent(); |
471 | MachineInstr &MI = *MBBI; |
472 | |
473 | DebugLoc DL = MI.getDebugLoc(); |
474 | SmallVector<unsigned, 8> Regs; |
475 | bool HasUnpairedReg = false; |
476 | for (auto &MO : MI.operands()) |
477 | if (MO.isReg()) { |
478 | if (!MO.getReg().isValid()) { |
479 | // For now we are only expecting unpaired GP registers which should |
480 | // occur exactly once. |
481 | assert(!HasUnpairedReg); |
482 | HasUnpairedReg = true; |
483 | } |
484 | Regs.push_back(Elt: MO.getReg()); |
485 | } |
486 | (void)HasUnpairedReg; |
487 | int Size = (int)Regs.size(); |
488 | if (Size == 0) |
489 | return false; |
490 | // Registers are in pair. |
491 | assert(Size % 2 == 0); |
492 | assert(MI.getOpcode() == AArch64::HOM_Epilog); |
493 | |
494 | auto Return = NextMBBI; |
495 | if (shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::EpilogTail)) { |
496 | // When MBB ends with a return, emit a tail-call to the epilog helper |
497 | auto *EpilogTailHelper = |
498 | getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::EpilogTail); |
499 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) |
500 | .addGlobalAddress(EpilogTailHelper) |
501 | .addImm(0) |
502 | .setMIFlag(MachineInstr::FrameDestroy) |
503 | .copyImplicitOps(MI) |
504 | .copyImplicitOps(*Return); |
505 | NextMBBI = std::next(x: Return); |
506 | Return->removeFromParent(); |
507 | } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, |
508 | Type: FrameHelperType::Epilog)) { |
509 | // The default epilog helper case. |
510 | auto *EpilogHelper = |
511 | getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Epilog); |
512 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
513 | .addGlobalAddress(EpilogHelper) |
514 | .setMIFlag(MachineInstr::FrameDestroy) |
515 | .copyImplicitOps(MI); |
516 | } else { |
517 | // Fall back to no-helper. |
518 | for (int I = 0; I < Size - 2; I += 2) |
519 | emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); |
520 | // Restore the last CSR with post-increment of SP. |
521 | emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); |
522 | } |
523 | |
524 | MBBI->removeFromParent(); |
525 | return true; |
526 | } |
527 | |
528 | /// Lower a HOM_Prolog pseudo instruction into a helper call while |
529 | /// creating the helper on demand. Or emit a sequence of stores in place when |
530 | /// not using a helper call. |
531 | /// |
532 | /// 1. With a helper including frame-setup |
533 | /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 |
534 | /// => |
535 | /// stp x29, x30, [sp, #-16]! |
536 | /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 |
537 | /// |
538 | /// 2. With a helper |
539 | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
540 | /// => |
541 | /// stp x29, x30, [sp, #-16]! |
542 | /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 |
543 | /// |
544 | /// 3. Without a helper |
545 | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
546 | /// => |
547 | /// stp x22, x21, [sp, #-48]! |
548 | /// stp x20, x19, [sp, #16] |
549 | /// stp x29, x30, [sp, #32] |
550 | bool AArch64LowerHomogeneousPE::lowerProlog( |
551 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
552 | MachineBasicBlock::iterator &NextMBBI) { |
553 | auto &MF = *MBB.getParent(); |
554 | MachineInstr &MI = *MBBI; |
555 | |
556 | DebugLoc DL = MI.getDebugLoc(); |
557 | SmallVector<unsigned, 8> Regs; |
558 | bool HasUnpairedReg = false; |
559 | int LRIdx = 0; |
560 | std::optional<int> FpOffset; |
561 | for (auto &MO : MI.operands()) { |
562 | if (MO.isReg()) { |
563 | if (MO.getReg().isValid()) { |
564 | if (MO.getReg() == AArch64::LR) |
565 | LRIdx = Regs.size(); |
566 | } else { |
567 | // For now we are only expecting unpaired GP registers which should |
568 | // occur exactly once. |
569 | assert(!HasUnpairedReg); |
570 | HasUnpairedReg = true; |
571 | } |
572 | Regs.push_back(Elt: MO.getReg()); |
573 | } else if (MO.isImm()) { |
574 | FpOffset = MO.getImm(); |
575 | } |
576 | } |
577 | (void)HasUnpairedReg; |
578 | int Size = (int)Regs.size(); |
579 | if (Size == 0) |
580 | return false; |
581 | // Allow compact unwind case only for oww. |
582 | assert(Size % 2 == 0); |
583 | assert(MI.getOpcode() == AArch64::HOM_Prolog); |
584 | |
585 | if (FpOffset && |
586 | shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::PrologFrame)) { |
587 | // FP/LR is stored at the top of stack before the prolog helper call. |
588 | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); |
589 | auto *PrologFrameHelper = getOrCreateFrameHelper( |
590 | M, MMI, Regs, Type: FrameHelperType::PrologFrame, FpOffset: *FpOffset); |
591 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
592 | .addGlobalAddress(PrologFrameHelper) |
593 | .setMIFlag(MachineInstr::FrameSetup) |
594 | .copyImplicitOps(MI) |
595 | .addReg(AArch64::FP, RegState::Implicit | RegState::Define) |
596 | .addReg(AArch64::SP, RegState::Implicit); |
597 | } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, |
598 | Type: FrameHelperType::Prolog)) { |
599 | // FP/LR is stored at the top of stack before the prolog helper call. |
600 | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); |
601 | auto *PrologHelper = |
602 | getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Prolog); |
603 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
604 | .addGlobalAddress(PrologHelper) |
605 | .setMIFlag(MachineInstr::FrameSetup) |
606 | .copyImplicitOps(MI); |
607 | } else { |
608 | // Fall back to no-helper. |
609 | emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); |
610 | for (int I = Size - 3; I >= 0; I -= 2) |
611 | emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); |
612 | if (FpOffset) { |
613 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) |
614 | .addDef(AArch64::FP) |
615 | .addUse(AArch64::SP) |
616 | .addImm(*FpOffset) |
617 | .addImm(0) |
618 | .setMIFlag(MachineInstr::FrameSetup); |
619 | } |
620 | } |
621 | |
622 | MBBI->removeFromParent(); |
623 | return true; |
624 | } |
625 | |
626 | /// Process each machine instruction |
627 | /// @param MBB machine basic block |
628 | /// @param MBBI current instruction iterator |
629 | /// @param NextMBBI next instruction iterator which can be updated |
630 | /// @return True when IR is changed. |
631 | bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, |
632 | MachineBasicBlock::iterator MBBI, |
633 | MachineBasicBlock::iterator &NextMBBI) { |
634 | MachineInstr &MI = *MBBI; |
635 | unsigned Opcode = MI.getOpcode(); |
636 | switch (Opcode) { |
637 | default: |
638 | break; |
639 | case AArch64::HOM_Prolog: |
640 | return lowerProlog(MBB, MBBI, NextMBBI); |
641 | case AArch64::HOM_Epilog: |
642 | return lowerEpilog(MBB, MBBI, NextMBBI); |
643 | } |
644 | return false; |
645 | } |
646 | |
647 | bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { |
648 | bool Modified = false; |
649 | |
650 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
651 | while (MBBI != E) { |
652 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
653 | Modified |= runOnMI(MBB, MBBI, NextMBBI&: NMBBI); |
654 | MBBI = NMBBI; |
655 | } |
656 | |
657 | return Modified; |
658 | } |
659 | |
660 | bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { |
661 | TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); |
662 | |
663 | bool Modified = false; |
664 | for (auto &MBB : MF) |
665 | Modified |= runOnMBB(MBB); |
666 | return Modified; |
667 | } |
668 | |
669 | ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { |
670 | return new AArch64LowerHomogeneousPrologEpilog(); |
671 | } |
672 | |