//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM implementation of the TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
//
// On ARM, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Technically the last frame area (VLAs) isn't created until the main
// function body runs, after the prologue; it is depicted here for
// completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- (sp at function entry)
// |                                   |
// | varargs from registers            |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_lr                           |
// | prev_fp                           |
// | (a.k.a. "frame record")           |
// |                                   |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
// |                                   |
// | callee-saved gpr registers        |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved fp/simd regs         |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.8-byte.alignment.....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- base pointer (not defined by ABI,
// |.variable-sized.local.variables....|    LLVM chooses r6)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers (fp,
// bp, sp) to that data must be computable at compile time. The sizes of the
// areas with a dotted background are not known at compile time, so when those
// areas are present, all three of fp, bp and sp must be set up in order to
// reach every frame area, assuming all of the frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// Even when a base pointer is not strictly needed, one is generated anyway
// when offsets from the frame pointer to local variables become so large that
// they can't be encoded in the immediate fields of loads or stores.
//
// The frame pointer might be chosen to be r7 or r11, depending on the target
// architecture and operating system. See ARMSubtarget::getFramePointerReg for
// details.
//
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls. If we do have VLAs, then
// the stack pointer must be decremented and incremented around each call to
// make space for the arguments below the VLAs.
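//
// For example, a call made below VLAs is bracketed roughly like this
// (illustrative sketch, not exact codegen):
//
//   sub sp, sp, #16   @ make space for the outgoing arguments
//   bl  callee
//   add sp, sp, #16   @ release it again
//
// whereas with a reserved call frame that space is folded into the prologue's
// single SP adjustment and sp stays put across calls.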
//
//===----------------------------------------------------------------------===//

#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

#define DEBUG_TYPE "arm-frame-lowering"

using namespace llvm;

static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

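// Skip over the code that emitAlignedDPRCS2Spills inserted for the given
// number of aligned DPRCS2 registers; forward-declared here because
// emitPrologue needs it before its definition appears later in this file.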
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);

ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}

bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
  // iOS always has a FP for backtracking, force other targets to keep their FP
  // when doing FastISel. The emitted code is currently superior, and in cases
  // like test-suite's lencod, FastISel isn't quite correct when the FP is
  // eliminated.
  return MF.getSubtarget<ARMSubtarget>().useFastISel();
}

/// Returns true if the target can safely skip saving callee-saved registers
/// for noreturn nounwind functions.
bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
         MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
         !MF.getFunction().hasFnAttribute(Attribute::UWTable));

  // The frame pointer and link register are not treated as normal CSRs, so we
  // can always skip CSR saves for non-returning functions.
  return true;
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // ABI-required frame pointer.
  if (MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;

  // Frame pointer required for use within this function.
  return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken());
}

/// isFPReserved - Return true if the frame pointer register should be
/// considered a reserved register in the scope of the specified function.
bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
  return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned CFSize = MFI.getMaxCallFrameSize();
  // It's not always a good idea to include the call frame as part of the
  // stack frame. ARM (especially Thumb) has small immediate offsets for
  // addressing the stack frame, so a large call frame can cause poor codegen
  // and may even make it impossible to scavenge a register.
  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
    return false;

  return !MFI.hasVarSizedObjects();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}

// Returns how much of the incoming argument stack area we should clean up in
// an epilogue. For the C calling convention this will be 0, for guaranteed
// tail call conventions it can be positive (a normal return or a tail call to
// a function that uses less stack space for arguments) or negative (for a
// tail call to a function that needs more stack space than us for arguments).
static int getArgumentStackToRestore(MachineFunction &MF,
                                     MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
                       RetOpcode == ARM::TCRETURNri;
  }
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  int ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}

static bool needsWinCFI(const MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         F.needsUnwindTableEntry();
}

// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             unsigned Flags) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  MachineInstrBuilder MIB;
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  Flags |= MachineInstr::NoMerge;

  switch (Opc) {
  default:
    report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
    break;
  case ARM::t2ADDri:   // add.w r11, sp, #xx
  case ARM::t2ADDri12: // add.w r11, sp, #xx
  case ARM::t2MOVTi16: // movt  r4, #xx
  case ARM::tBL:       // bl __chkstk
    // These are harmless if used for just setting up a frame pointer,
    // but that frame pointer can't be relied upon for unwinding, unless
    // set up with SEH_SaveSP.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi16: { // mov(w) r4, #xx
    bool Wide = MBBI->getOperand(1).getImm() >= 256;
    if (!Wide) {
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
      NewInstr.add(MBBI->getOperand(0));
      NewInstr.add(t1CondCodeOp(/*isDead=*/true));
      for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
        NewInstr.add(MO);
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
    break;
  }

  case ARM::tBLXr: // blx r12 (__chkstk)
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi32imm: // movw+movt
    // This pseudo instruction expands into two mov instructions. If the
    // second operand is a symbol reference, this will stay as two wide
    // instructions, movw+movt. If they're immediates, the first one can
    // end up as a narrow mov though.
    // As two SEH instructions are appended here, they won't get interleaved
    // between the two final movw/movt instructions, but it doesn't make any
    // practical difference.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    MBB->insertAfter(MBBI, MIB);
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2STR_PRE:
    if (MBBI->getOperand(0).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == -4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2STR_PRE");
    }
    break;

  case ARM::t2LDR_POST:
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == 4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2LDR_POST");
    }
    break;

  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned Mask = 0;
    bool Wide = false;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (Reg == 15)
        Reg = 14;
      if (Reg >= 8 && Reg <= 13)
        Wide = true;
      else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
        Wide = true;
      Mask |= 1 << Reg;
    }
    if (!Wide) {
      unsigned NewOpc;
      switch (Opc) {
      case ARM::t2LDMIA_RET:
        NewOpc = ARM::tPOP_RET;
        break;
      case ARM::t2LDMIA_UPD:
        NewOpc = ARM::tPOP;
        break;
      case ARM::t2STMDB_UPD:
        NewOpc = ARM::tPUSH;
        break;
      default:
        llvm_unreachable("");
      }
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
      for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    unsigned SEHOpc =
        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
    MIB = BuildMI(MF, DL, TII.get(SEHOpc))
              .addImm(Mask)
              .addImm(Wide ? 1 : 0)
              .setMIFlags(Flags);
    break;
  }
  case ARM::VSTMDDB_UPD:
  case ARM::VLDMDIA_UPD: {
    int First = -1, Last = 0;
    for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (First == -1)
        First = Reg;
      Last = Reg;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
              .addImm(First)
              .addImm(Last)
              .setMIFlags(Flags);
    break;
  }
  case ARM::tSUBspi:
  case ARM::tADDspi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm() * 4)
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;
  case ARM::t2SUBspImm:
  case ARM::t2SUBspImm12:
  case ARM::t2ADDspImm:
  case ARM::t2ADDspImm12:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm())
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::tMOVr:
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        (Flags & MachineInstr::FrameSetup)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
               (Flags & MachineInstr::FrameDestroy)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No SEH Opcode for MOV");
    }
    break;

  case ARM::tBX_RET:
  case ARM::TCRETURNri:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::TCRETURNdi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;
  }
  return MBB->insertAfter(MBBI, MIB);
}

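// Remember a position just before MBBI, so that instructions inserted at MBBI
// can later be walked as the range following the returned iterator. Returns a
// null iterator when MBBI is at the start of the block.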
static MachineBasicBlock::iterator
initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
  if (MBBI == MBB.begin())
    return MachineBasicBlock::iterator();
  return std::prev(MBBI);
}

static void insertSEHRange(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator Start,
                           const MachineBasicBlock::iterator &End,
                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  if (Start.isValid())
    Start = std::next(Start);
  else
    Start = MBB.begin();

  for (auto MI = Start; MI != End;) {
    auto Next = std::next(MI);
    // Check if this instruction already has a SEH opcode added. In that
    // case, don't do this generic mapping.
    if (Next != End && isSEHInstruction(*Next)) {
      MI = std::next(Next);
      while (MI != End && isSEHInstruction(*MI))
        ++MI;
      continue;
    }
    insertSEH(MI, TII, MIFlags);
    MI = Next;
  }
}

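// Materialize DestReg = SrcReg + NumBytes, dispatching to the ARM or Thumb2
// flavour of the register-plus-immediate expansion helper.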
static void emitRegPlusImmediate(
    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}

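// Adjust SP by NumBytes; negative values grow the (downward-growing) stack.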
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}

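// Return the number of bytes of stack the given push/pop-like instruction
// moves SP by, derived from its register list.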
static int sizeOfSPAdjustment(const MachineInstr &MI) {
  int RegSize;
  switch (MI.getOpcode()) {
  case ARM::VSTMDDB_UPD:
    RegSize = 8;
    break;
  case ARM::STMDB_UPD:
  case ARM::t2STMDB_UPD:
    RegSize = 4;
    break;
  case ARM::t2STR_PRE:
  case ARM::STR_PRE_IMM:
    return 4;
  default:
    llvm_unreachable("Unknown push or pop like instruction");
  }

  int count = 0;
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4.
  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
    count += RegSize;
  return count;
}

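// On Windows, a function that allocates more than the stack probe size must
// call __chkstk so the guard pages get committed one page at a time.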
static bool WindowsRequiresStackProbe(const MachineFunction &MF,
                                      size_t StackSizeInBytes) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;

  StackProbeSize =
      F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}

namespace {

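// Book-keeping for the SP adjustments made during the prologue, so that the
// matching .cfi_def_cfa_offset directives can be emitted afterwards by
// emitDefCFAOffsets.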
struct StackAdjustingInsts {
  struct InstInfo {
    MachineBasicBlock::iterator I;
    unsigned SPAdjust;
    bool BeforeFPSet;
  };

  SmallVector<InstInfo, 4> Insts;

  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, bool HasFP) {
    MachineFunction &MF = *MBB.getParent();
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
};

} // end anonymous namespace

/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // If the BFC instruction is available, use it to zero the lower bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field:
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(~AlignMask)
          .add(predOps(ARMCC::AL));
    } else if (AlignMask <= 255) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(AlignMask)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(~AlignMask)
        .add(predOps(ARMCC::AL));
  }
}

/// We need the offset of the frame pointer relative to other MachineFrameInfo
/// offsets which are encoded relative to SP at function begin.
/// See also emitPrologue() for how the FP is set up.
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                          const MachineFunction &MF) {
  // For Thumb1, push.w isn't available, so the first push will always push
  // r7 and lr onto the stack.
  if (AFI.isThumb1OnlyFunction())
    return -AFI.getArgRegsSaveSize() - (2 * 4);
  // This is a conservative estimate: assume the frame pointer is r7 and
  // pc("r15") up to r8 get spilled before it (= 8 registers).
  int MaxRegBytes = 8 * 4;
  if (STI.splitFramePointerPush(MF)) {
    // Here, r11 can be stored below all of r4-r15 (3 registers more than
    // above), plus d8-d15.
    MaxRegBytes = 11 * 4 + 8 * 8;
  }
  int FPCXTSaveSize =
      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}

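// Emit the prologue: push the callee-saved registers, set up the frame and
// base pointers where needed, allocate the local area, and record unwind
// information (DWARF CFI or Windows SEH).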
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const TargetMachine &TM = MF.getTarget();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the size of each callee-saved spill area and record which
  // frame index belongs to which area.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
    }
    if (!NeedsWinCFI)
      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
    if (NeedsWinCFI && MBBI != MBB.begin()) {
      insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      MF.setHasWinCFI(true);
    }
    return;
  }

  // Determine spill area sizes.
  if (STI.splitFramePointerPush(MF)) {
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R11:
      case ARM::LR:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
        GPRCS2Size += 4;
        break;
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R12:
        GPRCS1Size += 4;
        break;
      case ARM::FPCXTNS:
        FPCXTSaveSize = 4;
        break;
      default:
        // This is a DPR. Exclude the aligned DPRCS2 spills.
        if (Reg == ARM::D8)
          D8SpillFI = FI;
        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
          DPRCSSize += 8;
      }
    }
  } else {
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          GPRCS2Size += 4;
          break;
        }
        [[fallthrough]];
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
        GPRCS1Size += 4;
        break;
      case ARM::FPCXTNS:
        FPCXTSaveSize = 4;
        break;
      default:
        // This is a DPR. Exclude the aligned DPRCS2 spills.
        if (Reg == ARM::D8)
          D8SpillFI = FI;
        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
          DPRCSSize += 8;
      }
    }
  }

  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;

  // Move past the PAC computation.
  if (AFI->shouldSignReturnAddress())
    LastPush = MBBI++;

  // Move past FPCXT area.
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
  }

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);
    LastPush = std::prev(MBBI);
    DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
  }

  // Determine starting offsets of spill areas.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
  unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
  if (!STI.splitFramePointerPush(MF)) {
    DPRGapSize += GPRCS2Size;
  }
  DPRGapSize %= DPRAlign.value();

  unsigned DPRCSOffset;
  if (STI.splitFramePointerPush(MF)) {
    DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
    GPRCS2Offset = DPRCSOffset - GPRCS2Size;
  } else {
    DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
  }
  int FramePtrOffsetInPush = 0;
  if (HasFP) {
    int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
    assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
           "Max FP estimation is wrong");
    FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  // Move past area 2.
  if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so
  // our .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
    }
  }

  // Move past area 3.
  if (DPRCSSize > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
      LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(D8SpillFI);
  } else
    NumBytes = DPRCSOffset;

  if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  bool NeedsWinCFIStackAlloc = NeedsWinCFI;
  if (STI.splitFramePointerPush(MF) && HasFP)
    NeedsWinCFIStackAlloc = false;

  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    } else {
      // Split into two instructions here, instead of using t2MOVi32imm,
      // to allow inserting accurate SEH instructions (including accurate
      // instruction size for each of them).
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
          .addImm(NumWords & 0xffff)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
          .addReg(ARM::R4)
          .addImm(NumWords >> 16)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    }

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
          .add(predOps(ARMCC::AL))
          .addExternalSymbol("__chkstk")
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
          .add(predOps(ARMCC::AL))
          .addReg(ARM::R12, RegState::Kill)
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    MachineInstrBuilder Instr, SEH;
    Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
                .addReg(ARM::SP, RegState::Kill)
                .addReg(ARM::R4, RegState::Kill)
                .setMIFlags(MachineInstr::FrameSetup)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
    if (NeedsWinCFIStackAlloc) {
      SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
                .addImm(NumBytes)
                .addImm(/*Wide=*/1)
                .setMIFlags(MachineInstr::FrameSetup);
      MBB.insertAfter(Instr, SEH);
    }
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      //   mov sp, r7
      //   sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  // FIXME: The above is not necessarily true when PACBTI is enabled.
  // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
  // so FP ends up in area two.
  MachineBasicBlock::iterator AfterPush;
  if (HasFP) {
    AfterPush = std::next(GPRCS1Push);
    unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
    int FPOffset = PushSize + FramePtrOffsetInPush;
    if (STI.splitFramePointerPush(MF)) {
      AfterPush = std::next(GPRCS2Push);
      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                           FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
    } else {
      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                           FramePtr, ARM::SP, FPOffset,
                           MachineInstr::FrameSetup);
    }
    if (!NeedsWinCFI) {
      if (FramePtrOffsetInPush + PushSize != 0) {
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
            nullptr, MRI->getDwarfRegNum(FramePtr, true),
            FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      } else {
        unsigned CFIIndex =
            MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
                nullptr, MRI->getDwarfRegNum(FramePtr, true)));
        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
  // instructions below don't need to be replayed to unwind the stack.
  if (NeedsWinCFI && MBBI != MBB.begin()) {
    MachineBasicBlock::iterator End = MBBI;
    if (HasFP && STI.splitFramePointerPush(MF))
      End = AfterPush;
    insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
    BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
    MF.setHasWinCFI(true);
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF CFI instructions to describe the situation. Start by
  // recording where each register ended up:
  if (GPRCS1Size > 0 && !NeedsWinCFI) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
    int CFIIndex;
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF))
          break;
        [[fallthrough]];
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
    }
  }

  if (GPRCS2Size > 0 && !NeedsWinCFI) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          unsigned DwarfReg = MRI->getDwarfRegNum(
              Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
          unsigned Offset = MFI.getObjectOffset(FI);
          unsigned CFIIndex = MF.addFrameInst(
              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
              .addCFIIndex(CFIIndex)
              .setMIFlags(MachineInstr::FrameSetup);
        }
        break;
      }
    }
  }

  if (DPRCSSize > 0 && !NeedsWinCFI) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    MachineBasicBlock::iterator Pos = std::next(LastPush);
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
          (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
        unsigned Offset = MFI.getObjectOffset(FI);
        unsigned CFIIndex = MF.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  if (!NeedsWinCFI)
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
                               false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      //   mov r4, sp
      //   -- use emitAligningInstructions to produce best sequence to zero
      //   -- out lower bits in r4
      //   mov sp, r4
      // FIXME: It would be better just to find a spare register here.
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
          .addReg(ARM::SP, RegState::Kill)
          .add(predOps(ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
                               false);
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
          .addReg(ARM::R4, RegState::Kill)
          .add(predOps(ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    else
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}

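// Emit the epilogue: undo the prologue's SP adjustments, pop the callee-saved
// registers, and restore the incoming argument stack this return is expected
// to clean up (nonzero only for guaranteed tail-call conventions).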
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instruction.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (ReservedArgStack +
                 AFI->getFPCXTSaveAreaSize() +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII,
                                  MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          //   mov sp, r7
          //   sub sp, #24
          // This is bad: if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(ARM::R4)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .add(condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MachineInstr::FrameDestroy);

    // Increment past our save areas.
    if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
      MBBI++;

    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
                   MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
      MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   ReservedArgStack + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
    }

    // Validate the PAC: it should already have been popped into R12. For a
    // CMSE entry function, the validation instruction is emitted during
    // expansion of the tBXNS_RET, since the validation must use the value of
    // SP at function entry, before saving, resp. after restoring, FPCXTNS.
    if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
      BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
  }

  if (MF.hasWinCFI()) {
    insertSEHRange(MBB, RangeStart, MBB.end(), TII,
                   MachineInstr::FrameDestroy);
    BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                     int FI,
                                                     Register &FrameReg) const {
  return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
}

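// Resolve a frame index to a base register (SP, FP or the base pointer) plus
// an offset, accounting for stack realignment, VLAs and the limited immediate
// offset ranges of ARM/Thumb loads and stores.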
1439int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
1440 int FI, Register &FrameReg,
1441 int SPAdj) const {
1442 const MachineFrameInfo &MFI = MF.getFrameInfo();
1443 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1444 MF.getSubtarget().getRegisterInfo());
1445 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1446 int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
1447 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1448 bool isFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);
1449
1450 FrameReg = ARM::SP;
1451 Offset += SPAdj;
1452
1453 // SP can move around if there are allocas. We may also lose track of SP
1454 // when emergency spilling inside a non-reserved call frame setup.
1455 bool hasMovingSP = !hasReservedCallFrame(MF);
1456
1457 // When dynamically realigning the stack, use the frame pointer for
1458 // parameters, and the stack/base pointer for locals.
1459 if (RegInfo->hasStackRealignment(MF)) {
1460 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1461 if (isFixed) {
1462 FrameReg = RegInfo->getFrameRegister(MF);
1463 Offset = FPOffset;
1464 } else if (hasMovingSP) {
1465 assert(RegInfo->hasBasePointer(MF) &&
1466 "VLAs and dynamic stack alignment, but missing base pointer!");
1467 FrameReg = RegInfo->getBaseRegister();
1468 Offset -= SPAdj;
1469 }
1470 return Offset;
1471 }
1472
1473 // If there is a frame pointer, use it when we can.
1474 if (hasFP(MF) && AFI->hasStackFrame()) {
1475 // Use frame pointer to reference fixed objects. Use it for locals if
1476 // there are VLAs (and thus the SP isn't reliable as a base).
1477 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1478 FrameReg = RegInfo->getFrameRegister(MF);
1479 return FPOffset;
1480 } else if (hasMovingSP) {
1481 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1482 if (AFI->isThumb2Function()) {
1483 // Try to use the frame pointer if we can, else use the base pointer
1484 // since it's available. This is handy for the emergency spill slot, in
1485 // particular.
1486 if (FPOffset >= -255 && FPOffset < 0) {
1487 FrameReg = RegInfo->getFrameRegister(MF);
1488 return FPOffset;
1489 }
1490 }
1491 } else if (AFI->isThumbFunction()) {
1492 // Prefer SP to base pointer, if the offset is suitably aligned and in
1493 // range as the effective range of the immediate offset is bigger when
1494 // basing off SP.
1495 // Use add <rd>, sp, #<imm8>
1496 // ldr <rd>, [sp, #<imm8>]
1497 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1498 return Offset;
1499 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1500 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1501 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1502 FrameReg = RegInfo->getFrameRegister(MF);
1503 return FPOffset;
1504 }
1505 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1506 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1507 FrameReg = RegInfo->getFrameRegister(MF);
1508 return FPOffset;
1509 }
1510 }
1511 // Use the base pointer if we have one.
1512 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1513 // That can happen if we forced a base pointer for a large call frame.
1514 if (RegInfo->hasBasePointer(MF)) {
1515 FrameReg = RegInfo->getBaseRegister();
1516 Offset -= SPAdj;
1517 }
1518 return Offset;
1519}
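
// Illustrative example (all values hypothetical): with a 104-byte frame where
// the frame record was spilled 96 bytes above the final SP, a fixed argument
// slot at object offset +8 resolves to fp+16 (Offset = 8 + 104 = 112,
// FPOffset = 112 - 96), while a local at object offset -104 resolves to sp+0
// whenever SP is a trustworthy base.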

void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap, bool (*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      Register Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(Reg))
        MBB.addLiveIn(Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm.returnaddress intrinsic and with arguments
      // passed in callee saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
    });
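    // ARM/Thumb LDM and STM register lists must appear in ascending order of
    // register encoding, so the list is sorted above before a single STMDB
    // (or VSTMDDB) is formed from it below.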

    if (Regs.size() > 1 || StrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .setMIFlags(MIFlags)
                                    .add(predOps(ARMCC::AL));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
          .addReg(ARM::SP)
          .setMIFlags(MIFlags)
          .addImm(-4)
          .add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer
    // to higher register numbers so need to be pushed first in order to
    // preserve monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}

void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool (*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      Register Reg = Info.getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          !STI.splitFramePointerPush(MF)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }
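      // This folding turns, e.g., "pop {r4, lr}; bx lr" into the single
      // "pop {r4, pc}", which both reloads the callee saves and returns. It
      // requires ARMv5T or later, where a load into PC interworks correctly.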

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          MIB.copyImplicitOps(*MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
              .addReg(ARM::SP, RegState::Define)
              .addReg(ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      MIB.add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}

/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addReg(ARM::SP)
      .addImm(8 * NumAlignedDPRCS2Regs)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
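  // For the common case of MaxAlign == 16 this typically becomes a single
  // "bfc r4, #0, #4" (or an equivalent bic), clearing the low four bits so
  // that r4 is rounded down to a 16-byte boundary.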

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4)
                                .add(predOps(ARMCC::AL));
  if (!isThumb)
    MIB.add(condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
        .addReg(NextReg)
        .addReg(ARM::R4)
        .addImm((NextReg - R4BaseReg) * 2)
        .add(predOps(ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  // sub r4, sp, #numregs * 8
  // bic r4, r4, #align - 1
  // mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");
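  // The spill sequence emitted above uses one store for 1, 2 or 4 registers,
  // two stores for 3, 5, 6 or 8 registers, and three stores for 7; the
  // fallthrough structure of this switch steps over exactly those counts.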

  // These switches all fall through.
  switch (NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 1:
  case 2:
  case 4:
    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
    ++MI;
  }
  return MI;
}

/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addFrameIndex(D8SpillFI)
      .addImm(0)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
        .addReg(ARM::R4, RegState::Define)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
        .addReg(ARM::R4)
        .addImm(16)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  if (NumAlignedDPRCS2Regs)
    BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
        .addReg(ARM::R4)
        .addImm(2 * (NextReg - R4BaseReg))
        .add(predOps(ARMCC::AL));

  // The last reload inserted kills r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(-4)
        .add(predOps(ARMCC::AL));
  }
  if (STI.splitFramePointerPush(MF)) {
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                 &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                 &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
  } else {
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
                 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
                 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
  }

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
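
// For a typical AAPCS Thumb2 function the non-split path above produces a
// prologue shaped like (registers purely as an example):
//   push  {r4, r5, r7, lr}   // area 1: callee-saved GPRs + LR
//   vpush {d8, d9}           // area 3: callee-saved d-regs
// The split-push variant instead interleaves the frame-record GPRs around the
// d-register saves.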

bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  if (STI.splitFramePointerPush(MF)) {
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isSplitFPArea2Register, 0);
    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
                NumAlignedDPRCS2Regs);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isSplitFPArea1Register, 0);
  } else {
    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
                NumAlignedDPRCS2Regs);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isARMArea2Register, 0);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isARMArea1Register, 0);
  }

  return true;
}

// FIXME: Make generic?
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
                                            const ARMBaseInstrInfo &TII) {
  unsigned FnSize = 0;
  for (auto &MBB : MF) {
    for (auto &MI : MBB)
      FnSize += TII.getInstSizeInBytes(MI);
  }
  if (MF.getJumpTableInfo())
    for (auto &Table : MF.getJumpTableInfo()->getJumpTables())
      FnSize += Table.MBBs.size() * 4;
  FnSize += MF.getConstantPool()->getConstants().size() * 4;
  return FnSize;
}

/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
        if (RegClass && !RegClass->contains(ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          Limit = std::min(Limit, ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offsets, so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit "
                           "calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}

// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // need spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;
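  // For example, if d8, d9 and d11 are saved but d10 is not, NumSpills ends
  // up as 2: d8 and d9 go to the aligned area, and d11 is spilled to the
  // standard DPRCS area by the normal push/pop code.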

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}

bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // For CMSE entry functions, we want to save the FPCXT_NS immediately
  // upon function entry (resp. restore it immediately before return).
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
    return false;

  // We are disabling shrinkwrapping for now when PAC is enabled, as
  // shrinkwrapping can cause clobbering of r12 when the PAC code is
  // generated. A follow-up patch will fix this in a more performant manner.
  if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
          /*SpillsLR=*/true))
    return false;

  return true;
}

static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
  const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
  return Subtarget.createAAPCSFrameChainLeaf() ||
         (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
}

// Thumb1 may require a spill when storing to a frame index through FP (or any
// access with execute-only), for cases where FP is a high register (R11). This
// scans the function for cases where this may happen.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
                                       const TargetFrameLowering &TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (!AFI->isThumb1OnlyFunction())
    return false;

  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
  for (const auto &MBB : MF)
    for (const auto &MI : MBB)
      if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
          STI.genExecuteOnly())
        for (const auto &Op : MI.operands())
          if (Op.isFI()) {
            Register Reg;
            TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
            if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
              return true;
          }
  return false;
}

void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if a Thumb2 function requires stack realignment - it will be
  // used as a scratch register. Also spill R4 if a Thumb2 function has
  // varsized objects, since it's not always possible to restore sp from fp
  // in a single instruction.
  // FIXME: It would be better just to find a spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if a Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if the Thumb1 epilogue has to restore SP from FP or the
    // function requires stack alignment. We don't know for sure what the
    // stack size will be, but for this, an estimate is good enough. If
    // anything changes it, it'll be a spill, which implies we've used all the
    // registers and so R4 is already used, so not marking it here will be OK.
    // FIXME: It would be better just to find a spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.splitFramePushPop(MF)) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track of whether LR and any of R4, R5, R6 and R7 are spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.splitFramePushPop(MF)) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This
    // enables the use of BL to implement a far jump.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
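  // NumFPRSpills was accumulated above in 4-byte units (SPR = 1, DPR = 2,
  // QPR = 4), so both spill counts scale uniformly by 4 bytes here.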

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
    MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
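    // Both immediates are scaled by 4: tSTRspi reaches sp+0 .. sp+1020 in
    // 4-byte steps, while tSTRi only reaches base+0 .. base+124.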
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets ||
      !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (HasFP) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(LRPos);
      }
      auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions cannot
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue for
      // free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6 and R7 is, spill
    // LR as well so we can fold BX_RET into the register restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill a high register if the function is Thumb. In the case
          // of Windows on ARM, accept R11 (frame pointer).
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
      }
    }

    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is a high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            MFI.CreateStackObject(Size, Alignment, false));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}

void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isCalleeSavedInfoValid())
    return;

  // If no terminator implicitly uses LR, we can 'restore' LR directly into PC
  // so that it is not live out of the return block; clear the Restored bit in
  // that case.
  for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
    if (Info.getReg() != ARM::LR)
      continue;
    if (all_of(MF, [](const MachineBasicBlock &MBB) {
          return all_of(MBB.terminators(), [](const MachineInstr &Term) {
            return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
                   Term.getOpcode() == ARM::t2LDMIA_RET ||
                   Term.getOpcode() == ARM::tPOP_RET;
          });
        })) {
      Info.setRestored(false);
      break;
    }
  }
}
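
// For instance, a return lowered as "pop {r7, pc}" (tPOP_RET) reloads the
// saved return address straight into PC, so LR itself never needs to be
// marked as restored for that function.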

void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}

void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
                                      BitVector &SavedRegs) const {
  TargetFrameLowering::getCalleeSaves(MF, SavedRegs);

  // If we have the "returned" parameter attribute which guarantees that we
  // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
  // record that fact for IPRA.
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->getPreservesR0())
    SavedRegs.set(ARM::R0);
}

bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(ARM::FPCXTNS);
    CSI.back().setRestored(false);
  }

  // For functions that sign their return address, the return address PAC is
  // computed in R12 upon function entry. Treat R12 as a callee-saved register
  // in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of the registers must match the order in which we push them,
    // because PEI assigns frame indices in that order. When compiling for
    // return address signing and authentication, we use split push, so the
    // order we want is:
    // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
    CSI.insert(find_if(CSI,
                       [=](const auto &CS) {
                         Register Reg = CS.getReg();
                         return Reg == ARM::R10 || Reg == ARM::R11 ||
                                Reg == ARM::R8 || Reg == ARM::R9 ||
                                ARM::DPRRegClass.contains(Reg);
                       }),
               CalleeSavedInfo(ARM::R12));
  }

  return false;
}

const TargetFrameLowering::SpillSlot *
ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
  NumEntries = std::size(FixedSpillOffsets);
  return FixedSpillOffsets;
}

MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
  unsigned PredReg = TII.getFramePred(*I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(*I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pop arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
                 MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}
2920
2921/// Get the minimum constant for ARM that is greater than or equal to the
2922/// argument. In ARM, constants can have any value that can be produced by
2923/// rotating an 8-bit value to the right by an even number of bits within a
2924/// 32-bit word.
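///
/// For example, 257 (0x101) is not encodable because its set bits span nine
/// bits, so alignToARMConstant(257) returns 260 (0x104): the 8-bit value 0x41
/// rotated right by 30 bits, and the smallest encodable constant >= 257.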
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time until one of the top two bits is
  // set, so the leading bits of the value sit in the top byte.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Round the top byte up if any of the lower 24 bits are set.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // If the carry overflowed into bit 8, clear the bottom two bits so the
  // result still corresponds to an 8-bit value at an even rotation.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization shift.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}

// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is somewhat arbitrarily chosen, but
// it must be well defined in order to allow for consistent implementations of
// the __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions, because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like this (when calling __morestack):
//
// * r4 holds the minimum stack size requested for this function call
// * r5 holds the stack size of the arguments to the function
// * the beginning of the function is 3 instructions after the call to
//   __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments onto the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
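//
// As a rough sketch (assuming ARM mode with the MRC-based TLS access; Thumb1
// loads __STACK_LIMIT through a constant pool instead, and frames smaller
// than kSplitStackAvailable compare sp itself rather than sp minus the frame
// size), the code emitted below looks approximately like this, where .Lpost
// is an illustrative label:
//
//         push  {r4, r5}                  ; save scratch registers
//         sub   r5, sp, #AlignedStackSize ; r5 <- lowest address we need
//         mrc   p15, #0, r4, c13, c0, #3  ; r4 <- TLS base
//         ldr   r4, [r4, #4*TlsOffset]    ; r4 <- stack limit
//         cmp   r4, r5
//         bls   .Lpost                    ; enough stack: skip __morestack
//         mov   r4, #AlignedStackSize     ; requested stack size
//         mov   r5, #ArgumentStackSize    ; size of stack-passed arguments
//         push  {lr}
//         bl    __morestack
//         pop   {lr}
//         pop   {r4, r5}
//         bx    lr                        ; __morestack resumes 3 insns past bl
// .Lpost:
//         pop   {r4, r5}                  ; then fall through to the prologue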
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, nor platforms other than
  // android/linux. Note that both Thumb1 and Thumb2 are supported on
  // android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab all the blocks that reach PrologueMBB to update their liveness as
  // well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order of that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(&PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // The required stack size, rounded up so it is encodable as an ARM constant.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
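  // This is sound because the limit recorded in the TCB sits
  // kSplitStackAvailable bytes above the real limit: if sp itself is above
  // the recorded limit, a frame smaller than that slack still fits above the
  // real limit.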

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers).
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // mov SR1, sp
  if (Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
          .addReg(ARM::SP)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
          .addReg(ARM::SP)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  }

  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
          .addExternalSymbol("__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
          .addConstantPoolIndex(CPI)
          .add(predOps(ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
        .add(predOps(ARMCC::AL));
  } else {
    // Get the TLS base address from the coprocessor.
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));

    // Use the last TLS slot on android and a private field of the TCB on
    // linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset.
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
  }

  // Compare the stack limit with the stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)
      .add(predOps(ARMCC::AL));

  // This jump is taken if StackLimit <= SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addMBB(PostStackMBB)
      .addImm(ARMCC::LS)
      .addReg(ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0 (r4)
  // and that the size of the stack arguments is in SR1 (r5).

  // Pass the first argument to __morestack in Scratch Register #0:
  // the amount of stack required.
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
          .add(condCodeOp())
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
    }
  }

  // Pass the second argument to __morestack in Scratch Register #1:
  // the amount of stack consumed to save the function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
          .add(condCodeOp())
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            *AllocMBB, MBBI, DL, ScratchReg1, 0,
            alignToARMConstant(ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          *AllocMBB, MBBI, DL, ScratchReg1, 0,
          alignToARMConstant(ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save the return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register.
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .add(predOps(ARMCC::AL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore the return address of the original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .add(predOps(ARMCC::AL))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    } else {
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
          .add(predOps(ARMCC::AL));
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip the PostStackMBB block, so we need to restore
  // the scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Return from this function.
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));

  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, i.e. that they have the "Same Value".
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Organize the CFG: wire up the successors of the new blocks.
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}