//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM implementation of the TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
//
// On ARM, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Technically the last frame area (VLAs) isn't created until the main
// function body runs, after the prologue; it is depicted here for
// completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- (sp at function entry)
// |                                   |
// | varargs from registers            |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_lr                           |
// | prev_fp                           |
// | (a.k.a. "frame record")           |
// |                                   |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
// |                                   |
// | callee-saved gpr registers        |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved fp/simd regs         |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.8-byte.alignment.....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- base pointer (not defined by ABI,
// |.variable-sized.local.variables....|    LLVM chooses r6)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers (fp,
// bp, sp) to that data must be computable at compile time. The sizes of the
// areas with a dotted background are not known at compile time, so when those
// areas are present, all three of fp, bp and sp must be set up in order to
// reach every frame area, assuming all of the frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// Even when a base pointer is not strictly needed, one is generated anyway
// when offsets from the frame pointer to local variables become so large that
// they can't be encoded in the immediate fields of loads or stores.
//
// The frame pointer might be chosen to be r7 or r11, depending on the target
// architecture and operating system. See ARMSubtarget::getFramePointerReg for
// details.
//
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls. If we do have VLAs, then
// the stack pointer must be decremented and incremented around each call to
// make space for the arguments below the VLAs.
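//
// For example, a call made below VLAs is bracketed roughly like this
// (illustrative sketch, not exact codegen):
//
//   sub sp, sp, #16   @ make space for the outgoing arguments
//   bl  callee
//   add sp, sp, #16   @ release it again
//
// whereas with a reserved call frame that space is folded into the prologue's
// single SP adjustment and sp stays put across calls.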
//
//===----------------------------------------------------------------------===//

#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

#define DEBUG_TYPE "arm-frame-lowering"

using namespace llvm;

static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

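// Skip over the code that emitAlignedDPRCS2Spills inserted for the given
// number of aligned DPRCS2 registers; forward-declared here because
// emitPrologue needs it before its definition appears later in this file.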
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);

ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}

bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
  // iOS always has a FP for backtracking, force other targets to keep their FP
  // when doing FastISel. The emitted code is currently superior, and in cases
  // like test-suite's lencod, FastISel isn't quite correct when the FP is
  // eliminated.
  return MF.getSubtarget<ARMSubtarget>().useFastISel();
}

/// Returns true if the target can safely skip saving callee-saved registers
/// for noreturn nounwind functions.
bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
         MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
         !MF.getFunction().hasFnAttribute(Attribute::UWTable));

  // The frame pointer and link register are not treated as normal CSRs, so we
  // can always skip CSR saves for non-returning functions.
  return true;
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // ABI-required frame pointer.
  if (MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;

  // Frame pointer required for use within this function.
  return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken());
}

/// isFPReserved - Return true if the frame pointer register should be
/// considered a reserved register in the scope of the specified function.
bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
  return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned CFSize = MFI.getMaxCallFrameSize();
  // It's not always a good idea to include the call frame as part of the
  // stack frame. ARM (especially Thumb) has small immediate offsets for
  // addressing the stack frame, so a large call frame can cause poor codegen
  // and may even make it impossible to scavenge a register.
  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
    return false;

  return !MFI.hasVarSizedObjects();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}

// Returns how much of the incoming argument stack area we should clean up in
// an epilogue. For the C calling convention this will be 0, for guaranteed
// tail call conventions it can be positive (a normal return or a tail call to
// a function that uses less stack space for arguments) or negative (for a
// tail call to a function that needs more stack space than us for arguments).
static int getArgumentStackToRestore(MachineFunction &MF,
                                     MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
                       RetOpcode == ARM::TCRETURNri;
  }
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  int ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}

static bool needsWinCFI(const MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         F.needsUnwindTableEntry();
}

// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             unsigned Flags) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  MachineInstrBuilder MIB;
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  Flags |= MachineInstr::NoMerge;

  switch (Opc) {
  default:
    report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
    break;
  case ARM::t2ADDri:   // add.w r11, sp, #xx
  case ARM::t2ADDri12: // add.w r11, sp, #xx
  case ARM::t2MOVTi16: // movt  r4, #xx
  case ARM::tBL:       // bl __chkstk
    // These are harmless if used for just setting up a frame pointer,
    // but that frame pointer can't be relied upon for unwinding, unless
    // set up with SEH_SaveSP.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi16: { // mov(w) r4, #xx
    bool Wide = MBBI->getOperand(1).getImm() >= 256;
    if (!Wide) {
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
      NewInstr.add(MBBI->getOperand(0));
      NewInstr.add(t1CondCodeOp(/*isDead=*/true));
      for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
        NewInstr.add(MO);
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
    break;
  }

  case ARM::tBLXr: // blx r12 (__chkstk)
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi32imm: // movw+movt
    // This pseudo instruction expands into two mov instructions. If the
    // second operand is a symbol reference, this will stay as two wide
    // instructions, movw+movt. If they're immediates, the first one can
    // end up as a narrow mov though.
    // As two SEH instructions are appended here, they won't get interleaved
    // between the two final movw/movt instructions, but it doesn't make any
    // practical difference.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    MBB->insertAfter(MBBI, MIB);
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2STR_PRE:
    if (MBBI->getOperand(0).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == -4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2STR_PRE");
    }
    break;

  case ARM::t2LDR_POST:
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == 4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2LDR_POST");
    }
    break;

  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned Mask = 0;
    bool Wide = false;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (Reg == 15)
        Reg = 14;
      if (Reg >= 8 && Reg <= 13)
        Wide = true;
      else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
        Wide = true;
      Mask |= 1 << Reg;
    }
    if (!Wide) {
      unsigned NewOpc;
      switch (Opc) {
      case ARM::t2LDMIA_RET:
        NewOpc = ARM::tPOP_RET;
        break;
      case ARM::t2LDMIA_UPD:
        NewOpc = ARM::tPOP;
        break;
      case ARM::t2STMDB_UPD:
        NewOpc = ARM::tPUSH;
        break;
      default:
        llvm_unreachable("");
      }
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
      for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    unsigned SEHOpc =
        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
    MIB = BuildMI(MF, DL, TII.get(SEHOpc))
              .addImm(Mask)
              .addImm(Wide ? 1 : 0)
              .setMIFlags(Flags);
    break;
  }
  case ARM::VSTMDDB_UPD:
  case ARM::VLDMDIA_UPD: {
    int First = -1, Last = 0;
    for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (First == -1)
        First = Reg;
      Last = Reg;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
              .addImm(First)
              .addImm(Last)
              .setMIFlags(Flags);
    break;
  }
  case ARM::tSUBspi:
  case ARM::tADDspi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm() * 4)
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;
  case ARM::t2SUBspImm:
  case ARM::t2SUBspImm12:
  case ARM::t2ADDspImm:
  case ARM::t2ADDspImm12:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm())
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::tMOVr:
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        (Flags & MachineInstr::FrameSetup)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
               (Flags & MachineInstr::FrameDestroy)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No SEH Opcode for MOV");
    }
    break;

  case ARM::tBX_RET:
  case ARM::TCRETURNri:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::TCRETURNdi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;
  }
  return MBB->insertAfter(MBBI, MIB);
}

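// Remember a position just before MBBI, so that instructions inserted at MBBI
// can later be walked as the range following the returned iterator. Returns a
// null iterator when MBBI is at the start of the block.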
static MachineBasicBlock::iterator
initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
  if (MBBI == MBB.begin())
    return MachineBasicBlock::iterator();
  return std::prev(MBBI);
}

static void insertSEHRange(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator Start,
                           const MachineBasicBlock::iterator &End,
                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  if (Start.isValid())
    Start = std::next(Start);
  else
    Start = MBB.begin();

  for (auto MI = Start; MI != End;) {
    auto Next = std::next(MI);
    // Check if this instruction already has a SEH opcode added. In that
    // case, don't do this generic mapping.
    if (Next != End && isSEHInstruction(*Next)) {
      MI = std::next(Next);
      while (MI != End && isSEHInstruction(*MI))
        ++MI;
      continue;
    }
    insertSEH(MI, TII, MIFlags);
    MI = Next;
  }
}

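// Materialize DestReg = SrcReg + NumBytes, dispatching to the ARM or Thumb2
// flavour of the register-plus-immediate expansion helper.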
static void emitRegPlusImmediate(
    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}

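// Adjust SP by NumBytes; negative values grow the (downward-growing) stack.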
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}

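// Return the number of bytes of stack the given push/pop-like instruction
// moves SP by, derived from its register list.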
static int sizeOfSPAdjustment(const MachineInstr &MI) {
  int RegSize;
  switch (MI.getOpcode()) {
  case ARM::VSTMDDB_UPD:
    RegSize = 8;
    break;
  case ARM::STMDB_UPD:
  case ARM::t2STMDB_UPD:
    RegSize = 4;
    break;
  case ARM::t2STR_PRE:
  case ARM::STR_PRE_IMM:
    return 4;
  default:
    llvm_unreachable("Unknown push or pop like instruction");
  }

  int count = 0;
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4.
  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
    count += RegSize;
  return count;
}

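// On Windows, a function that allocates more than the stack probe size must
// call __chkstk so the guard pages get committed one page at a time.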
static bool WindowsRequiresStackProbe(const MachineFunction &MF,
                                      size_t StackSizeInBytes) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;

  StackProbeSize =
      F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}

namespace {

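// Book-keeping for the SP adjustments made during the prologue, so that the
// matching .cfi_def_cfa_offset directives can be emitted afterwards by
// emitDefCFAOffsets.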
struct StackAdjustingInsts {
  struct InstInfo {
    MachineBasicBlock::iterator I;
    unsigned SPAdjust;
    bool BeforeFPSet;
  };

  SmallVector<InstInfo, 4> Insts;

  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, bool HasFP) {
    MachineFunction &MF = *MBB.getParent();
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
};

} // end anonymous namespace

/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // If the BFC instruction is available, use it to zero the lower bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field:
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(~AlignMask)
          .add(predOps(ARMCC::AL));
    } else if (AlignMask <= 255) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(AlignMask)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(~AlignMask)
        .add(predOps(ARMCC::AL));
  }
}

/// We need the offset of the frame pointer relative to other MachineFrameInfo
/// offsets which are encoded relative to SP at function begin.
/// See also emitPrologue() for how the FP is set up.
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                          const MachineFunction &MF) {
  // For Thumb1, push.w isn't available, so the first push will always push
  // r7 and lr onto the stack.
  if (AFI.isThumb1OnlyFunction())
    return -AFI.getArgRegsSaveSize() - (2 * 4);
  // This is a conservative estimate: assume the frame pointer is r7 and
  // pc("r15") up to r8 get spilled before it (= 8 registers).
  int MaxRegBytes = 8 * 4;
  if (STI.splitFramePointerPush(MF)) {
    // Here, r11 can be stored below all of r4-r15 (3 registers more than
    // above), plus d8-d15.
    MaxRegBytes = 11 * 4 + 8 * 8;
  }
  int FPCXTSaveSize =
      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}

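// Emit the prologue: push the callee-saved registers, set up the frame and
// base pointers where needed, allocate the local area, and record unwind
// information (DWARF CFI or Windows SEH).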
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const TargetMachine &TM = MF.getTarget();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the size of each callee-saved spill area and record which
  // frame index belongs to which area.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
    }
    if (!NeedsWinCFI)
      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
    if (NeedsWinCFI && MBBI != MBB.begin()) {
      insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      MF.setHasWinCFI(true);
    }
    return;
  }

  // Determine spill area sizes.
  if (STI.splitFramePointerPush(MF)) {
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R11:
      case ARM::LR:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
        GPRCS2Size += 4;
        break;
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R12:
        GPRCS1Size += 4;
        break;
      case ARM::FPCXTNS:
        FPCXTSaveSize = 4;
        break;
      default:
        // This is a DPR. Exclude the aligned DPRCS2 spills.
        if (Reg == ARM::D8)
          D8SpillFI = FI;
        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
          DPRCSSize += 8;
      }
    }
  } else {
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          GPRCS2Size += 4;
          break;
        }
        [[fallthrough]];
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
        GPRCS1Size += 4;
        break;
      case ARM::FPCXTNS:
        FPCXTSaveSize = 4;
        break;
      default:
        // This is a DPR. Exclude the aligned DPRCS2 spills.
        if (Reg == ARM::D8)
          D8SpillFI = FI;
        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
          DPRCSSize += 8;
      }
    }
  }

  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;

  // Move past the PAC computation.
  if (AFI->shouldSignReturnAddress())
    LastPush = MBBI++;

  // Move past FPCXT area.
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
  }

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);
    LastPush = std::prev(MBBI);
    DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
  }

  // Determine starting offsets of spill areas.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
  unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
  if (!STI.splitFramePointerPush(MF)) {
    DPRGapSize += GPRCS2Size;
  }
  DPRGapSize %= DPRAlign.value();

  unsigned DPRCSOffset;
  if (STI.splitFramePointerPush(MF)) {
    DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
    GPRCS2Offset = DPRCSOffset - GPRCS2Size;
  } else {
    DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
  }
  int FramePtrOffsetInPush = 0;
  if (HasFP) {
    int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
    assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
           "Max FP estimation is wrong");
    FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  // Move past area 2.
  if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so
  // our .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
    }
  }

  // Move past area 3.
  if (DPRCSSize > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
      LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(D8SpillFI);
  } else
    NumBytes = DPRCSOffset;

  if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  bool NeedsWinCFIStackAlloc = NeedsWinCFI;
  if (STI.splitFramePointerPush(MF) && HasFP)
    NeedsWinCFIStackAlloc = false;

  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    } else {
      // Split into two instructions here, instead of using t2MOVi32imm,
      // to allow inserting accurate SEH instructions (including accurate
      // instruction size for each of them).
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
          .addImm(NumWords & 0xffff)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
          .addReg(ARM::R4)
          .addImm(NumWords >> 16)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    }

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
          .add(predOps(ARMCC::AL))
          .addExternalSymbol("__chkstk")
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
          .add(predOps(ARMCC::AL))
          .addReg(ARM::R12, RegState::Kill)
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    MachineInstrBuilder Instr, SEH;
    Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
                .addReg(ARM::SP, RegState::Kill)
                .addReg(ARM::R4, RegState::Kill)
                .setMIFlags(MachineInstr::FrameSetup)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
    if (NeedsWinCFIStackAlloc) {
      SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
                .addImm(NumBytes)
                .addImm(/*Wide=*/1)
                .setMIFlags(MachineInstr::FrameSetup);
      MBB.insertAfter(Instr, SEH);
    }
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      //   mov sp, r7
      //   sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  // FIXME: The above is not necessarily true when PACBTI is enabled.
  // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
  // so FP ends up in area two.
  MachineBasicBlock::iterator AfterPush;
  if (HasFP) {
    AfterPush = std::next(GPRCS1Push);
    unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
    int FPOffset = PushSize + FramePtrOffsetInPush;
    if (STI.splitFramePointerPush(MF)) {
      AfterPush = std::next(GPRCS2Push);
      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                           FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
    } else {
      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                           FramePtr, ARM::SP, FPOffset,
                           MachineInstr::FrameSetup);
    }
    if (!NeedsWinCFI) {
      if (FramePtrOffsetInPush + PushSize != 0) {
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
            nullptr, MRI->getDwarfRegNum(FramePtr, true),
            FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      } else {
        unsigned CFIIndex =
            MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
                nullptr, MRI->getDwarfRegNum(FramePtr, true)));
        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
  // instructions below don't need to be replayed to unwind the stack.
  if (NeedsWinCFI && MBBI != MBB.begin()) {
    MachineBasicBlock::iterator End = MBBI;
    if (HasFP && STI.splitFramePointerPush(MF))
      End = AfterPush;
    insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
    BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
    MF.setHasWinCFI(true);
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF CFI instructions to describe the situation. Start by
  // recording where each register ended up:
  if (GPRCS1Size > 0 && !NeedsWinCFI) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
    int CFIIndex;
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF))
          break;
        [[fallthrough]];
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
    }
  }

  if (GPRCS2Size > 0 && !NeedsWinCFI) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          unsigned DwarfReg = MRI->getDwarfRegNum(
              Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
          unsigned Offset = MFI.getObjectOffset(FI);
          unsigned CFIIndex = MF.addFrameInst(
              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
              .addCFIIndex(CFIIndex)
              .setMIFlags(MachineInstr::FrameSetup);
        }
        break;
      }
    }
  }

  if (DPRCSSize > 0 && !NeedsWinCFI) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    MachineBasicBlock::iterator Pos = std::next(LastPush);
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
          (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
        unsigned Offset = MFI.getObjectOffset(FI);
        unsigned CFIIndex = MF.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  if (!NeedsWinCFI)
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
                               false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      //   mov r4, sp
      //   -- use emitAligningInstructions to produce best sequence to zero
      //   -- out lower bits in r4
      //   mov sp, r4
      // FIXME: It would be better just to find a spare register here.
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
          .addReg(ARM::SP, RegState::Kill)
          .add(predOps(ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
                               false);
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
          .addReg(ARM::R4, RegState::Kill)
          .add(predOps(ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    else
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}

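// Emit the epilogue: undo the prologue's SP adjustments, pop the callee-saved
// registers, and restore the incoming argument stack this return is expected
// to clean up (nonzero only for guaranteed tail-call conventions).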
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instruction.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (ReservedArgStack +
                 AFI->getFPCXTSaveAreaSize() +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII,
                                  MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          //   mov sp, r7
          //   sub sp, #24
          // This is bad: if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(ARM::R4)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .add(condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MachineInstr::FrameDestroy);

    // Increment past our save areas.
    if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
      MBBI++;

    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
                   MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
      MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   ReservedArgStack + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
    }

    // Validate the PAC: it should already have been popped into R12. For a
    // CMSE entry function, the validation instruction is emitted during
    // expansion of the tBXNS_RET, since the validation must use the value of
    // SP at function entry, before saving, resp. after restoring, FPCXTNS.
    if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
      BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
  }

  if (MF.hasWinCFI()) {
    insertSEHRange(MBB, RangeStart, MBB.end(), TII,
                   MachineInstr::FrameDestroy);
    BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                     int FI,
                                                     Register &FrameReg) const {
  return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
}

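// Resolve a frame index to a base register (SP, FP or the base pointer) plus
// an offset, accounting for stack realignment, VLAs and the limited immediate
// offset ranges of ARM/Thumb loads and stores.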
1439int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
1440 int FI, Register &FrameReg,
1441 int SPAdj) const {
1442 const MachineFrameInfo &MFI = MF.getFrameInfo();
1443 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1444 MF.getSubtarget().getRegisterInfo());
1445 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1446 int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
1447 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1448 bool isFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);
1449
1450 FrameReg = ARM::SP;
1451 Offset += SPAdj;
1452
1453 // SP can move around if there are allocas. We may also lose track of SP
1454 // when emergency spilling inside a non-reserved call frame setup.
1455 bool hasMovingSP = !hasReservedCallFrame(MF);
1456
1457 // When dynamically realigning the stack, use the frame pointer for
1458 // parameters, and the stack/base pointer for locals.
1459 if (RegInfo->hasStackRealignment(MF)) {
1460 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1461 if (isFixed) {
1462 FrameReg = RegInfo->getFrameRegister(MF);
1463 Offset = FPOffset;
1464 } else if (hasMovingSP) {
1465 assert(RegInfo->hasBasePointer(MF) &&
1466 "VLAs and dynamic stack alignment, but missing base pointer!");
1467 FrameReg = RegInfo->getBaseRegister();
1468 Offset -= SPAdj;
1469 }
1470 return Offset;
1471 }
1472
1473 // If there is a frame pointer, use it when we can.
1474 if (hasFP(MF) && AFI->hasStackFrame()) {
1475 // Use frame pointer to reference fixed objects. Use it for locals if
1476 // there are VLAs (and thus the SP isn't reliable as a base).
1477 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1478 FrameReg = RegInfo->getFrameRegister(MF);
1479 return FPOffset;
1480 } else if (hasMovingSP) {
1481 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1482 if (AFI->isThumb2Function()) {
1483 // Try to use the frame pointer if we can, else use the base pointer
1484 // since it's available. This is handy for the emergency spill slot, in
1485 // particular.
1486 if (FPOffset >= -255 && FPOffset < 0) {
1487 FrameReg = RegInfo->getFrameRegister(MF);
1488 return FPOffset;
1489 }
1490 }
1491 } else if (AFI->isThumbFunction()) {
1492 // Prefer SP to base pointer, if the offset is suitably aligned and in
1493 // range as the effective range of the immediate offset is bigger when
1494 // basing off SP.
1495 // Use add <rd>, sp, #<imm8>
1496 // ldr <rd>, [sp, #<imm8>]
1497 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1498 return Offset;
1499 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1500 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1501 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1502 FrameReg = RegInfo->getFrameRegister(MF);
1503 return FPOffset;
1504 }
1505 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1506 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1507 FrameReg = RegInfo->getFrameRegister(MF);
1508 return FPOffset;
1509 }
1510 }
1511 // Use the base pointer if we have one.
1512 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1513 // That can happen if we forced a base pointer for a large call frame.
1514 if (RegInfo->hasBasePointer(MF)) {
1515 FrameReg = RegInfo->getBaseRegister();
1516 Offset -= SPAdj;
1517 }
1518 return Offset;
1519}
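
// Illustrative example (all values hypothetical): with a 104-byte frame where
// the frame record was spilled 96 bytes above the final SP, a fixed argument
// slot at object offset +8 resolves to fp+16 (Offset = 8 + 104 = 112,
// FPOffset = 112 - 96), while a local at object offset -104 resolves to sp+0
// whenever SP is a trustworthy base.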

void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap, bool (*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      Register Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(Reg))
        MBB.addLiveIn(Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm.returnaddress intrinsic and with arguments
      // passed in callee saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
    });
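    // ARM/Thumb LDM and STM register lists must appear in ascending order of
    // register encoding, so the list is sorted above before a single STMDB
    // (or VSTMDDB) is formed from it below.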

    if (Regs.size() > 1 || StrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .setMIFlags(MIFlags)
                                    .add(predOps(ARMCC::AL));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
          .addReg(ARM::SP)
          .setMIFlags(MIFlags)
          .addImm(-4)
          .add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer
    // to higher register numbers so need to be pushed first in order to
    // preserve monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}

void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool (*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      Register Reg = Info.getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          !STI.splitFramePointerPush(MF)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }
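      // This folding turns, e.g., "pop {r4, lr}; bx lr" into the single
      // "pop {r4, pc}", which both reloads the callee saves and returns. It
      // requires ARMv5T or later, where a load into PC interworks correctly.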

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          MIB.copyImplicitOps(*MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
              .addReg(ARM::SP, RegState::Define)
              .addReg(ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      MIB.add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}

/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addReg(ARM::SP)
      .addImm(8 * NumAlignedDPRCS2Regs)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
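  // For the common case of MaxAlign == 16 this typically becomes a single
  // "bfc r4, #0, #4" (or an equivalent bic), clearing the low four bits so
  // that r4 is rounded down to a 16-byte boundary.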

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4)
                                .add(predOps(ARMCC::AL));
  if (!isThumb)
    MIB.add(condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
        .addReg(NextReg)
        .addReg(ARM::R4)
        .addImm((NextReg - R4BaseReg) * 2)
        .add(predOps(ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  // sub r4, sp, #numregs * 8
  // bic r4, r4, #align - 1
  // mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");
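  // The spill sequence emitted above uses one store for 1, 2 or 4 registers,
  // two stores for 3, 5, 6 or 8 registers, and three stores for 7; the
  // fallthrough structure of this switch steps over exactly those counts.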

  // These switches all fall through.
  switch (NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 1:
  case 2:
  case 4:
    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
    ++MI;
  }
  return MI;
}

/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addFrameIndex(D8SpillFI)
      .addImm(0)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
        .addReg(ARM::R4, RegState::Define)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
        .addReg(ARM::R4)
        .addImm(16)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  if (NumAlignedDPRCS2Regs)
    BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
        .addReg(ARM::R4)
        .addImm(2 * (NextReg - R4BaseReg))
        .add(predOps(ARMCC::AL));

  // The last reload inserted kills r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(-4)
        .add(predOps(ARMCC::AL));
  }
  if (STI.splitFramePointerPush(MF)) {
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                 &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                 &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
  } else {
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
                 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
                 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
  }

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
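
// For a typical AAPCS Thumb2 function the non-split path above produces a
// prologue shaped like (registers purely as an example):
//   push  {r4, r5, r7, lr}   // area 1: callee-saved GPRs + LR
//   vpush {d8, d9}           // area 3: callee-saved d-regs
// The split-push variant instead interleaves the frame-record GPRs around the
// d-register saves.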

bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  if (STI.splitFramePointerPush(MF)) {
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isSplitFPArea2Register, 0);
    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
                NumAlignedDPRCS2Regs);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isSplitFPArea1Register, 0);
  } else {
    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
                NumAlignedDPRCS2Regs);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isARMArea2Register, 0);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isARMArea1Register, 0);
  }

  return true;
}

// FIXME: Make generic?
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
                                            const ARMBaseInstrInfo &TII) {
  unsigned FnSize = 0;
  for (auto &MBB : MF) {
    for (auto &MI : MBB)
      FnSize += TII.getInstSizeInBytes(MI);
  }
  if (MF.getJumpTableInfo())
    for (auto &Table : MF.getJumpTableInfo()->getJumpTables())
      FnSize += Table.MBBs.size() * 4;
  FnSize += MF.getConstantPool()->getConstants().size() * 4;
  return FnSize;
}

/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
        if (RegClass && !RegClass->contains(ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          Limit = std::min(Limit, ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offsets, so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit "
                           "calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}

// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // need spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;
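  // For example, if d8, d9 and d11 are saved but d10 is not, NumSpills ends
  // up as 2: d8 and d9 go to the aligned area, and d11 is spilled to the
  // standard DPRCS area by the normal push/pop code.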

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}

bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // For CMSE entry functions, we want to save the FPCXT_NS immediately
  // upon function entry (resp. restore it immediately before return).
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
    return false;

  // We are disabling shrinkwrapping for now when PAC is enabled, as
  // shrinkwrapping can cause clobbering of r12 when the PAC code is
  // generated. A follow-up patch will fix this in a more performant manner.
  if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
          /*SpillsLR=*/true))
    return false;

  return true;
}

static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
  const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
  return Subtarget.createAAPCSFrameChainLeaf() ||
         (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
}

// Thumb1 may require a spill when storing to a frame index through FP (or any
// access with execute-only), for cases where FP is a high register (R11). This
// scans the function for cases where this may happen.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
                                       const TargetFrameLowering &TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (!AFI->isThumb1OnlyFunction())
    return false;

  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
  for (const auto &MBB : MF)
    for (const auto &MI : MBB)
      if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
          STI.genExecuteOnly())
        for (const auto &Op : MI.operands())
          if (Op.isFI()) {
            Register Reg;
            TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
            if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
              return true;
          }
  return false;
}

void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if a Thumb2 function requires stack realignment - it will be
  // used as a scratch register. Also spill R4 if a Thumb2 function has
  // varsized objects, since it's not always possible to restore sp from fp
  // in a single instruction.
  // FIXME: It would be better just to find a spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if a Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if the Thumb1 epilogue has to restore SP from FP or the
    // function requires stack alignment. We don't know for sure what the
    // stack size will be, but for this, an estimate is good enough. If
    // anything changes it, it'll be a spill, which implies we've used all the
    // registers and so R4 is already used, so not marking it here will be OK.
    // FIXME: It would be better just to find a spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.splitFramePushPop(MF)) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track of whether LR and any of R4, R5, R6 and R7 are spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.splitFramePushPop(MF)) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This
    // enables the use of BL to implement a far jump.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
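  // NumFPRSpills was accumulated above in 4-byte units (SPR = 1, DPR = 2,
  // QPR = 4), so both spill counts scale uniformly by 4 bytes here.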

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
    MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
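    // Both immediates are scaled by 4: tSTRspi reaches sp+0 .. sp+1020 in
    // 4-byte steps, while tSTRi only reaches base+0 .. base+124.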
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets ||
      !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (HasFP) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(LRPos);
      }
      auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions cannot
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue for
      // free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6 and R7 is, spill
    // LR as well so we can fold BX_RET into the register restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill a high register if the function is Thumb. In the case
          // of Windows on ARM, accept R11 (frame pointer).
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
      }
    }

    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is a high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            MFI.CreateStackObject(Size, Alignment, false));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}

void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isCalleeSavedInfoValid())
    return;

  // If no terminator implicitly uses LR, we can 'restore' LR directly into PC
  // so that it is not live out of the return block; clear the Restored bit in
  // that case.
  for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
    if (Info.getReg() != ARM::LR)
      continue;
    if (all_of(MF, [](const MachineBasicBlock &MBB) {
          return all_of(MBB.terminators(), [](const MachineInstr &Term) {
            return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
                   Term.getOpcode() == ARM::t2LDMIA_RET ||
                   Term.getOpcode() == ARM::tPOP_RET;
          });
        })) {
      Info.setRestored(false);
      break;
    }
  }
}
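
// For instance, a return lowered as "pop {r7, pc}" (tPOP_RET) reloads the
// saved return address straight into PC, so LR itself never needs to be
// marked as restored for that function.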

void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}

void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
                                      BitVector &SavedRegs) const {
  TargetFrameLowering::getCalleeSaves(MF, SavedRegs);

  // If we have the "returned" parameter attribute which guarantees that we
  // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
  // record that fact for IPRA.
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->getPreservesR0())
    SavedRegs.set(ARM::R0);
}

bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(ARM::FPCXTNS);
    CSI.back().setRestored(false);
  }

  // For functions that sign their return address, the return address PAC is
  // computed in R12 upon function entry. Treat R12 as a callee-saved register
  // in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of the registers must match the order in which we push them,
    // because PEI assigns frame indices in that order. When compiling for
    // return address signing and authentication, we use split push, so the
    // order we want is:
    // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
    CSI.insert(find_if(CSI,
                       [=](const auto &CS) {
                         Register Reg = CS.getReg();
                         return Reg == ARM::R10 || Reg == ARM::R11 ||
                                Reg == ARM::R8 || Reg == ARM::R9 ||
                                ARM::DPRRegClass.contains(Reg);
                       }),
               CalleeSavedInfo(ARM::R12));
  }

  return false;
}

const TargetFrameLowering::SpillSlot *
ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
  NumEntries = std::size(FixedSpillOffsets);
  return FixedSpillOffsets;
}

MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
  unsigned PredReg = TII.getFramePred(*I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(*I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pop arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
                 MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}
2920
2921/// Get the minimum constant for ARM that is greater than or equal to the
2922/// argument. In ARM, constants can have any value that can be produced by
2923/// rotating an 8-bit value to the right by an even number of bits within a
2924/// 32-bit word.
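///
/// For example, 257 (0x101) is not encodable because its set bits span nine
/// bits, so alignToARMConstant(257) returns 260 (0x104): the 8-bit value 0x41
/// rotated right by 30 bits, and the smallest encodable constant >= 257.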
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time until one of the top two bits is
  // set, so the leading bits of the value sit in the top byte.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Round the top byte up if any of the lower 24 bits are set.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // If the carry overflowed into bit 8, clear the bottom two bits so the
  // result still corresponds to an 8-bit value at an even rotation.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization shift.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}

// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is somewhat arbitrarily chosen, but
// it must be well defined in order to allow for consistent implementations of
// the __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions, because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like this (when calling __morestack):
//
// * r4 holds the minimum stack size requested for this function call
// * r5 holds the stack size of the arguments to the function
// * the beginning of the function is 3 instructions after the call to
//   __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments onto the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
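//
// As a rough sketch (assuming ARM mode with the MRC-based TLS access; Thumb1
// loads __STACK_LIMIT through a constant pool instead, and frames smaller
// than kSplitStackAvailable compare sp itself rather than sp minus the frame
// size), the code emitted below looks approximately like this, where .Lpost
// is an illustrative label:
//
//         push  {r4, r5}                  ; save scratch registers
//         sub   r5, sp, #AlignedStackSize ; r5 <- lowest address we need
//         mrc   p15, #0, r4, c13, c0, #3  ; r4 <- TLS base
//         ldr   r4, [r4, #4*TlsOffset]    ; r4 <- stack limit
//         cmp   r4, r5
//         bls   .Lpost                    ; enough stack: skip __morestack
//         mov   r4, #AlignedStackSize     ; requested stack size
//         mov   r5, #ArgumentStackSize    ; size of stack-passed arguments
//         push  {lr}
//         bl    __morestack
//         pop   {lr}
//         pop   {r4, r5}
//         bx    lr                        ; __morestack resumes 3 insns past bl
// .Lpost:
//         pop   {r4, r5}                  ; then fall through to the prologue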
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, nor platforms other than
  // android/linux. Note that both Thumb1 and Thumb2 are supported on
  // android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab all the blocks that reach PrologueMBB to update their liveness as
  // well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order of that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(&PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // The required stack size, rounded up so it is encodable as an ARM constant.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
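  // This is sound because the limit recorded in the TCB sits
  // kSplitStackAvailable bytes above the real limit: if sp itself is above
  // the recorded limit, a frame smaller than that slack still fits above the
  // real limit.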

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers).
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // mov SR1, sp
  if (Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
          .addReg(ARM::SP)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
          .addReg(ARM::SP)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  }

  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
          .addExternalSymbol("__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
          .addConstantPoolIndex(CPI)
          .add(predOps(ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
        .add(predOps(ARMCC::AL));
  } else {
    // Get the TLS base address from the coprocessor.
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));

    // Use the last TLS slot on android and a private field of the TCB on
    // linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset.
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
  }

  // Compare the stack limit with the stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)
      .add(predOps(ARMCC::AL));

  // This jump is taken if StackLimit <= SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addMBB(PostStackMBB)
      .addImm(ARMCC::LS)
      .addReg(ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0 (r4)
  // and that the size of the stack arguments is in SR1 (r5).

  // Pass the first argument to __morestack in Scratch Register #0:
  // the amount of stack required.
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
          .add(condCodeOp())
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
    }
  }

  // Pass the second argument to __morestack in Scratch Register #1:
  // the amount of stack consumed to save the function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
          .add(condCodeOp())
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            *AllocMBB, MBBI, DL, ScratchReg1, 0,
            alignToARMConstant(ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          *AllocMBB, MBBI, DL, ScratchReg1, 0,
          alignToARMConstant(ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save the return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register.
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .add(predOps(ARMCC::AL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore the return address of the original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .add(predOps(ARMCC::AL))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    } else {
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
          .add(predOps(ARMCC::AL));
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip the PostStackMBB block, so we need to restore
  // the scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Return from this function.
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));

  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, i.e. that they have the "Same Value".
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Organize the CFG: wire up the successors of the new blocks.
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}