1 | //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the X86 implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "X86FrameLowering.h" |
14 | #include "MCTargetDesc/X86MCTargetDesc.h" |
15 | #include "X86InstrBuilder.h" |
16 | #include "X86InstrInfo.h" |
17 | #include "X86MachineFunctionInfo.h" |
18 | #include "X86Subtarget.h" |
19 | #include "X86TargetMachine.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/CodeGen/LivePhysRegs.h" |
22 | #include "llvm/CodeGen/MachineFrameInfo.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | #include "llvm/CodeGen/MachineModuleInfo.h" |
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
27 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
28 | #include "llvm/IR/DataLayout.h" |
29 | #include "llvm/IR/EHPersonalities.h" |
30 | #include "llvm/IR/Function.h" |
31 | #include "llvm/MC/MCAsmInfo.h" |
32 | #include "llvm/MC/MCObjectFileInfo.h" |
33 | #include "llvm/MC/MCSymbol.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/LEB128.h" |
36 | #include "llvm/Target/TargetOptions.h" |
37 | #include <cstdlib> |
38 | |
39 | #define DEBUG_TYPE "x86-fl" |
40 | |
41 | STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue" ); |
42 | STATISTIC(, |
43 | "Number of extra stack probes generated in prologue" ); |
44 | STATISTIC(NumFunctionUsingPush2Pop2, "Number of funtions using push2/pop2" ); |
45 | |
46 | using namespace llvm; |
47 | |
48 | X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, |
49 | MaybeAlign StackAlignOverride) |
50 | : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), |
51 | STI.is64Bit() ? -8 : -4), |
52 | STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { |
53 | // Cache a bunch of frame-related predicates for this subtarget. |
54 | SlotSize = TRI->getSlotSize(); |
55 | Is64Bit = STI.is64Bit(); |
56 | IsLP64 = STI.isTarget64BitLP64(); |
57 | // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. |
58 | Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); |
59 | StackPtr = TRI->getStackRegister(); |
60 | } |
61 | |
62 | bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { |
63 | return !MF.getFrameInfo().hasVarSizedObjects() && |
64 | !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() && |
65 | !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall(); |
66 | } |
67 | |
68 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the |
69 | /// call frame pseudos can be simplified. Having a FP, as in the default |
70 | /// implementation, is not sufficient here since we can't always use it. |
71 | /// Use a more nuanced condition. |
72 | bool X86FrameLowering::canSimplifyCallFramePseudos( |
73 | const MachineFunction &MF) const { |
74 | return hasReservedCallFrame(MF) || |
75 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || |
76 | (hasFP(MF) && !TRI->hasStackRealignment(MF)) || |
77 | TRI->hasBasePointer(MF); |
78 | } |
79 | |
80 | // needsFrameIndexResolution - Do we need to perform FI resolution for |
81 | // this function. Normally, this is required only when the function |
82 | // has any stack objects. However, FI resolution actually has another job, |
83 | // not apparent from the title - it resolves callframesetup/destroy |
84 | // that were not simplified earlier. |
85 | // So, this is required for x86 functions that have push sequences even |
86 | // when there are no stack objects. |
87 | bool X86FrameLowering::needsFrameIndexResolution( |
88 | const MachineFunction &MF) const { |
89 | return MF.getFrameInfo().hasStackObjects() || |
90 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); |
91 | } |
92 | |
93 | /// hasFP - Return true if the specified function should have a dedicated frame |
94 | /// pointer register. This is true if the function has variable sized allocas |
95 | /// or if frame pointer elimination is disabled. |
96 | bool X86FrameLowering::hasFP(const MachineFunction &MF) const { |
97 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
98 | return (MF.getTarget().Options.DisableFramePointerElim(MF) || |
99 | TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || |
100 | MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() || |
101 | MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || |
102 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || |
103 | MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || |
104 | MFI.hasStackMap() || MFI.hasPatchPoint() || |
105 | (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment())); |
106 | } |
107 | |
108 | static unsigned getSUBriOpcode(bool IsLP64) { |
109 | return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri; |
110 | } |
111 | |
112 | static unsigned getADDriOpcode(bool IsLP64) { |
113 | return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri; |
114 | } |
115 | |
116 | static unsigned getSUBrrOpcode(bool IsLP64) { |
117 | return IsLP64 ? X86::SUB64rr : X86::SUB32rr; |
118 | } |
119 | |
120 | static unsigned getADDrrOpcode(bool IsLP64) { |
121 | return IsLP64 ? X86::ADD64rr : X86::ADD32rr; |
122 | } |
123 | |
124 | static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { |
125 | return IsLP64 ? X86::AND64ri32 : X86::AND32ri; |
126 | } |
127 | |
128 | static unsigned getLEArOpcode(bool IsLP64) { |
129 | return IsLP64 ? X86::LEA64r : X86::LEA32r; |
130 | } |
131 | |
132 | static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) { |
133 | if (Use64BitReg) { |
134 | if (isUInt<32>(x: Imm)) |
135 | return X86::MOV32ri64; |
136 | if (isInt<32>(x: Imm)) |
137 | return X86::MOV64ri32; |
138 | return X86::MOV64ri; |
139 | } |
140 | return X86::MOV32ri; |
141 | } |
142 | |
143 | // Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the |
144 | // value written by the PUSH from the stack. The processor tracks these marked |
145 | // instructions internally and fast-forwards register data between matching PUSH |
146 | // and POP instructions, without going through memory or through the training |
147 | // loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient |
148 | // memory-renaming optimization can be used. |
149 | // |
150 | // The PPX hint is purely a performance hint. Instructions with this hint have |
151 | // the same functional semantics as those without. PPX hints set by the |
152 | // compiler that violate the balancing rule may turn off the PPX optimization, |
153 | // but they will not affect program semantics. |
154 | // |
155 | // Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp |
156 | // are not considered). |
157 | // |
158 | // PUSH2 and POP2 are instructions for (respectively) pushing/popping 2 |
159 | // GPRs at a time to/from the stack. |
160 | static unsigned getPUSHOpcode(const X86Subtarget &ST) { |
161 | return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r) |
162 | : X86::PUSH32r; |
163 | } |
164 | static unsigned getPOPOpcode(const X86Subtarget &ST) { |
165 | return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r) |
166 | : X86::POP32r; |
167 | } |
168 | static unsigned getPUSH2Opcode(const X86Subtarget &ST) { |
169 | return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2; |
170 | } |
171 | static unsigned getPOP2Opcode(const X86Subtarget &ST) { |
172 | return ST.hasPPX() ? X86::POP2P : X86::POP2; |
173 | } |
174 | |
175 | static bool isEAXLiveIn(MachineBasicBlock &MBB) { |
176 | for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { |
177 | unsigned Reg = RegMask.PhysReg; |
178 | |
179 | if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || |
180 | Reg == X86::AH || Reg == X86::AL) |
181 | return true; |
182 | } |
183 | |
184 | return false; |
185 | } |
186 | |
187 | /// Check if the flags need to be preserved before the terminators. |
188 | /// This would be the case, if the eflags is live-in of the region |
189 | /// composed by the terminators or live-out of that region, without |
190 | /// being defined by a terminator. |
191 | static bool |
192 | flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { |
193 | for (const MachineInstr &MI : MBB.terminators()) { |
194 | bool BreakNext = false; |
195 | for (const MachineOperand &MO : MI.operands()) { |
196 | if (!MO.isReg()) |
197 | continue; |
198 | Register Reg = MO.getReg(); |
199 | if (Reg != X86::EFLAGS) |
200 | continue; |
201 | |
202 | // This terminator needs an eflags that is not defined |
203 | // by a previous another terminator: |
204 | // EFLAGS is live-in of the region composed by the terminators. |
205 | if (!MO.isDef()) |
206 | return true; |
207 | // This terminator defines the eflags, i.e., we don't need to preserve it. |
208 | // However, we still need to check this specific terminator does not |
209 | // read a live-in value. |
210 | BreakNext = true; |
211 | } |
212 | // We found a definition of the eflags, no need to preserve them. |
213 | if (BreakNext) |
214 | return false; |
215 | } |
216 | |
217 | // None of the terminators use or define the eflags. |
218 | // Check if they are live-out, that would imply we need to preserve them. |
219 | for (const MachineBasicBlock *Succ : MBB.successors()) |
220 | if (Succ->isLiveIn(X86::Reg: EFLAGS)) |
221 | return true; |
222 | |
223 | return false; |
224 | } |
225 | |
226 | /// emitSPUpdate - Emit a series of instructions to increment / decrement the |
227 | /// stack pointer by a constant value. |
228 | void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, |
229 | MachineBasicBlock::iterator &MBBI, |
230 | const DebugLoc &DL, int64_t NumBytes, |
231 | bool InEpilogue) const { |
232 | bool isSub = NumBytes < 0; |
233 | uint64_t Offset = isSub ? -NumBytes : NumBytes; |
234 | MachineInstr::MIFlag Flag = |
235 | isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy; |
236 | |
237 | uint64_t Chunk = (1LL << 31) - 1; |
238 | |
239 | MachineFunction &MF = *MBB.getParent(); |
240 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
241 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
242 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); |
243 | |
244 | // It's ok to not take into account large chunks when probing, as the |
245 | // allocation is split in smaller chunks anyway. |
246 | if (EmitInlineStackProbe && !InEpilogue) { |
247 | |
248 | // This pseudo-instruction is going to be expanded, potentially using a |
249 | // loop, by inlineStackProbe(). |
250 | BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset); |
251 | return; |
252 | } else if (Offset > Chunk) { |
253 | // Rather than emit a long series of instructions for large offsets, |
254 | // load the offset into a register and do one sub/add |
255 | unsigned Reg = 0; |
256 | unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX); |
257 | |
258 | if (isSub && !isEAXLiveIn(MBB)) |
259 | Reg = Rax; |
260 | else |
261 | Reg = TRI->findDeadCallerSavedReg(MBB, MBBI); |
262 | |
263 | unsigned AddSubRROpc = |
264 | isSub ? getSUBrrOpcode(IsLP64: Is64Bit) : getADDrrOpcode(IsLP64: Is64Bit); |
265 | if (Reg) { |
266 | BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Use64BitReg: Is64Bit, Imm: Offset)), Reg) |
267 | .addImm(Offset) |
268 | .setMIFlag(Flag); |
269 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr) |
270 | .addReg(StackPtr) |
271 | .addReg(Reg); |
272 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
273 | return; |
274 | } else if (Offset > 8 * Chunk) { |
275 | // If we would need more than 8 add or sub instructions (a >16GB stack |
276 | // frame), it's worth spilling RAX to materialize this immediate. |
277 | // pushq %rax |
278 | // movabsq +-$Offset+-SlotSize, %rax |
279 | // addq %rsp, %rax |
280 | // xchg %rax, (%rsp) |
281 | // movq (%rsp), %rsp |
282 | assert(Is64Bit && "can't have 32-bit 16GB stack frame" ); |
283 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) |
284 | .addReg(Rax, RegState::Kill) |
285 | .setMIFlag(Flag); |
286 | // Subtract is not commutative, so negate the offset and always use add. |
287 | // Subtract 8 less and add 8 more to account for the PUSH we just did. |
288 | if (isSub) |
289 | Offset = -(Offset - SlotSize); |
290 | else |
291 | Offset = Offset + SlotSize; |
292 | BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Use64BitReg: Is64Bit, Imm: Offset)), Rax) |
293 | .addImm(Offset) |
294 | .setMIFlag(Flag); |
295 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax) |
296 | .addReg(Rax) |
297 | .addReg(StackPtr); |
298 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
299 | // Exchange the new SP in RAX with the top of the stack. |
300 | addRegOffset( |
301 | BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax), |
302 | StackPtr, false, 0); |
303 | // Load new SP from the top of the stack into RSP. |
304 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr), |
305 | StackPtr, false, 0); |
306 | return; |
307 | } |
308 | } |
309 | |
310 | while (Offset) { |
311 | uint64_t ThisVal = std::min(a: Offset, b: Chunk); |
312 | if (ThisVal == SlotSize) { |
313 | // Use push / pop for slot sized adjustments as a size optimization. We |
314 | // need to find a dead register when using pop. |
315 | unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) |
316 | : TRI->findDeadCallerSavedReg(MBB, MBBI); |
317 | if (Reg) { |
318 | unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) |
319 | : (Is64Bit ? X86::POP64r : X86::POP32r); |
320 | BuildMI(MBB, MBBI, DL, TII.get(Opc)) |
321 | .addReg(Reg, getDefRegState(B: !isSub) | getUndefRegState(B: isSub)) |
322 | .setMIFlag(Flag); |
323 | Offset -= ThisVal; |
324 | continue; |
325 | } |
326 | } |
327 | |
328 | BuildStackAdjustment(MBB, MBBI, DL, Offset: isSub ? -ThisVal : ThisVal, InEpilogue) |
329 | .setMIFlag(Flag); |
330 | |
331 | Offset -= ThisVal; |
332 | } |
333 | } |
334 | |
335 | MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( |
336 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
337 | const DebugLoc &DL, int64_t Offset, bool InEpilogue) const { |
338 | assert(Offset != 0 && "zero offset stack adjustment requested" ); |
339 | |
340 | // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue |
341 | // is tricky. |
342 | bool UseLEA; |
343 | if (!InEpilogue) { |
344 | // Check if inserting the prologue at the beginning |
345 | // of MBB would require to use LEA operations. |
346 | // We need to use LEA operations if EFLAGS is live in, because |
347 | // it means an instruction will read it before it gets defined. |
348 | UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS); |
349 | } else { |
350 | // If we can use LEA for SP but we shouldn't, check that none |
351 | // of the terminators uses the eflags. Otherwise we will insert |
352 | // a ADD that will redefine the eflags and break the condition. |
353 | // Alternatively, we could move the ADD, but this may not be possible |
354 | // and is an optimization anyway. |
355 | UseLEA = canUseLEAForSPInEpilogue(MF: *MBB.getParent()); |
356 | if (UseLEA && !STI.useLeaForSP()) |
357 | UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB); |
358 | // If that assert breaks, that means we do not do the right thing |
359 | // in canUseAsEpilogue. |
360 | assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) && |
361 | "We shouldn't have allowed this insertion point" ); |
362 | } |
363 | |
364 | MachineInstrBuilder MI; |
365 | if (UseLEA) { |
366 | MI = addRegOffset(BuildMI(MBB, MBBI, DL, |
367 | TII.get(getLEArOpcode(IsLP64: Uses64BitFramePtr)), |
368 | StackPtr), |
369 | StackPtr, false, Offset); |
370 | } else { |
371 | bool IsSub = Offset < 0; |
372 | uint64_t AbsOffset = IsSub ? -Offset : Offset; |
373 | const unsigned Opc = IsSub ? getSUBriOpcode(IsLP64: Uses64BitFramePtr) |
374 | : getADDriOpcode(IsLP64: Uses64BitFramePtr); |
375 | MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) |
376 | .addReg(StackPtr) |
377 | .addImm(AbsOffset); |
378 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
379 | } |
380 | return MI; |
381 | } |
382 | |
383 | int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, |
384 | MachineBasicBlock::iterator &MBBI, |
385 | bool doMergeWithPrevious) const { |
386 | if ((doMergeWithPrevious && MBBI == MBB.begin()) || |
387 | (!doMergeWithPrevious && MBBI == MBB.end())) |
388 | return 0; |
389 | |
390 | MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(x: MBBI) : MBBI; |
391 | |
392 | PI = skipDebugInstructionsBackward(It: PI, Begin: MBB.begin()); |
393 | // It is assumed that ADD/SUB/LEA instruction is succeded by one CFI |
394 | // instruction, and that there are no DBG_VALUE or other instructions between |
395 | // ADD/SUB/LEA and its corresponding CFI instruction. |
396 | /* TODO: Add support for the case where there are multiple CFI instructions |
397 | below the ADD/SUB/LEA, e.g.: |
398 | ... |
399 | add |
400 | cfi_def_cfa_offset |
401 | cfi_offset |
402 | ... |
403 | */ |
404 | if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction()) |
405 | PI = std::prev(x: PI); |
406 | |
407 | unsigned Opc = PI->getOpcode(); |
408 | int Offset = 0; |
409 | |
410 | if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) && |
411 | PI->getOperand(0).getReg() == StackPtr) { |
412 | assert(PI->getOperand(1).getReg() == StackPtr); |
413 | Offset = PI->getOperand(i: 2).getImm(); |
414 | } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && |
415 | PI->getOperand(0).getReg() == StackPtr && |
416 | PI->getOperand(1).getReg() == StackPtr && |
417 | PI->getOperand(2).getImm() == 1 && |
418 | PI->getOperand(3).getReg() == X86::NoRegister && |
419 | PI->getOperand(5).getReg() == X86::NoRegister) { |
420 | // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. |
421 | Offset = PI->getOperand(i: 4).getImm(); |
422 | } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) && |
423 | PI->getOperand(0).getReg() == StackPtr) { |
424 | assert(PI->getOperand(1).getReg() == StackPtr); |
425 | Offset = -PI->getOperand(i: 2).getImm(); |
426 | } else |
427 | return 0; |
428 | |
429 | PI = MBB.erase(I: PI); |
430 | if (PI != MBB.end() && PI->isCFIInstruction()) { |
431 | auto CIs = MBB.getParent()->getFrameInstructions(); |
432 | MCCFIInstruction CI = CIs[PI->getOperand(i: 0).getCFIIndex()]; |
433 | if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset || |
434 | CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) |
435 | PI = MBB.erase(I: PI); |
436 | } |
437 | if (!doMergeWithPrevious) |
438 | MBBI = skipDebugInstructionsForward(It: PI, End: MBB.end()); |
439 | |
440 | return Offset; |
441 | } |
442 | |
443 | void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, |
444 | MachineBasicBlock::iterator MBBI, |
445 | const DebugLoc &DL, |
446 | const MCCFIInstruction &CFIInst, |
447 | MachineInstr::MIFlag Flag) const { |
448 | MachineFunction &MF = *MBB.getParent(); |
449 | unsigned CFIIndex = MF.addFrameInst(Inst: CFIInst); |
450 | |
451 | if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) |
452 | MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
453 | |
454 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
455 | .addCFIIndex(CFIIndex) |
456 | .setMIFlag(Flag); |
457 | } |
458 | |
459 | /// Emits Dwarf Info specifying offsets of callee saved registers and |
460 | /// frame pointer. This is called only when basic block sections are enabled. |
461 | void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA( |
462 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { |
463 | MachineFunction &MF = *MBB.getParent(); |
464 | if (!hasFP(MF)) { |
465 | emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true); |
466 | return; |
467 | } |
468 | const MachineModuleInfo &MMI = MF.getMMI(); |
469 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); |
470 | const Register FramePtr = TRI->getFrameRegister(MF); |
471 | const Register MachineFramePtr = |
472 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
473 | : FramePtr; |
474 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
475 | // Offset = space for return address + size of the frame pointer itself. |
476 | unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4); |
477 | BuildCFI(MBB, MBBI, DL: DebugLoc{}, |
478 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: -Offset)); |
479 | emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true); |
480 | } |
481 | |
482 | void X86FrameLowering::emitCalleeSavedFrameMoves( |
483 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
484 | const DebugLoc &DL, bool IsPrologue) const { |
485 | MachineFunction &MF = *MBB.getParent(); |
486 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
487 | MachineModuleInfo &MMI = MF.getMMI(); |
488 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); |
489 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
490 | |
491 | // Add callee saved registers to move list. |
492 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
493 | |
494 | // Calculate offsets. |
495 | for (const CalleeSavedInfo &I : CSI) { |
496 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: I.getFrameIdx()); |
497 | Register Reg = I.getReg(); |
498 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
499 | |
500 | if (IsPrologue) { |
501 | if (X86FI->getStackPtrSaveMI()) { |
502 | // +2*SlotSize because there is return address and ebp at the bottom |
503 | // of the stack. |
504 | // | retaddr | |
505 | // | ebp | |
506 | // | |<--ebp |
507 | Offset += 2 * SlotSize; |
508 | SmallString<64> CfaExpr; |
509 | CfaExpr.push_back(Elt: dwarf::DW_CFA_expression); |
510 | uint8_t buffer[16]; |
511 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer)); |
512 | CfaExpr.push_back(Elt: 2); |
513 | Register FramePtr = TRI->getFrameRegister(MF); |
514 | const Register MachineFramePtr = |
515 | STI.isTarget64BitILP32() |
516 | ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
517 | : FramePtr; |
518 | unsigned DwarfFramePtr = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
519 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); |
520 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer)); |
521 | BuildCFI(MBB, MBBI, DL, |
522 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()), |
523 | Flag: MachineInstr::FrameSetup); |
524 | } else { |
525 | BuildCFI(MBB, MBBI, DL, |
526 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset)); |
527 | } |
528 | } else { |
529 | BuildCFI(MBB, MBBI, DL, |
530 | CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfReg)); |
531 | } |
532 | } |
533 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
534 | int FI = MI->getOperand(i: 1).getIndex(); |
535 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: FI) + 2 * SlotSize; |
536 | SmallString<64> CfaExpr; |
537 | Register FramePtr = TRI->getFrameRegister(MF); |
538 | const Register MachineFramePtr = |
539 | STI.isTarget64BitILP32() |
540 | ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
541 | : FramePtr; |
542 | unsigned DwarfFramePtr = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
543 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); |
544 | uint8_t buffer[16]; |
545 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer)); |
546 | CfaExpr.push_back(Elt: dwarf::DW_OP_deref); |
547 | |
548 | SmallString<64> DefCfaExpr; |
549 | DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression); |
550 | DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: CfaExpr.size(), p: buffer)); |
551 | DefCfaExpr.append(RHS: CfaExpr.str()); |
552 | // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref |
553 | BuildCFI(MBB, MBBI, DL, |
554 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str()), |
555 | Flag: MachineInstr::FrameSetup); |
556 | } |
557 | } |
558 | |
559 | void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, |
560 | MachineBasicBlock &MBB) const { |
561 | const MachineFunction &MF = *MBB.getParent(); |
562 | |
563 | // Insertion point. |
564 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
565 | |
566 | // Fake a debug loc. |
567 | DebugLoc DL; |
568 | if (MBBI != MBB.end()) |
569 | DL = MBBI->getDebugLoc(); |
570 | |
571 | // Zero out FP stack if referenced. Do this outside of the loop below so that |
572 | // it's done only once. |
573 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
574 | for (MCRegister Reg : RegsToZero.set_bits()) { |
575 | if (!X86::RFP80RegClass.contains(Reg)) |
576 | continue; |
577 | |
578 | unsigned NumFPRegs = ST.is64Bit() ? 8 : 7; |
579 | for (unsigned i = 0; i != NumFPRegs; ++i) |
580 | BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0)); |
581 | |
582 | for (unsigned i = 0; i != NumFPRegs; ++i) |
583 | BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0); |
584 | break; |
585 | } |
586 | |
587 | // For GPRs, we only care to clear out the 32-bit register. |
588 | BitVector GPRsToZero(TRI->getNumRegs()); |
589 | for (MCRegister Reg : RegsToZero.set_bits()) |
590 | if (TRI->isGeneralPurposeRegister(MF, Reg)) { |
591 | GPRsToZero.set(getX86SubSuperRegister(Reg, Size: 32)); |
592 | RegsToZero.reset(Idx: Reg); |
593 | } |
594 | |
595 | // Zero out the GPRs first. |
596 | for (MCRegister Reg : GPRsToZero.set_bits()) |
597 | TII.buildClearRegister(Reg, MBB, MBBI, DL); |
598 | |
599 | // Zero out the remaining registers. |
600 | for (MCRegister Reg : RegsToZero.set_bits()) |
601 | TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL); |
602 | } |
603 | |
604 | void X86FrameLowering::emitStackProbe( |
605 | MachineFunction &MF, MachineBasicBlock &MBB, |
606 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, |
607 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { |
608 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
609 | if (STI.isTargetWindowsCoreCLR()) { |
610 | if (InProlog) { |
611 | BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)) |
612 | .addImm(0 /* no explicit stack size */); |
613 | } else { |
614 | emitStackProbeInline(MF, MBB, MBBI, DL, InProlog: false); |
615 | } |
616 | } else { |
617 | emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum); |
618 | } |
619 | } |
620 | |
621 | bool X86FrameLowering::stackProbeFunctionModifiesSP() const { |
622 | return STI.isOSWindows() && !STI.isTargetWin64(); |
623 | } |
624 | |
625 | void X86FrameLowering::inlineStackProbe(MachineFunction &MF, |
626 | MachineBasicBlock &PrologMBB) const { |
627 | auto Where = llvm::find_if(Range&: PrologMBB, P: [](MachineInstr &MI) { |
628 | return MI.getOpcode() == X86::STACKALLOC_W_PROBING; |
629 | }); |
630 | if (Where != PrologMBB.end()) { |
631 | DebugLoc DL = PrologMBB.findDebugLoc(MBBI: Where); |
632 | emitStackProbeInline(MF, MBB&: PrologMBB, MBBI: Where, DL, InProlog: true); |
633 | Where->eraseFromParent(); |
634 | } |
635 | } |
636 | |
637 | void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, |
638 | MachineBasicBlock &MBB, |
639 | MachineBasicBlock::iterator MBBI, |
640 | const DebugLoc &DL, |
641 | bool InProlog) const { |
642 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
643 | if (STI.isTargetWindowsCoreCLR() && STI.is64Bit()) |
644 | emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog); |
645 | else |
646 | emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog); |
647 | } |
648 | |
649 | void X86FrameLowering::emitStackProbeInlineGeneric( |
650 | MachineFunction &MF, MachineBasicBlock &MBB, |
651 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { |
652 | MachineInstr &AllocWithProbe = *MBBI; |
653 | uint64_t Offset = AllocWithProbe.getOperand(i: 0).getImm(); |
654 | |
655 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
656 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
657 | assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) && |
658 | "different expansion expected for CoreCLR 64 bit" ); |
659 | |
660 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
661 | uint64_t ProbeChunk = StackProbeSize * 8; |
662 | |
663 | uint64_t MaxAlign = |
664 | TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0; |
665 | |
666 | // Synthesize a loop or unroll it, depending on the number of iterations. |
667 | // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left |
668 | // between the unaligned rsp and current rsp. |
669 | if (Offset > ProbeChunk) { |
670 | emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset, |
671 | Align: MaxAlign % StackProbeSize); |
672 | } else { |
673 | emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset, |
674 | Align: MaxAlign % StackProbeSize); |
675 | } |
676 | } |
677 | |
678 | void X86FrameLowering::emitStackProbeInlineGenericBlock( |
679 | MachineFunction &MF, MachineBasicBlock &MBB, |
680 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, |
681 | uint64_t AlignOffset) const { |
682 | |
683 | const bool NeedsDwarfCFI = needsDwarfCFI(MF); |
684 | const bool HasFP = hasFP(MF); |
685 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
686 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
687 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
688 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
689 | |
690 | uint64_t CurrentOffset = 0; |
691 | |
692 | assert(AlignOffset < StackProbeSize); |
693 | |
694 | // If the offset is so small it fits within a page, there's nothing to do. |
695 | if (StackProbeSize < Offset + AlignOffset) { |
696 | |
697 | uint64_t StackAdjustment = StackProbeSize - AlignOffset; |
698 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackAdjustment, /*InEpilogue=*/false) |
699 | .setMIFlag(MachineInstr::FrameSetup); |
700 | if (!HasFP && NeedsDwarfCFI) { |
701 | BuildCFI( |
702 | MBB, MBBI, DL, |
703 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackAdjustment)); |
704 | } |
705 | |
706 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) |
707 | .setMIFlag(MachineInstr::FrameSetup), |
708 | StackPtr, false, 0) |
709 | .addImm(0) |
710 | .setMIFlag(MachineInstr::FrameSetup); |
711 | NumFrameExtraProbe++; |
712 | CurrentOffset = StackProbeSize - AlignOffset; |
713 | } |
714 | |
715 | // For the next N - 1 pages, just probe. I tried to take advantage of |
716 | // natural probes but it implies much more logic and there was very few |
717 | // interesting natural probes to interleave. |
718 | while (CurrentOffset + StackProbeSize < Offset) { |
719 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackProbeSize, /*InEpilogue=*/false) |
720 | .setMIFlag(MachineInstr::FrameSetup); |
721 | |
722 | if (!HasFP && NeedsDwarfCFI) { |
723 | BuildCFI( |
724 | MBB, MBBI, DL, |
725 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackProbeSize)); |
726 | } |
727 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) |
728 | .setMIFlag(MachineInstr::FrameSetup), |
729 | StackPtr, false, 0) |
730 | .addImm(0) |
731 | .setMIFlag(MachineInstr::FrameSetup); |
732 | NumFrameExtraProbe++; |
733 | CurrentOffset += StackProbeSize; |
734 | } |
735 | |
736 | // No need to probe the tail, it is smaller than a Page. |
737 | uint64_t ChunkSize = Offset - CurrentOffset; |
738 | if (ChunkSize == SlotSize) { |
739 | // Use push for slot sized adjustments as a size optimization, |
740 | // like emitSPUpdate does when not probing. |
741 | unsigned Reg = Is64Bit ? X86::RAX : X86::EAX; |
742 | unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r; |
743 | BuildMI(MBB, MBBI, DL, TII.get(Opc)) |
744 | .addReg(Reg, RegState::Undef) |
745 | .setMIFlag(MachineInstr::FrameSetup); |
746 | } else { |
747 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -ChunkSize, /*InEpilogue=*/false) |
748 | .setMIFlag(MachineInstr::FrameSetup); |
749 | } |
750 | // No need to adjust Dwarf CFA offset here, the last position of the stack has |
751 | // been defined |
752 | } |
753 | |
754 | void X86FrameLowering::emitStackProbeInlineGenericLoop( |
755 | MachineFunction &MF, MachineBasicBlock &MBB, |
756 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, |
757 | uint64_t AlignOffset) const { |
758 | assert(Offset && "null offset" ); |
759 | |
760 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
761 | MachineBasicBlock::LQR_Live && |
762 | "Inline stack probe loop will clobber live EFLAGS." ); |
763 | |
764 | const bool NeedsDwarfCFI = needsDwarfCFI(MF); |
765 | const bool HasFP = hasFP(MF); |
766 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
767 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
768 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
769 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
770 | |
771 | if (AlignOffset) { |
772 | if (AlignOffset < StackProbeSize) { |
773 | // Perform a first smaller allocation followed by a probe. |
774 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -AlignOffset, /*InEpilogue=*/false) |
775 | .setMIFlag(MachineInstr::FrameSetup); |
776 | |
777 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) |
778 | .setMIFlag(MachineInstr::FrameSetup), |
779 | StackPtr, false, 0) |
780 | .addImm(0) |
781 | .setMIFlag(MachineInstr::FrameSetup); |
782 | NumFrameExtraProbe++; |
783 | Offset -= AlignOffset; |
784 | } |
785 | } |
786 | |
787 | // Synthesize a loop |
788 | NumFrameLoopProbe++; |
789 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); |
790 | |
791 | MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
792 | MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
793 | |
794 | MachineFunction::iterator MBBIter = ++MBB.getIterator(); |
795 | MF.insert(MBBI: MBBIter, MBB: testMBB); |
796 | MF.insert(MBBI: MBBIter, MBB: tailMBB); |
797 | |
798 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
799 | : Is64Bit ? X86::R11D |
800 | : X86::EAX; |
801 | |
802 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) |
803 | .addReg(StackPtr) |
804 | .setMIFlag(MachineInstr::FrameSetup); |
805 | |
806 | // save loop bound |
807 | { |
808 | const unsigned BoundOffset = alignDown(Value: Offset, Align: StackProbeSize); |
809 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
810 | BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) |
811 | .addReg(FinalStackProbed) |
812 | .addImm(BoundOffset) |
813 | .setMIFlag(MachineInstr::FrameSetup); |
814 | |
815 | // while in the loop, use loop-invariant reg for CFI, |
816 | // instead of the stack pointer, which changes during the loop |
817 | if (!HasFP && NeedsDwarfCFI) { |
818 | // x32 uses the same DWARF register numbers as x86-64, |
819 | // so there isn't a register number for r11d, we must use r11 instead |
820 | const Register DwarfFinalStackProbed = |
821 | STI.isTarget64BitILP32() |
822 | ? Register(getX86SubSuperRegister(Reg: FinalStackProbed, Size: 64)) |
823 | : FinalStackProbed; |
824 | |
825 | BuildCFI(MBB, MBBI, DL, |
826 | CFIInst: MCCFIInstruction::createDefCfaRegister( |
827 | L: nullptr, Register: TRI->getDwarfRegNum(DwarfFinalStackProbed, true))); |
828 | BuildCFI(MBB, MBBI, DL, |
829 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: BoundOffset)); |
830 | } |
831 | } |
832 | |
833 | // allocate a page |
834 | BuildStackAdjustment(MBB&: *testMBB, MBBI: testMBB->end(), DL, Offset: -StackProbeSize, |
835 | /*InEpilogue=*/false) |
836 | .setMIFlag(MachineInstr::FrameSetup); |
837 | |
838 | // touch the page |
839 | addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc)) |
840 | .setMIFlag(MachineInstr::FrameSetup), |
841 | StackPtr, false, 0) |
842 | .addImm(0) |
843 | .setMIFlag(MachineInstr::FrameSetup); |
844 | |
845 | // cmp with stack pointer bound |
846 | BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
847 | .addReg(StackPtr) |
848 | .addReg(FinalStackProbed) |
849 | .setMIFlag(MachineInstr::FrameSetup); |
850 | |
851 | // jump |
852 | BuildMI(testMBB, DL, TII.get(X86::JCC_1)) |
853 | .addMBB(testMBB) |
854 | .addImm(X86::COND_NE) |
855 | .setMIFlag(MachineInstr::FrameSetup); |
856 | testMBB->addSuccessor(Succ: testMBB); |
857 | testMBB->addSuccessor(Succ: tailMBB); |
858 | |
859 | // BB management |
860 | tailMBB->splice(Where: tailMBB->end(), Other: &MBB, From: MBBI, To: MBB.end()); |
861 | tailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
862 | MBB.addSuccessor(Succ: testMBB); |
863 | |
864 | // handle tail |
865 | const uint64_t TailOffset = Offset % StackProbeSize; |
866 | MachineBasicBlock::iterator TailMBBIter = tailMBB->begin(); |
867 | if (TailOffset) { |
868 | BuildStackAdjustment(MBB&: *tailMBB, MBBI: TailMBBIter, DL, Offset: -TailOffset, |
869 | /*InEpilogue=*/false) |
870 | .setMIFlag(MachineInstr::FrameSetup); |
871 | } |
872 | |
873 | // after the loop, switch back to stack pointer for CFI |
874 | if (!HasFP && NeedsDwarfCFI) { |
875 | // x32 uses the same DWARF register numbers as x86-64, |
876 | // so there isn't a register number for esp, we must use rsp instead |
877 | const Register DwarfStackPtr = |
878 | STI.isTarget64BitILP32() |
879 | ? Register(getX86SubSuperRegister(Reg: StackPtr, Size: 64)) |
880 | : Register(StackPtr); |
881 | |
882 | BuildCFI(MBB&: *tailMBB, MBBI: TailMBBIter, DL, |
883 | CFIInst: MCCFIInstruction::createDefCfaRegister( |
884 | L: nullptr, Register: TRI->getDwarfRegNum(DwarfStackPtr, true))); |
885 | } |
886 | |
887 | // Update Live In information |
888 | fullyRecomputeLiveIns(MBBs: {tailMBB, testMBB}); |
889 | } |
890 | |
891 | void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64( |
892 | MachineFunction &MF, MachineBasicBlock &MBB, |
893 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { |
894 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
895 | assert(STI.is64Bit() && "different expansion needed for 32 bit" ); |
896 | assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR" ); |
897 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
898 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); |
899 | |
900 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
901 | MachineBasicBlock::LQR_Live && |
902 | "Inline stack probe loop will clobber live EFLAGS." ); |
903 | |
904 | // RAX contains the number of bytes of desired stack adjustment. |
905 | // The handling here assumes this value has already been updated so as to |
906 | // maintain stack alignment. |
907 | // |
908 | // We need to exit with RSP modified by this amount and execute suitable |
909 | // page touches to notify the OS that we're growing the stack responsibly. |
910 | // All stack probing must be done without modifying RSP. |
911 | // |
912 | // MBB: |
913 | // SizeReg = RAX; |
914 | // ZeroReg = 0 |
915 | // CopyReg = RSP |
916 | // Flags, TestReg = CopyReg - SizeReg |
917 | // FinalReg = !Flags.Ovf ? TestReg : ZeroReg |
918 | // LimitReg = gs magic thread env access |
919 | // if FinalReg >= LimitReg goto ContinueMBB |
920 | // RoundBB: |
921 | // RoundReg = page address of FinalReg |
922 | // LoopMBB: |
923 | // LoopReg = PHI(LimitReg,ProbeReg) |
924 | // ProbeReg = LoopReg - PageSize |
925 | // [ProbeReg] = 0 |
926 | // if (ProbeReg > RoundReg) goto LoopMBB |
927 | // ContinueMBB: |
928 | // RSP = RSP - RAX |
929 | // [rest of original MBB] |
930 | |
931 | // Set up the new basic blocks |
932 | MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
933 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
934 | MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
935 | |
936 | MachineFunction::iterator MBBIter = std::next(x: MBB.getIterator()); |
937 | MF.insert(MBBI: MBBIter, MBB: RoundMBB); |
938 | MF.insert(MBBI: MBBIter, MBB: LoopMBB); |
939 | MF.insert(MBBI: MBBIter, MBB: ContinueMBB); |
940 | |
941 | // Split MBB and move the tail portion down to ContinueMBB. |
942 | MachineBasicBlock::iterator BeforeMBBI = std::prev(x: MBBI); |
943 | ContinueMBB->splice(Where: ContinueMBB->begin(), Other: &MBB, From: MBBI, To: MBB.end()); |
944 | ContinueMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
945 | |
946 | // Some useful constants |
947 | const int64_t ThreadEnvironmentStackLimit = 0x10; |
948 | const int64_t PageSize = 0x1000; |
949 | const int64_t PageMask = ~(PageSize - 1); |
950 | |
951 | // Registers we need. For the normal case we use virtual |
952 | // registers. For the prolog expansion we use RAX, RCX and RDX. |
953 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
954 | const TargetRegisterClass *RegClass = &X86::GR64RegClass; |
955 | const Register |
956 | SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass), |
957 | ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
958 | CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
959 | TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
960 | FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
961 | RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
962 | LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
963 | JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
964 | ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass); |
965 | |
966 | // SP-relative offsets where we can save RCX and RDX. |
967 | int64_t RCXShadowSlot = 0; |
968 | int64_t RDXShadowSlot = 0; |
969 | |
970 | // If inlining in the prolog, save RCX and RDX. |
971 | if (InProlog) { |
972 | // Compute the offsets. We need to account for things already |
973 | // pushed onto the stack at this point: return address, frame |
974 | // pointer (if used), and callee saves. |
975 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
976 | const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); |
977 | const bool HasFP = hasFP(MF); |
978 | |
979 | // Check if we need to spill RCX and/or RDX. |
980 | // Here we assume that no earlier prologue instruction changes RCX and/or |
981 | // RDX, so checking the block live-ins is enough. |
982 | const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX); |
983 | const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX); |
984 | int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); |
985 | // Assign the initial slot to both registers, then change RDX's slot if both |
986 | // need to be spilled. |
987 | if (IsRCXLiveIn) |
988 | RCXShadowSlot = InitSlot; |
989 | if (IsRDXLiveIn) |
990 | RDXShadowSlot = InitSlot; |
991 | if (IsRDXLiveIn && IsRCXLiveIn) |
992 | RDXShadowSlot += 8; |
993 | // Emit the saves if needed. |
994 | if (IsRCXLiveIn) |
995 | addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, |
996 | RCXShadowSlot) |
997 | .addReg(X86::RCX); |
998 | if (IsRDXLiveIn) |
999 | addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, |
1000 | RDXShadowSlot) |
1001 | .addReg(X86::RDX); |
1002 | } else { |
1003 | // Not in the prolog. Copy RAX to a virtual reg. |
1004 | BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); |
1005 | } |
1006 | |
1007 | // Add code to MBB to check for overflow and set the new target stack pointer |
1008 | // to zero if so. |
1009 | BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg) |
1010 | .addReg(ZeroReg, RegState::Undef) |
1011 | .addReg(ZeroReg, RegState::Undef); |
1012 | BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP); |
1013 | BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) |
1014 | .addReg(CopyReg) |
1015 | .addReg(SizeReg); |
1016 | BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg) |
1017 | .addReg(TestReg) |
1018 | .addReg(ZeroReg) |
1019 | .addImm(X86::COND_B); |
1020 | |
1021 | // FinalReg now holds final stack pointer value, or zero if |
1022 | // allocation would overflow. Compare against the current stack |
1023 | // limit from the thread environment block. Note this limit is the |
1024 | // lowest touched page on the stack, not the point at which the OS |
1025 | // will cause an overflow exception, so this is just an optimization |
1026 | // to avoid unnecessarily touching pages that are below the current |
1027 | // SP but already committed to the stack by the OS. |
1028 | BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg) |
1029 | .addReg(0) |
1030 | .addImm(1) |
1031 | .addReg(0) |
1032 | .addImm(ThreadEnvironmentStackLimit) |
1033 | .addReg(X86::GS); |
1034 | BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg); |
1035 | // Jump if the desired stack pointer is at or above the stack limit. |
1036 | BuildMI(&MBB, DL, TII.get(X86::JCC_1)) |
1037 | .addMBB(ContinueMBB) |
1038 | .addImm(X86::COND_AE); |
1039 | |
1040 | // Add code to roundMBB to round the final stack pointer to a page boundary. |
1041 | RoundMBB->addLiveIn(PhysReg: FinalReg); |
1042 | BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) |
1043 | .addReg(FinalReg) |
1044 | .addImm(PageMask); |
1045 | BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB); |
1046 | |
1047 | // LimitReg now holds the current stack limit, RoundedReg page-rounded |
1048 | // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page |
1049 | // and probe until we reach RoundedReg. |
1050 | if (!InProlog) { |
1051 | BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg) |
1052 | .addReg(LimitReg) |
1053 | .addMBB(RoundMBB) |
1054 | .addReg(ProbeReg) |
1055 | .addMBB(LoopMBB); |
1056 | } |
1057 | |
1058 | LoopMBB->addLiveIn(PhysReg: JoinReg); |
1059 | addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, |
1060 | false, -PageSize); |
1061 | |
1062 | // Probe by storing a byte onto the stack. |
1063 | BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi)) |
1064 | .addReg(ProbeReg) |
1065 | .addImm(1) |
1066 | .addReg(0) |
1067 | .addImm(0) |
1068 | .addReg(0) |
1069 | .addImm(0); |
1070 | |
1071 | LoopMBB->addLiveIn(PhysReg: RoundedReg); |
1072 | BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) |
1073 | .addReg(RoundedReg) |
1074 | .addReg(ProbeReg); |
1075 | BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)) |
1076 | .addMBB(LoopMBB) |
1077 | .addImm(X86::COND_NE); |
1078 | |
1079 | MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI(); |
1080 | |
1081 | // If in prolog, restore RDX and RCX. |
1082 | if (InProlog) { |
1083 | if (RCXShadowSlot) // It means we spilled RCX in the prologue. |
1084 | addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, |
1085 | TII.get(X86::MOV64rm), X86::RCX), |
1086 | X86::RSP, false, RCXShadowSlot); |
1087 | if (RDXShadowSlot) // It means we spilled RDX in the prologue. |
1088 | addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, |
1089 | TII.get(X86::MOV64rm), X86::RDX), |
1090 | X86::RSP, false, RDXShadowSlot); |
1091 | } |
1092 | |
1093 | // Now that the probing is done, add code to continueMBB to update |
1094 | // the stack pointer for real. |
1095 | ContinueMBB->addLiveIn(PhysReg: SizeReg); |
1096 | BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) |
1097 | .addReg(X86::RSP) |
1098 | .addReg(SizeReg); |
1099 | |
1100 | // Add the control flow edges we need. |
1101 | MBB.addSuccessor(Succ: ContinueMBB); |
1102 | MBB.addSuccessor(Succ: RoundMBB); |
1103 | RoundMBB->addSuccessor(Succ: LoopMBB); |
1104 | LoopMBB->addSuccessor(Succ: ContinueMBB); |
1105 | LoopMBB->addSuccessor(Succ: LoopMBB); |
1106 | |
1107 | // Mark all the instructions added to the prolog as frame setup. |
1108 | if (InProlog) { |
1109 | for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) { |
1110 | BeforeMBBI->setFlag(MachineInstr::FrameSetup); |
1111 | } |
1112 | for (MachineInstr &MI : *RoundMBB) { |
1113 | MI.setFlag(MachineInstr::FrameSetup); |
1114 | } |
1115 | for (MachineInstr &MI : *LoopMBB) { |
1116 | MI.setFlag(MachineInstr::FrameSetup); |
1117 | } |
1118 | for (MachineInstr &MI : |
1119 | llvm::make_range(x: ContinueMBB->begin(), y: ContinueMBBI)) { |
1120 | MI.setFlag(MachineInstr::FrameSetup); |
1121 | } |
1122 | } |
1123 | } |
1124 | |
1125 | void X86FrameLowering::emitStackProbeCall( |
1126 | MachineFunction &MF, MachineBasicBlock &MBB, |
1127 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, |
1128 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { |
1129 | bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; |
1130 | |
1131 | // FIXME: Add indirect thunk support and remove this. |
1132 | if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) |
1133 | report_fatal_error(reason: "Emitting stack probe calls on 64-bit with the large " |
1134 | "code model and indirect thunks not yet implemented." ); |
1135 | |
1136 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
1137 | MachineBasicBlock::LQR_Live && |
1138 | "Stack probe calls will clobber live EFLAGS." ); |
1139 | |
1140 | unsigned CallOp; |
1141 | if (Is64Bit) |
1142 | CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; |
1143 | else |
1144 | CallOp = X86::CALLpcrel32; |
1145 | |
1146 | StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF); |
1147 | |
1148 | MachineInstrBuilder CI; |
1149 | MachineBasicBlock::iterator ExpansionMBBI = std::prev(x: MBBI); |
1150 | |
1151 | // All current stack probes take AX and SP as input, clobber flags, and |
1152 | // preserve all registers. x86_64 probes leave RSP unmodified. |
1153 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { |
1154 | // For the large code model, we have to call through a register. Use R11, |
1155 | // as it is scratch in all supported calling conventions. |
1156 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) |
1157 | .addExternalSymbol(MF.createExternalSymbolName(Symbol)); |
1158 | CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); |
1159 | } else { |
1160 | CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)) |
1161 | .addExternalSymbol(MF.createExternalSymbolName(Name: Symbol)); |
1162 | } |
1163 | |
1164 | unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX; |
1165 | unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP; |
1166 | CI.addReg(AX, RegState::Implicit) |
1167 | .addReg(SP, RegState::Implicit) |
1168 | .addReg(AX, RegState::Define | RegState::Implicit) |
1169 | .addReg(SP, RegState::Define | RegState::Implicit) |
1170 | .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); |
1171 | |
1172 | MachineInstr *ModInst = CI; |
1173 | if (STI.isTargetWin64() || !STI.isOSWindows()) { |
1174 | // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. |
1175 | // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp |
1176 | // themselves. They also does not clobber %rax so we can reuse it when |
1177 | // adjusting %rsp. |
1178 | // All other platforms do not specify a particular ABI for the stack probe |
1179 | // function, so we arbitrarily define it to not adjust %esp/%rsp itself. |
1180 | ModInst = |
1181 | BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(IsLP64: Uses64BitFramePtr)), SP) |
1182 | .addReg(SP) |
1183 | .addReg(AX); |
1184 | } |
1185 | |
1186 | // DebugInfo variable locations -- if there's an instruction number for the |
1187 | // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that |
1188 | // modifies SP. |
1189 | if (InstrNum) { |
1190 | if (STI.isTargetWin64() || !STI.isOSWindows()) { |
1191 | // Label destination operand of the subtract. |
1192 | MF.makeDebugValueSubstitution(*InstrNum, |
1193 | {ModInst->getDebugInstrNum(), 0}); |
1194 | } else { |
1195 | // Label the call. The operand number is the penultimate operand, zero |
1196 | // based. |
1197 | unsigned SPDefOperand = ModInst->getNumOperands() - 2; |
1198 | MF.makeDebugValueSubstitution( |
1199 | *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand}); |
1200 | } |
1201 | } |
1202 | |
1203 | if (InProlog) { |
1204 | // Apply the frame setup flag to all inserted instrs. |
1205 | for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) |
1206 | ExpansionMBBI->setFlag(MachineInstr::FrameSetup); |
1207 | } |
1208 | } |
1209 | |
/// Compute the frame-pointer offset to use for the Win64 UWOP_SET_FPREG
/// unwind opcode.
///
/// \param SPAdjust Stack adjustment in bytes.
/// \returns \p SPAdjust capped at 128 and rounded down to a multiple of 16.
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  const uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
1218 | |
1219 | // If we're forcing a stack realignment we can't rely on just the frame |
1220 | // info, we need to know the ABI stack alignment as well in case we |
1221 | // have a call out. Otherwise just make sure we have some alignment - we'll |
1222 | // go with the minimum SlotSize. |
1223 | uint64_t |
1224 | X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { |
1225 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1226 | Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment. |
1227 | Align StackAlign = getStackAlign(); |
1228 | bool HasRealign = MF.getFunction().hasFnAttribute(Kind: "stackrealign" ); |
1229 | if (HasRealign) { |
1230 | if (MFI.hasCalls()) |
1231 | MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; |
1232 | else if (MaxAlign < SlotSize) |
1233 | MaxAlign = Align(SlotSize); |
1234 | } |
1235 | |
1236 | if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) { |
1237 | if (HasRealign) |
1238 | MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16); |
1239 | else |
1240 | MaxAlign = Align(16); |
1241 | } |
1242 | return MaxAlign.value(); |
1243 | } |
1244 | |
1245 | void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, |
1246 | MachineBasicBlock::iterator MBBI, |
1247 | const DebugLoc &DL, unsigned Reg, |
1248 | uint64_t MaxAlign) const { |
1249 | uint64_t Val = -MaxAlign; |
1250 | unsigned AndOp = getANDriOpcode(IsLP64: Uses64BitFramePtr, Imm: Val); |
1251 | |
1252 | MachineFunction &MF = *MBB.getParent(); |
1253 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
1254 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
1255 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
1256 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); |
1257 | |
1258 | // We want to make sure that (in worst case) less than StackProbeSize bytes |
1259 | // are not probed after the AND. This assumption is used in |
1260 | // emitStackProbeInlineGeneric. |
1261 | if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) { |
1262 | { |
1263 | NumFrameLoopProbe++; |
1264 | MachineBasicBlock *entryMBB = |
1265 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1266 | MachineBasicBlock *headMBB = |
1267 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1268 | MachineBasicBlock *bodyMBB = |
1269 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1270 | MachineBasicBlock * = |
1271 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1272 | |
1273 | MachineFunction::iterator MBBIter = MBB.getIterator(); |
1274 | MF.insert(MBBI: MBBIter, MBB: entryMBB); |
1275 | MF.insert(MBBI: MBBIter, MBB: headMBB); |
1276 | MF.insert(MBBI: MBBIter, MBB: bodyMBB); |
1277 | MF.insert(MBBI: MBBIter, MBB: footMBB); |
1278 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
1279 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
1280 | : Is64Bit ? X86::R11D |
1281 | : X86::EAX; |
1282 | |
1283 | // Setup entry block |
1284 | { |
1285 | |
1286 | entryMBB->splice(Where: entryMBB->end(), Other: &MBB, From: MBB.begin(), To: MBBI); |
1287 | BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) |
1288 | .addReg(StackPtr) |
1289 | .setMIFlag(MachineInstr::FrameSetup); |
1290 | MachineInstr *MI = |
1291 | BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed) |
1292 | .addReg(FinalStackProbed) |
1293 | .addImm(Val) |
1294 | .setMIFlag(MachineInstr::FrameSetup); |
1295 | |
1296 | // The EFLAGS implicit def is dead. |
1297 | MI->getOperand(i: 3).setIsDead(); |
1298 | |
1299 | BuildMI(entryMBB, DL, |
1300 | TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1301 | .addReg(FinalStackProbed) |
1302 | .addReg(StackPtr) |
1303 | .setMIFlag(MachineInstr::FrameSetup); |
1304 | BuildMI(entryMBB, DL, TII.get(X86::JCC_1)) |
1305 | .addMBB(&MBB) |
1306 | .addImm(X86::COND_E) |
1307 | .setMIFlag(MachineInstr::FrameSetup); |
1308 | entryMBB->addSuccessor(Succ: headMBB); |
1309 | entryMBB->addSuccessor(Succ: &MBB); |
1310 | } |
1311 | |
1312 | // Loop entry block |
1313 | |
1314 | { |
1315 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
1316 | BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr) |
1317 | .addReg(StackPtr) |
1318 | .addImm(StackProbeSize) |
1319 | .setMIFlag(MachineInstr::FrameSetup); |
1320 | |
1321 | BuildMI(headMBB, DL, |
1322 | TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1323 | .addReg(StackPtr) |
1324 | .addReg(FinalStackProbed) |
1325 | .setMIFlag(MachineInstr::FrameSetup); |
1326 | |
1327 | // jump to the footer if StackPtr < FinalStackProbed |
1328 | BuildMI(headMBB, DL, TII.get(X86::JCC_1)) |
1329 | .addMBB(footMBB) |
1330 | .addImm(X86::COND_B) |
1331 | .setMIFlag(MachineInstr::FrameSetup); |
1332 | |
1333 | headMBB->addSuccessor(Succ: bodyMBB); |
1334 | headMBB->addSuccessor(Succ: footMBB); |
1335 | } |
1336 | |
1337 | // setup loop body |
1338 | { |
1339 | addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc)) |
1340 | .setMIFlag(MachineInstr::FrameSetup), |
1341 | StackPtr, false, 0) |
1342 | .addImm(0) |
1343 | .setMIFlag(MachineInstr::FrameSetup); |
1344 | |
1345 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
1346 | BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr) |
1347 | .addReg(StackPtr) |
1348 | .addImm(StackProbeSize) |
1349 | .setMIFlag(MachineInstr::FrameSetup); |
1350 | |
1351 | // cmp with stack pointer bound |
1352 | BuildMI(bodyMBB, DL, |
1353 | TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1354 | .addReg(FinalStackProbed) |
1355 | .addReg(StackPtr) |
1356 | .setMIFlag(MachineInstr::FrameSetup); |
1357 | |
1358 | // jump back while FinalStackProbed < StackPtr |
1359 | BuildMI(bodyMBB, DL, TII.get(X86::JCC_1)) |
1360 | .addMBB(bodyMBB) |
1361 | .addImm(X86::COND_B) |
1362 | .setMIFlag(MachineInstr::FrameSetup); |
1363 | bodyMBB->addSuccessor(Succ: bodyMBB); |
1364 | bodyMBB->addSuccessor(Succ: footMBB); |
1365 | } |
1366 | |
1367 | // setup loop footer |
1368 | { |
1369 | BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr) |
1370 | .addReg(FinalStackProbed) |
1371 | .setMIFlag(MachineInstr::FrameSetup); |
1372 | addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc)) |
1373 | .setMIFlag(MachineInstr::FrameSetup), |
1374 | StackPtr, false, 0) |
1375 | .addImm(0) |
1376 | .setMIFlag(MachineInstr::FrameSetup); |
1377 | footMBB->addSuccessor(Succ: &MBB); |
1378 | } |
1379 | |
1380 | fullyRecomputeLiveIns(MBBs: {footMBB, bodyMBB, headMBB, &MBB}); |
1381 | } |
1382 | } else { |
1383 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) |
1384 | .addReg(Reg) |
1385 | .addImm(Val) |
1386 | .setMIFlag(MachineInstr::FrameSetup); |
1387 | |
1388 | // The EFLAGS implicit def is dead. |
1389 | MI->getOperand(i: 3).setIsDead(); |
1390 | } |
1391 | } |
1392 | |
1393 | bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const { |
1394 | // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be |
1395 | // clobbered by any interrupt handler. |
1396 | assert(&STI == &MF.getSubtarget<X86Subtarget>() && |
1397 | "MF used frame lowering for wrong subtarget" ); |
1398 | const Function &Fn = MF.getFunction(); |
1399 | const bool IsWin64CC = STI.isCallingConvWin64(CC: Fn.getCallingConv()); |
1400 | return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone); |
1401 | } |
1402 | |
1403 | /// Return true if we need to use the restricted Windows x64 prologue and |
1404 | /// epilogue code patterns that can be described with WinCFI (.seh_* |
1405 | /// directives). |
1406 | bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { |
1407 | return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
1408 | } |
1409 | |
1410 | bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { |
1411 | return !isWin64Prologue(MF) && MF.needsFrameMoves(); |
1412 | } |
1413 | |
1414 | /// Return true if an opcode is part of the REP group of instructions |
1415 | static bool isOpcodeRep(unsigned Opcode) { |
1416 | switch (Opcode) { |
1417 | case X86::REPNE_PREFIX: |
1418 | case X86::REP_MOVSB_32: |
1419 | case X86::REP_MOVSB_64: |
1420 | case X86::REP_MOVSD_32: |
1421 | case X86::REP_MOVSD_64: |
1422 | case X86::REP_MOVSQ_32: |
1423 | case X86::REP_MOVSQ_64: |
1424 | case X86::REP_MOVSW_32: |
1425 | case X86::REP_MOVSW_64: |
1426 | case X86::REP_PREFIX: |
1427 | case X86::REP_STOSB_32: |
1428 | case X86::REP_STOSB_64: |
1429 | case X86::REP_STOSD_32: |
1430 | case X86::REP_STOSD_64: |
1431 | case X86::REP_STOSQ_32: |
1432 | case X86::REP_STOSQ_64: |
1433 | case X86::REP_STOSW_32: |
1434 | case X86::REP_STOSW_64: |
1435 | return true; |
1436 | default: |
1437 | break; |
1438 | } |
1439 | return false; |
1440 | } |
1441 | |
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.
1446 | |
1447 | /* |
1448 | Here's a gist of what gets emitted: |
1449 | |
1450 | ; Establish frame pointer, if needed |
1451 | [if needs FP] |
1452 | push %rbp |
1453 | .cfi_def_cfa_offset 16 |
1454 | .cfi_offset %rbp, -16 |
      .seh_pushreg %rbp
1456 | mov %rsp, %rbp |
1457 | .cfi_def_cfa_register %rbp |
1458 | |
1459 | ; Spill general-purpose registers |
1460 | [for all callee-saved GPRs] |
1461 | pushq %<reg> |
1462 | [if not needs FP] |
1463 | .cfi_def_cfa_offset (offset from RETADDR) |
1464 | .seh_pushreg %<reg> |
1465 | |
1466 | ; If the required stack alignment > default stack alignment |
1467 | ; rsp needs to be re-aligned. This creates a "re-alignment gap" |
1468 | ; of unknown size in the stack frame. |
1469 | [if stack needs re-alignment] |
1470 | and $MASK, %rsp |
1471 | |
1472 | ; Allocate space for locals |
1473 | [if target is Windows and allocated space > 4096 bytes] |
1474 | ; Windows needs special care for allocations larger |
1475 | ; than one page. |
1476 | mov $NNN, %rax |
1477 | call ___chkstk_ms/___chkstk |
1478 | sub %rax, %rsp |
1479 | [else] |
1480 | sub $NNN, %rsp |
1481 | |
1482 | [if needs FP] |
1483 | .seh_stackalloc (size of XMM spill slots) |
1484 | .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots |
1485 | [else] |
1486 | .seh_stackalloc NNN |
1487 | |
1488 | ; Spill XMMs |
1489 | ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, |
1490 | ; they may get spilled on any platform, if the current function |
1491 | ; calls @llvm.eh.unwind.init |
1492 | [if needs FP] |
1493 | [for all callee-saved XMM registers] |
1494 | movaps %<xmm reg>, -MMM(%rbp) |
1495 | [for all callee-saved XMM registers] |
1496 | .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) |
1497 | ; i.e. the offset relative to (%rbp - SEHFrameOffset) |
1498 | [else] |
1499 | [for all callee-saved XMM registers] |
1500 | movaps %<xmm reg>, KKK(%rsp) |
1501 | [for all callee-saved XMM registers] |
1502 | .seh_savexmm %<xmm reg>, KKK |
1503 | |
1504 | .seh_endprologue |
1505 | |
1506 | [if needs base pointer] |
1507 | mov %rsp, %rbx |
1508 | [if needs to restore base pointer] |
1509 | mov %rsp, -MMM(%rbp) |
1510 | |
1511 | ; Emit CFI info |
1512 | [if needs FP] |
1513 | [for all callee-saved registers] |
1514 | .cfi_offset %<reg>, (offset from %rbp) |
1515 | [else] |
1516 | .cfi_def_cfa_offset (offset from RETADDR) |
1517 | [for all callee-saved registers] |
1518 | .cfi_offset %<reg>, (offset from %rsp) |
1519 | |
1520 | Notes: |
1521 | - .seh directives are emitted only for Windows 64 ABI |
1522 | - .cv_fpo directives are emitted on win32 when emitting CodeView |
1523 | - .cfi directives are emitted for all other ABIs |
1524 | - for 32-bit code, substitute %e?? registers for %r?? |
1525 | */ |
1526 | |
1527 | void X86FrameLowering::emitPrologue(MachineFunction &MF, |
1528 | MachineBasicBlock &MBB) const { |
1529 | assert(&STI == &MF.getSubtarget<X86Subtarget>() && |
1530 | "MF used frame lowering for wrong subtarget" ); |
1531 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
1532 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1533 | const Function &Fn = MF.getFunction(); |
1534 | MachineModuleInfo &MMI = MF.getMMI(); |
1535 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
1536 | uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. |
1537 | uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. |
1538 | bool IsFunclet = MBB.isEHFuncletEntry(); |
1539 | EHPersonality Personality = EHPersonality::Unknown; |
1540 | if (Fn.hasPersonalityFn()) |
1541 | Personality = classifyEHPersonality(Pers: Fn.getPersonalityFn()); |
1542 | bool FnHasClrFunclet = |
1543 | MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; |
1544 | bool IsClrFunclet = IsFunclet && FnHasClrFunclet; |
1545 | bool HasFP = hasFP(MF); |
1546 | bool IsWin64Prologue = isWin64Prologue(MF); |
1547 | bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); |
1548 | // FIXME: Emit FPO data for EH funclets. |
1549 | bool NeedsWinFPO = |
1550 | !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); |
1551 | bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; |
1552 | bool NeedsDwarfCFI = needsDwarfCFI(MF); |
1553 | Register FramePtr = TRI->getFrameRegister(MF); |
1554 | const Register MachineFramePtr = |
1555 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
1556 | : FramePtr; |
1557 | Register BasePtr = TRI->getBaseRegister(); |
1558 | bool HasWinCFI = false; |
1559 | |
1560 | // Debug location must be unknown since the first debug location is used |
1561 | // to determine the end of the prologue. |
1562 | DebugLoc DL; |
1563 | Register ArgBaseReg; |
1564 | |
1565 | // Emit extra prolog for argument stack slot reference. |
1566 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
1567 | // MI is the lea instruction that was created in X86ArgumentStackSlotPass. |
1568 | // Create extra prolog for stack realignment. |
1569 | ArgBaseReg = MI->getOperand(i: 0).getReg(); |
1570 | // leal 4(%esp), %basereg |
1571 | // .cfi_def_cfa %basereg, 0 |
1572 | // andl $-128, %esp |
1573 | // pushl -4(%basereg) |
1574 | BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r), |
1575 | ArgBaseReg) |
1576 | .addUse(StackPtr) |
1577 | .addImm(1) |
1578 | .addUse(X86::NoRegister) |
1579 | .addImm(SlotSize) |
1580 | .addUse(X86::NoRegister) |
1581 | .setMIFlag(MachineInstr::FrameSetup); |
1582 | if (NeedsDwarfCFI) { |
1583 | // .cfi_def_cfa %basereg, 0 |
1584 | unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true); |
1585 | BuildCFI(MBB, MBBI, DL, |
1586 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: 0), |
1587 | Flag: MachineInstr::FrameSetup); |
1588 | } |
1589 | BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign); |
1590 | int64_t Offset = -(int64_t)SlotSize; |
1591 | BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm)) |
1592 | .addReg(ArgBaseReg) |
1593 | .addImm(1) |
1594 | .addReg(X86::NoRegister) |
1595 | .addImm(Offset) |
1596 | .addReg(X86::NoRegister) |
1597 | .setMIFlag(MachineInstr::FrameSetup); |
1598 | } |
1599 | |
1600 | // Space reserved for stack-based arguments when making a (ABI-guaranteed) |
1601 | // tail call. |
1602 | unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); |
1603 | if (TailCallArgReserveSize && IsWin64Prologue) |
1604 | report_fatal_error(reason: "Can't handle guaranteed tail call under win64 yet" ); |
1605 | |
1606 | const bool EmitStackProbeCall = |
1607 | STI.getTargetLowering()->hasStackProbeSymbol(MF); |
1608 | unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); |
1609 | |
1610 | if (HasFP && X86FI->hasSwiftAsyncContext()) { |
1611 | switch (MF.getTarget().Options.SwiftAsyncFramePointer) { |
1612 | case SwiftAsyncFramePointerMode::DeploymentBased: |
1613 | if (STI.swiftAsyncContextIsDynamicallySet()) { |
1614 | // The special symbol below is absolute and has a *value* suitable to be |
1615 | // combined with the frame pointer directly. |
1616 | BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr) |
1617 | .addUse(MachineFramePtr) |
1618 | .addUse(X86::RIP) |
1619 | .addImm(1) |
1620 | .addUse(X86::NoRegister) |
1621 | .addExternalSymbol("swift_async_extendedFramePointerFlags" , |
1622 | X86II::MO_GOTPCREL) |
1623 | .addUse(X86::NoRegister); |
1624 | break; |
1625 | } |
1626 | [[fallthrough]]; |
1627 | |
1628 | case SwiftAsyncFramePointerMode::Always: |
1629 | assert( |
1630 | !IsWin64Prologue && |
1631 | "win64 prologue does not set the bit 60 in the saved frame pointer" ); |
1632 | BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr) |
1633 | .addUse(MachineFramePtr) |
1634 | .addImm(60) |
1635 | .setMIFlag(MachineInstr::FrameSetup); |
1636 | break; |
1637 | |
1638 | case SwiftAsyncFramePointerMode::Never: |
1639 | break; |
1640 | } |
1641 | } |
1642 | |
1643 | // Re-align the stack on 64-bit if the x86-interrupt calling convention is |
1644 | // used and an error code was pushed, since the x86-64 ABI requires a 16-byte |
1645 | // stack alignment. |
1646 | if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && |
1647 | Fn.arg_size() == 2) { |
1648 | StackSize += 8; |
1649 | MFI.setStackSize(StackSize); |
1650 | |
1651 | // Update the stack pointer by pushing a register. This is the instruction |
1652 | // that would end up being emitted by a call to `emitSPUpdate`. |
1653 | // Hard-coding the update to a push avoids emitting a second |
1654 | // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack |
1655 | // probing isn't needed anyways for an 8-byte update. |
1656 | // Pushing a register leaves us in a similar situation to a regular |
1657 | // function call where we know that the address at (rsp-8) is writeable. |
1658 | // That way we avoid any off-by-ones with stack probing for additional |
1659 | // stack pointer updates later on. |
1660 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) |
1661 | .addReg(X86::RAX, RegState::Undef) |
1662 | .setMIFlag(MachineInstr::FrameSetup); |
1663 | } |
1664 | |
1665 | // If this is x86-64 and the Red Zone is not disabled, if we are a leaf |
1666 | // function, and use up to 128 bytes of stack space, don't have a frame |
1667 | // pointer, calls, or dynamic alloca then we do not need to adjust the |
1668 | // stack pointer (we fit in the Red Zone). We also check that we don't |
1669 | // push and pop from the stack. |
1670 | if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) && |
1671 | !MFI.hasVarSizedObjects() && // No dynamic alloca. |
1672 | !MFI.adjustsStack() && // No calls. |
1673 | !EmitStackProbeCall && // No stack probes. |
1674 | !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. |
1675 | !MF.shouldSplitStack()) { // Regular stack |
1676 | uint64_t MinSize = |
1677 | X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta(); |
1678 | if (HasFP) |
1679 | MinSize += SlotSize; |
1680 | X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); |
1681 | StackSize = std::max(a: MinSize, b: StackSize > 128 ? StackSize - 128 : 0); |
1682 | MFI.setStackSize(StackSize); |
1683 | } |
1684 | |
1685 | // Insert stack pointer adjustment for later moving of return addr. Only |
1686 | // applies to tail call optimized functions where the callee argument stack |
1687 | // size is bigger than the caller's. |
1688 | if (TailCallArgReserveSize != 0) { |
1689 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -(int)TailCallArgReserveSize, |
1690 | /*InEpilogue=*/false) |
1691 | .setMIFlag(MachineInstr::FrameSetup); |
1692 | } |
1693 | |
1694 | // Mapping for machine moves: |
1695 | // |
1696 | // DST: VirtualFP AND |
1697 | // SRC: VirtualFP => DW_CFA_def_cfa_offset |
1698 | // ELSE => DW_CFA_def_cfa |
1699 | // |
1700 | // SRC: VirtualFP AND |
1701 | // DST: Register => DW_CFA_def_cfa_register |
1702 | // |
1703 | // ELSE |
1704 | // OFFSET < 0 => DW_CFA_offset_extended_sf |
1705 | // REG < 64 => DW_CFA_offset + Reg |
1706 | // ELSE => DW_CFA_offset_extended |
1707 | |
1708 | uint64_t NumBytes = 0; |
1709 | int stackGrowth = -SlotSize; |
1710 | |
1711 | // Find the funclet establisher parameter |
1712 | Register Establisher = X86::NoRegister; |
1713 | if (IsClrFunclet) |
1714 | Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; |
1715 | else if (IsFunclet) |
1716 | Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; |
1717 | |
1718 | if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { |
1719 | // Immediately spill establisher into the home slot. |
1720 | // The runtime cares about this. |
1721 | // MOV64mr %rdx, 16(%rsp) |
1722 | unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
1723 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) |
1724 | .addReg(Establisher) |
1725 | .setMIFlag(MachineInstr::FrameSetup); |
1726 | MBB.addLiveIn(PhysReg: Establisher); |
1727 | } |
1728 | |
1729 | if (HasFP) { |
1730 | assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved" ); |
1731 | |
1732 | // Calculate required stack adjustment. |
1733 | uint64_t FrameSize = StackSize - SlotSize; |
1734 | NumBytes = |
1735 | FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); |
1736 | |
1737 | // Callee-saved registers are pushed on stack before the stack is realigned. |
1738 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) |
1739 | NumBytes = alignTo(Value: NumBytes, Align: MaxAlign); |
1740 | |
1741 | // Save EBP/RBP into the appropriate stack slot. |
1742 | BuildMI(MBB, MBBI, DL, |
1743 | TII.get(getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>()))) |
1744 | .addReg(MachineFramePtr, RegState::Kill) |
1745 | .setMIFlag(MachineInstr::FrameSetup); |
1746 | |
1747 | if (NeedsDwarfCFI && !ArgBaseReg.isValid()) { |
1748 | // Mark the place where EBP/RBP was saved. |
1749 | // Define the current CFA rule to use the provided offset. |
1750 | assert(StackSize); |
1751 | BuildCFI(MBB, MBBI, DL, |
1752 | CFIInst: MCCFIInstruction::cfiDefCfaOffset( |
1753 | L: nullptr, Offset: -2 * stackGrowth + (int)TailCallArgReserveSize), |
1754 | Flag: MachineInstr::FrameSetup); |
1755 | |
1756 | // Change the rule for the FramePtr to be an "offset" rule. |
1757 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); |
1758 | BuildCFI(MBB, MBBI, DL, |
1759 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfFramePtr, |
1760 | Offset: 2 * stackGrowth - |
1761 | (int)TailCallArgReserveSize), |
1762 | Flag: MachineInstr::FrameSetup); |
1763 | } |
1764 | |
1765 | if (NeedsWinCFI) { |
1766 | HasWinCFI = true; |
1767 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) |
1768 | .addImm(FramePtr) |
1769 | .setMIFlag(MachineInstr::FrameSetup); |
1770 | } |
1771 | |
1772 | if (!IsFunclet) { |
1773 | if (X86FI->hasSwiftAsyncContext()) { |
1774 | assert(!IsWin64Prologue && |
1775 | "win64 prologue does not store async context right below rbp" ); |
1776 | const auto &Attrs = MF.getFunction().getAttributes(); |
1777 | |
1778 | // Before we update the live frame pointer we have to ensure there's a |
1779 | // valid (or null) asynchronous context in its slot just before FP in |
1780 | // the frame record, so store it now. |
1781 | if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) { |
1782 | // We have an initial context in r14, store it just before the frame |
1783 | // pointer. |
1784 | MBB.addLiveIn(X86::R14); |
1785 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) |
1786 | .addReg(X86::R14) |
1787 | .setMIFlag(MachineInstr::FrameSetup); |
1788 | } else { |
1789 | // No initial context, store null so that there's no pointer that |
1790 | // could be misused. |
1791 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32)) |
1792 | .addImm(0) |
1793 | .setMIFlag(MachineInstr::FrameSetup); |
1794 | } |
1795 | |
1796 | if (NeedsWinCFI) { |
1797 | HasWinCFI = true; |
1798 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) |
1799 | .addImm(X86::R14) |
1800 | .setMIFlag(MachineInstr::FrameSetup); |
1801 | } |
1802 | |
1803 | BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr) |
1804 | .addUse(X86::RSP) |
1805 | .addImm(1) |
1806 | .addUse(X86::NoRegister) |
1807 | .addImm(8) |
1808 | .addUse(X86::NoRegister) |
1809 | .setMIFlag(MachineInstr::FrameSetup); |
1810 | BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP) |
1811 | .addUse(X86::RSP) |
1812 | .addImm(8) |
1813 | .setMIFlag(MachineInstr::FrameSetup); |
1814 | } |
1815 | |
1816 | if (!IsWin64Prologue && !IsFunclet) { |
1817 | // Update EBP with the new base value. |
1818 | if (!X86FI->hasSwiftAsyncContext()) |
1819 | BuildMI(MBB, MBBI, DL, |
1820 | TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), |
1821 | FramePtr) |
1822 | .addReg(StackPtr) |
1823 | .setMIFlag(MachineInstr::FrameSetup); |
1824 | |
1825 | if (NeedsDwarfCFI) { |
1826 | if (ArgBaseReg.isValid()) { |
1827 | SmallString<64> CfaExpr; |
1828 | CfaExpr.push_back(Elt: dwarf::DW_CFA_expression); |
1829 | uint8_t buffer[16]; |
1830 | unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true); |
1831 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer)); |
1832 | CfaExpr.push_back(Elt: 2); |
1833 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); |
1834 | CfaExpr.push_back(Elt: 0); |
1835 | // DW_CFA_expression: reg5 DW_OP_breg5 +0 |
1836 | BuildCFI(MBB, MBBI, DL, |
1837 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()), |
1838 | Flag: MachineInstr::FrameSetup); |
1839 | } else { |
1840 | // Mark effective beginning of when frame pointer becomes valid. |
1841 | // Define the current CFA to use the EBP/RBP register. |
1842 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); |
1843 | BuildCFI( |
1844 | MBB, MBBI, DL, |
1845 | CFIInst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: DwarfFramePtr), |
1846 | Flag: MachineInstr::FrameSetup); |
1847 | } |
1848 | } |
1849 | |
1850 | if (NeedsWinFPO) { |
1851 | // .cv_fpo_setframe $FramePtr |
1852 | HasWinCFI = true; |
1853 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) |
1854 | .addImm(FramePtr) |
1855 | .addImm(0) |
1856 | .setMIFlag(MachineInstr::FrameSetup); |
1857 | } |
1858 | } |
1859 | } |
1860 | } else { |
1861 | assert(!IsFunclet && "funclets without FPs not yet implemented" ); |
1862 | NumBytes = |
1863 | StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); |
1864 | } |
1865 | |
1866 | // Update the offset adjustment, which is mainly used by codeview to translate |
1867 | // from ESP to VFRAME relative local variable offsets. |
1868 | if (!IsFunclet) { |
1869 | if (HasFP && TRI->hasStackRealignment(MF)) |
1870 | MFI.setOffsetAdjustment(-NumBytes); |
1871 | else |
1872 | MFI.setOffsetAdjustment(-StackSize); |
1873 | } |
1874 | |
1875 | // For EH funclets, only allocate enough space for outgoing calls. Save the |
1876 | // NumBytes value that we would've used for the parent frame. |
1877 | unsigned ParentFrameNumBytes = NumBytes; |
1878 | if (IsFunclet) |
1879 | NumBytes = getWinEHFuncletFrameSize(MF); |
1880 | |
1881 | // Skip the callee-saved push instructions. |
1882 | bool PushedRegs = false; |
1883 | int StackOffset = 2 * stackGrowth; |
1884 | MachineBasicBlock::const_iterator LastCSPush = MBBI; |
1885 | auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) { |
1886 | if (MBBI == MBB.end() || !MBBI->getFlag(Flag: MachineInstr::FrameSetup)) |
1887 | return false; |
1888 | unsigned Opc = MBBI->getOpcode(); |
1889 | return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r || |
1890 | Opc == X86::PUSH2 || Opc == X86::PUSH2P; |
1891 | }; |
1892 | |
1893 | while (IsCSPush(MBBI)) { |
1894 | PushedRegs = true; |
1895 | Register Reg = MBBI->getOperand(i: 0).getReg(); |
1896 | LastCSPush = MBBI; |
1897 | ++MBBI; |
1898 | unsigned Opc = LastCSPush->getOpcode(); |
1899 | |
1900 | if (!HasFP && NeedsDwarfCFI) { |
1901 | // Mark callee-saved push instruction. |
1902 | // Define the current CFA rule to use the provided offset. |
1903 | assert(StackSize); |
1904 | // Compared to push, push2 introduces more stack offset (one more |
1905 | // register). |
1906 | if (Opc == X86::PUSH2 || Opc == X86::PUSH2P) |
1907 | StackOffset += stackGrowth; |
1908 | BuildCFI(MBB, MBBI, DL, |
1909 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -StackOffset), |
1910 | Flag: MachineInstr::FrameSetup); |
1911 | StackOffset += stackGrowth; |
1912 | } |
1913 | |
1914 | if (NeedsWinCFI) { |
1915 | HasWinCFI = true; |
1916 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) |
1917 | .addImm(Reg) |
1918 | .setMIFlag(MachineInstr::FrameSetup); |
1919 | if (Opc == X86::PUSH2 || Opc == X86::PUSH2P) |
1920 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) |
1921 | .addImm(LastCSPush->getOperand(1).getReg()) |
1922 | .setMIFlag(MachineInstr::FrameSetup); |
1923 | } |
1924 | } |
1925 | |
1926 | // Realign stack after we pushed callee-saved registers (so that we'll be |
1927 | // able to calculate their offsets from the frame pointer). |
1928 | // Don't do this for Win64, it needs to realign the stack after the prologue. |
1929 | if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) && |
1930 | !ArgBaseReg.isValid()) { |
1931 | assert(HasFP && "There should be a frame pointer if stack is realigned." ); |
1932 | BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign); |
1933 | |
1934 | if (NeedsWinCFI) { |
1935 | HasWinCFI = true; |
1936 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign)) |
1937 | .addImm(MaxAlign) |
1938 | .setMIFlag(MachineInstr::FrameSetup); |
1939 | } |
1940 | } |
1941 | |
1942 | // If there is a SUB32ri of ESP immediately before this instruction, merge |
1943 | // the two. This can be the case when tail call elimination is enabled and |
1944 | // the callee has more arguments than the caller. |
1945 | NumBytes -= mergeSPUpdates(MBB, MBBI, doMergeWithPrevious: true); |
1946 | |
1947 | // Adjust stack pointer: ESP -= numbytes. |
1948 | |
1949 | // Windows and cygwin/mingw require a prologue helper routine when allocating |
1950 | // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw |
1951 | // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the |
1952 | // stack and adjust the stack pointer in one go. The 64-bit version of |
1953 | // __chkstk is only responsible for probing the stack. The 64-bit prologue is |
1954 | // responsible for adjusting the stack pointer. Touching the stack at 4K |
1955 | // increments is necessary to ensure that the guard pages used by the OS |
1956 | // virtual memory manager are allocated in correct sequence. |
1957 | uint64_t AlignedNumBytes = NumBytes; |
1958 | if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) |
1959 | AlignedNumBytes = alignTo(Value: AlignedNumBytes, Align: MaxAlign); |
1960 | if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) { |
1961 | assert(!X86FI->getUsesRedZone() && |
1962 | "The Red Zone is not accounted for in stack probes" ); |
1963 | |
1964 | // Check whether EAX is livein for this block. |
1965 | bool isEAXAlive = isEAXLiveIn(MBB); |
1966 | |
1967 | if (isEAXAlive) { |
1968 | if (Is64Bit) { |
1969 | // Save RAX |
1970 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) |
1971 | .addReg(X86::RAX, RegState::Kill) |
1972 | .setMIFlag(MachineInstr::FrameSetup); |
1973 | } else { |
1974 | // Save EAX |
1975 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) |
1976 | .addReg(X86::EAX, RegState::Kill) |
1977 | .setMIFlag(MachineInstr::FrameSetup); |
1978 | } |
1979 | } |
1980 | |
1981 | if (Is64Bit) { |
1982 | // Handle the 64-bit Windows ABI case where we need to call __chkstk. |
1983 | // Function prologue is responsible for adjusting the stack pointer. |
1984 | int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; |
1985 | BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX) |
1986 | .addImm(Alloc) |
1987 | .setMIFlag(MachineInstr::FrameSetup); |
1988 | } else { |
1989 | // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. |
1990 | // We'll also use 4 already allocated bytes for EAX. |
1991 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) |
1992 | .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) |
1993 | .setMIFlag(MachineInstr::FrameSetup); |
1994 | } |
1995 | |
1996 | // Call __chkstk, __chkstk_ms, or __alloca. |
1997 | emitStackProbe(MF, MBB, MBBI, DL, InProlog: true); |
1998 | |
1999 | if (isEAXAlive) { |
2000 | // Restore RAX/EAX |
2001 | MachineInstr *MI; |
2002 | if (Is64Bit) |
2003 | MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX), |
2004 | StackPtr, false, NumBytes - 8); |
2005 | else |
2006 | MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), |
2007 | StackPtr, false, NumBytes - 4); |
2008 | MI->setFlag(MachineInstr::FrameSetup); |
2009 | MBB.insert(I: MBBI, MI); |
2010 | } |
2011 | } else if (NumBytes) { |
2012 | emitSPUpdate(MBB, MBBI, DL, NumBytes: -(int64_t)NumBytes, /*InEpilogue=*/false); |
2013 | } |
2014 | |
2015 | if (NeedsWinCFI && NumBytes) { |
2016 | HasWinCFI = true; |
2017 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) |
2018 | .addImm(NumBytes) |
2019 | .setMIFlag(MachineInstr::FrameSetup); |
2020 | } |
2021 | |
2022 | int SEHFrameOffset = 0; |
2023 | unsigned SPOrEstablisher; |
2024 | if (IsFunclet) { |
2025 | if (IsClrFunclet) { |
2026 | // The establisher parameter passed to a CLR funclet is actually a pointer |
2027 | // to the (mostly empty) frame of its nearest enclosing funclet; we have |
2028 | // to find the root function establisher frame by loading the PSPSym from |
2029 | // the intermediate frame. |
2030 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); |
2031 | MachinePointerInfo NoInfo; |
2032 | MBB.addLiveIn(PhysReg: Establisher); |
2033 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher), |
2034 | Establisher, false, PSPSlotOffset) |
2035 | .addMemOperand(MF.getMachineMemOperand( |
2036 | NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize))); |
2037 | ; |
2038 | // Save the root establisher back into the current funclet's (mostly |
2039 | // empty) frame, in case a sub-funclet or the GC needs it. |
2040 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, |
2041 | false, PSPSlotOffset) |
2042 | .addReg(Establisher) |
2043 | .addMemOperand(MF.getMachineMemOperand( |
2044 | NoInfo, |
2045 | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, |
2046 | SlotSize, Align(SlotSize))); |
2047 | } |
2048 | SPOrEstablisher = Establisher; |
2049 | } else { |
2050 | SPOrEstablisher = StackPtr; |
2051 | } |
2052 | |
2053 | if (IsWin64Prologue && HasFP) { |
2054 | // Set RBP to a small fixed offset from RSP. In the funclet case, we base |
2055 | // this calculation on the incoming establisher, which holds the value of |
2056 | // RSP from the parent frame at the end of the prologue. |
2057 | SEHFrameOffset = calculateSetFPREG(SPAdjust: ParentFrameNumBytes); |
2058 | if (SEHFrameOffset) |
2059 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), |
2060 | SPOrEstablisher, false, SEHFrameOffset); |
2061 | else |
2062 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) |
2063 | .addReg(SPOrEstablisher); |
2064 | |
2065 | // If this is not a funclet, emit the CFI describing our frame pointer. |
2066 | if (NeedsWinCFI && !IsFunclet) { |
2067 | assert(!NeedsWinFPO && "this setframe incompatible with FPO data" ); |
2068 | HasWinCFI = true; |
2069 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) |
2070 | .addImm(FramePtr) |
2071 | .addImm(SEHFrameOffset) |
2072 | .setMIFlag(MachineInstr::FrameSetup); |
2073 | if (isAsynchronousEHPersonality(Pers: Personality)) |
2074 | MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; |
2075 | } |
2076 | } else if (IsFunclet && STI.is32Bit()) { |
2077 | // Reset EBP / ESI to something good for funclets. |
2078 | MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); |
2079 | // If we're a catch funclet, we can be returned to via catchret. Save ESP |
2080 | // into the registration node so that the runtime will restore it for us. |
2081 | if (!MBB.isCleanupFuncletEntry()) { |
2082 | assert(Personality == EHPersonality::MSVC_CXX); |
2083 | Register FrameReg; |
2084 | int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; |
2085 | int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed(); |
2086 | // ESP is the first field, so no extra displacement is needed. |
2087 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg, |
2088 | false, EHRegOffset) |
2089 | .addReg(X86::ESP); |
2090 | } |
2091 | } |
2092 | |
2093 | while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup)) { |
2094 | const MachineInstr &FrameInstr = *MBBI; |
2095 | ++MBBI; |
2096 | |
2097 | if (NeedsWinCFI) { |
2098 | int FI; |
2099 | if (Register Reg = TII.isStoreToStackSlot(MI: FrameInstr, FrameIndex&: FI)) { |
2100 | if (X86::FR64RegClass.contains(Reg)) { |
2101 | int Offset; |
2102 | Register IgnoredFrameReg; |
2103 | if (IsWin64Prologue && IsFunclet) |
2104 | Offset = getWin64EHFrameIndexRef(MF, FI, SPReg&: IgnoredFrameReg); |
2105 | else |
2106 | Offset = |
2107 | getFrameIndexReference(MF, FI, FrameReg&: IgnoredFrameReg).getFixed() + |
2108 | SEHFrameOffset; |
2109 | |
2110 | HasWinCFI = true; |
2111 | assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data" ); |
2112 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) |
2113 | .addImm(Reg) |
2114 | .addImm(Offset) |
2115 | .setMIFlag(MachineInstr::FrameSetup); |
2116 | } |
2117 | } |
2118 | } |
2119 | } |
2120 | |
2121 | if (NeedsWinCFI && HasWinCFI) |
2122 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) |
2123 | .setMIFlag(MachineInstr::FrameSetup); |
2124 | |
2125 | if (FnHasClrFunclet && !IsFunclet) { |
2126 | // Save the so-called Initial-SP (i.e. the value of the stack pointer |
2127 | // immediately after the prolog) into the PSPSlot so that funclets |
2128 | // and the GC can recover it. |
2129 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); |
2130 | auto PSPInfo = MachinePointerInfo::getFixedStack( |
2131 | MF, FI: MF.getWinEHFuncInfo()->PSPSymFrameIdx); |
2132 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false, |
2133 | PSPSlotOffset) |
2134 | .addReg(StackPtr) |
2135 | .addMemOperand(MF.getMachineMemOperand( |
2136 | PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, |
2137 | SlotSize, Align(SlotSize))); |
2138 | } |
2139 | |
2140 | // Realign stack after we spilled callee-saved registers (so that we'll be |
2141 | // able to calculate their offsets from the frame pointer). |
2142 | // Win64 requires aligning the stack after the prologue. |
2143 | if (IsWin64Prologue && TRI->hasStackRealignment(MF)) { |
2144 | assert(HasFP && "There should be a frame pointer if stack is realigned." ); |
2145 | BuildStackAlignAND(MBB, MBBI, DL, Reg: SPOrEstablisher, MaxAlign); |
2146 | } |
2147 | |
2148 | // We already dealt with stack realignment and funclets above. |
2149 | if (IsFunclet && STI.is32Bit()) |
2150 | return; |
2151 | |
2152 | // If we need a base pointer, set it up here. It's whatever the value |
2153 | // of the stack pointer is at this point. Any variable size objects |
2154 | // will be allocated after this, so we can still use the base pointer |
2155 | // to reference locals. |
2156 | if (TRI->hasBasePointer(MF)) { |
2157 | // Update the base pointer with the current stack pointer. |
2158 | unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; |
2159 | BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) |
2160 | .addReg(SPOrEstablisher) |
2161 | .setMIFlag(MachineInstr::FrameSetup); |
2162 | if (X86FI->getRestoreBasePointer()) { |
2163 | // Stash value of base pointer. Saving RSP instead of EBP shortens |
2164 | // dependence chain. Used by SjLj EH. |
2165 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
2166 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, |
2167 | X86FI->getRestoreBasePointerOffset()) |
2168 | .addReg(SPOrEstablisher) |
2169 | .setMIFlag(MachineInstr::FrameSetup); |
2170 | } |
2171 | |
2172 | if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { |
2173 | // Stash the value of the frame pointer relative to the base pointer for |
2174 | // Win32 EH. This supports Win32 EH, which does the inverse of the above: |
2175 | // it recovers the frame pointer from the base pointer rather than the |
2176 | // other way around. |
2177 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
2178 | Register UsedReg; |
2179 | int Offset = |
2180 | getFrameIndexReference(MF, FI: X86FI->getSEHFramePtrSaveIndex(), FrameReg&: UsedReg) |
2181 | .getFixed(); |
2182 | assert(UsedReg == BasePtr); |
2183 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) |
2184 | .addReg(FramePtr) |
2185 | .setMIFlag(MachineInstr::FrameSetup); |
2186 | } |
2187 | } |
2188 | if (ArgBaseReg.isValid()) { |
2189 | // Save argument base pointer. |
2190 | auto *MI = X86FI->getStackPtrSaveMI(); |
2191 | int FI = MI->getOperand(i: 1).getIndex(); |
2192 | unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr; |
2193 | // movl %basereg, offset(%ebp) |
2194 | addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI) |
2195 | .addReg(ArgBaseReg) |
2196 | .setMIFlag(MachineInstr::FrameSetup); |
2197 | } |
2198 | |
2199 | if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { |
2200 | // Mark end of stack pointer adjustment. |
2201 | if (!HasFP && NumBytes) { |
2202 | // Define the current CFA rule to use the provided offset. |
2203 | assert(StackSize); |
2204 | BuildCFI( |
2205 | MBB, MBBI, DL, |
2206 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: StackSize - stackGrowth), |
2207 | Flag: MachineInstr::FrameSetup); |
2208 | } |
2209 | |
2210 | // Emit DWARF info specifying the offsets of the callee-saved registers. |
2211 | emitCalleeSavedFrameMoves(MBB, MBBI, DL, IsPrologue: true); |
2212 | } |
2213 | |
2214 | // X86 Interrupt handling function cannot assume anything about the direction |
2215 | // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction |
2216 | // in each prologue of interrupt handler function. |
2217 | // |
2218 | // Create "cld" instruction only in these cases: |
2219 | // 1. The interrupt handling function uses any of the "rep" instructions. |
2220 | // 2. Interrupt handling function calls another function. |
2221 | // 3. If there are any inline asm blocks, as we do not know what they do |
2222 | // |
2223 | // TODO: We should also emit cld if we detect the use of std, but as of now, |
2224 | // the compiler does not even emit that instruction or even define it, so in |
2225 | // practice, this would only happen with inline asm, which we cover anyway. |
2226 | if (Fn.getCallingConv() == CallingConv::X86_INTR) { |
2227 | bool NeedsCLD = false; |
2228 | |
2229 | for (const MachineBasicBlock &B : MF) { |
2230 | for (const MachineInstr &MI : B) { |
2231 | if (MI.isCall()) { |
2232 | NeedsCLD = true; |
2233 | break; |
2234 | } |
2235 | |
2236 | if (isOpcodeRep(Opcode: MI.getOpcode())) { |
2237 | NeedsCLD = true; |
2238 | break; |
2239 | } |
2240 | |
2241 | if (MI.isInlineAsm()) { |
2242 | // TODO: Parse asm for rep instructions or call sites? |
2243 | // For now, let's play it safe and emit a cld instruction |
2244 | // just in case. |
2245 | NeedsCLD = true; |
2246 | break; |
2247 | } |
2248 | } |
2249 | } |
2250 | |
2251 | if (NeedsCLD) { |
2252 | BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) |
2253 | .setMIFlag(MachineInstr::FrameSetup); |
2254 | } |
2255 | } |
2256 | |
2257 | // At this point we know if the function has WinCFI or not. |
2258 | MF.setHasWinCFI(HasWinCFI); |
2259 | } |
2260 | |
2261 | bool X86FrameLowering::canUseLEAForSPInEpilogue( |
2262 | const MachineFunction &MF) const { |
2263 | // We can't use LEA instructions for adjusting the stack pointer if we don't |
2264 | // have a frame pointer in the Win64 ABI. Only ADD instructions may be used |
2265 | // to deallocate the stack. |
2266 | // This means that we can use LEA for SP in two situations: |
2267 | // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. |
2268 | // 2. We *have* a frame pointer which means we are permitted to use LEA. |
2269 | return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); |
2270 | } |
2271 | |
2272 | static bool isFuncletReturnInstr(MachineInstr &MI) { |
2273 | switch (MI.getOpcode()) { |
2274 | case X86::CATCHRET: |
2275 | case X86::CLEANUPRET: |
2276 | return true; |
2277 | default: |
2278 | return false; |
2279 | } |
2280 | llvm_unreachable("impossible" ); |
2281 | } |
2282 | |
2283 | // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the |
2284 | // stack. It holds a pointer to the bottom of the root function frame. The |
2285 | // establisher frame pointer passed to a nested funclet may point to the |
2286 | // (mostly empty) frame of its parent funclet, but it will need to find |
2287 | // the frame of the root function to access locals. To facilitate this, |
2288 | // every funclet copies the pointer to the bottom of the root function |
2289 | // frame into a PSPSym slot in its own (mostly empty) stack frame. Using the |
2290 | // same offset for the PSPSym in the root function frame that's used in the |
2291 | // funclets' frames allows each funclet to dynamically accept any ancestor |
2292 | // frame as its establisher argument (the runtime doesn't guarantee the |
2293 | // immediate parent for some reason lost to history), and also allows the GC, |
2294 | // which uses the PSPSym for some bookkeeping, to find it in any funclet's |
2295 | // frame with only a single offset reported for the entire method. |
2296 | unsigned |
2297 | X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { |
2298 | const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); |
2299 | Register SPReg; |
2300 | int Offset = getFrameIndexReferencePreferSP(MF, FI: Info.PSPSymFrameIdx, FrameReg&: SPReg, |
2301 | /*IgnoreSPUpdates*/ true) |
2302 | .getFixed(); |
2303 | assert(Offset >= 0 && SPReg == TRI->getStackRegister()); |
2304 | return static_cast<unsigned>(Offset); |
2305 | } |
2306 | |
2307 | unsigned |
2308 | X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { |
2309 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2310 | // This is the size of the pushed CSRs. |
2311 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2312 | // This is the size of callee saved XMMs. |
2313 | const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2314 | unsigned XMMSize = |
2315 | WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass); |
2316 | // This is the amount of stack a funclet needs to allocate. |
2317 | unsigned UsedSize; |
2318 | EHPersonality Personality = |
2319 | classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn()); |
2320 | if (Personality == EHPersonality::CoreCLR) { |
2321 | // CLR funclets need to hold enough space to include the PSPSym, at the |
2322 | // same offset from the stack pointer (immediately after the prolog) as it |
2323 | // resides at in the main function. |
2324 | UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; |
2325 | } else { |
2326 | // Other funclets just need enough stack for outgoing call arguments. |
2327 | UsedSize = MF.getFrameInfo().getMaxCallFrameSize(); |
2328 | } |
2329 | // RBP is not included in the callee saved register block. After pushing RBP, |
2330 | // everything is 16 byte aligned. Everything we allocate before an outgoing |
2331 | // call must also be 16 byte aligned. |
2332 | unsigned FrameSizeMinusRBP = alignTo(Size: CSSize + UsedSize, A: getStackAlign()); |
2333 | // Subtract out the size of the callee saved registers. This is how much stack |
2334 | // each funclet will allocate. |
2335 | return FrameSizeMinusRBP + XMMSize - CSSize; |
2336 | } |
2337 | |
2338 | static bool isTailCallOpcode(unsigned Opc) { |
2339 | return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi || |
2340 | Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 || |
2341 | Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64; |
2342 | } |
2343 | |
2344 | void X86FrameLowering::emitEpilogue(MachineFunction &MF, |
2345 | MachineBasicBlock &MBB) const { |
2346 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2347 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2348 | MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator(); |
2349 | MachineBasicBlock::iterator MBBI = Terminator; |
2350 | DebugLoc DL; |
2351 | if (MBBI != MBB.end()) |
2352 | DL = MBBI->getDebugLoc(); |
2353 | // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. |
2354 | const bool Is64BitILP32 = STI.isTarget64BitILP32(); |
2355 | Register FramePtr = TRI->getFrameRegister(MF); |
2356 | Register MachineFramePtr = |
2357 | Is64BitILP32 ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) : FramePtr; |
2358 | |
2359 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
2360 | bool NeedsWin64CFI = |
2361 | IsWin64Prologue && MF.getFunction().needsUnwindTableEntry(); |
2362 | bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(MI&: *MBBI); |
2363 | |
2364 | // Get the number of bytes to allocate from the FrameInfo. |
2365 | uint64_t StackSize = MFI.getStackSize(); |
2366 | uint64_t MaxAlign = calculateMaxStackAlign(MF); |
2367 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2368 | unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); |
2369 | bool HasFP = hasFP(MF); |
2370 | uint64_t NumBytes = 0; |
2371 | |
2372 | bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() && |
2373 | !MF.getTarget().getTargetTriple().isOSWindows()) && |
2374 | MF.needsFrameMoves(); |
2375 | |
2376 | Register ArgBaseReg; |
2377 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
2378 | unsigned Opc = X86::LEA32r; |
2379 | Register StackReg = X86::ESP; |
2380 | ArgBaseReg = MI->getOperand(i: 0).getReg(); |
2381 | if (STI.is64Bit()) { |
2382 | Opc = X86::LEA64r; |
2383 | StackReg = X86::RSP; |
2384 | } |
2385 | // leal -4(%basereg), %esp |
2386 | // .cfi_def_cfa %esp, 4 |
2387 | BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg) |
2388 | .addUse(ArgBaseReg) |
2389 | .addImm(1) |
2390 | .addUse(X86::NoRegister) |
2391 | .addImm(-(int64_t)SlotSize) |
2392 | .addUse(X86::NoRegister) |
2393 | .setMIFlag(MachineInstr::FrameDestroy); |
2394 | if (NeedsDwarfCFI) { |
2395 | unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true); |
2396 | BuildCFI(MBB, MBBI, DL, |
2397 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize), |
2398 | Flag: MachineInstr::FrameDestroy); |
2399 | --MBBI; |
2400 | } |
2401 | --MBBI; |
2402 | } |
2403 | |
2404 | if (IsFunclet) { |
2405 | assert(HasFP && "EH funclets without FP not yet implemented" ); |
2406 | NumBytes = getWinEHFuncletFrameSize(MF); |
2407 | } else if (HasFP) { |
2408 | // Calculate required stack adjustment. |
2409 | uint64_t FrameSize = StackSize - SlotSize; |
2410 | NumBytes = FrameSize - CSSize - TailCallArgReserveSize; |
2411 | |
2412 | // Callee-saved registers were pushed on stack before the stack was |
2413 | // realigned. |
2414 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) |
2415 | NumBytes = alignTo(Value: FrameSize, Align: MaxAlign); |
2416 | } else { |
2417 | NumBytes = StackSize - CSSize - TailCallArgReserveSize; |
2418 | } |
2419 | uint64_t SEHStackAllocAmt = NumBytes; |
2420 | |
2421 | // AfterPop is the position to insert .cfi_restore. |
2422 | MachineBasicBlock::iterator AfterPop = MBBI; |
2423 | if (HasFP) { |
2424 | if (X86FI->hasSwiftAsyncContext()) { |
2425 | // Discard the context. |
2426 | int Offset = 16 + mergeSPUpdates(MBB, MBBI, doMergeWithPrevious: true); |
2427 | emitSPUpdate(MBB, MBBI, DL, NumBytes: Offset, /*InEpilogue*/ true); |
2428 | } |
2429 | // Pop EBP. |
2430 | BuildMI(MBB, MBBI, DL, |
2431 | TII.get(getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())), |
2432 | MachineFramePtr) |
2433 | .setMIFlag(MachineInstr::FrameDestroy); |
2434 | |
2435 | // We need to reset FP to its untagged state on return. Bit 60 is currently |
2436 | // used to show the presence of an extended frame. |
2437 | if (X86FI->hasSwiftAsyncContext()) { |
2438 | BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr) |
2439 | .addUse(MachineFramePtr) |
2440 | .addImm(60) |
2441 | .setMIFlag(MachineInstr::FrameDestroy); |
2442 | } |
2443 | |
2444 | if (NeedsDwarfCFI) { |
2445 | if (!ArgBaseReg.isValid()) { |
2446 | unsigned DwarfStackPtr = |
2447 | TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); |
2448 | BuildCFI(MBB, MBBI, DL, |
2449 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize), |
2450 | Flag: MachineInstr::FrameDestroy); |
2451 | } |
2452 | if (!MBB.succ_empty() && !MBB.isReturnBlock()) { |
2453 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); |
2454 | BuildCFI(MBB, MBBI: AfterPop, DL, |
2455 | CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfFramePtr), |
2456 | Flag: MachineInstr::FrameDestroy); |
2457 | --MBBI; |
2458 | --AfterPop; |
2459 | } |
2460 | --MBBI; |
2461 | } |
2462 | } |
2463 | |
2464 | MachineBasicBlock::iterator FirstCSPop = MBBI; |
2465 | // Skip the callee-saved pop instructions. |
2466 | while (MBBI != MBB.begin()) { |
2467 | MachineBasicBlock::iterator PI = std::prev(x: MBBI); |
2468 | unsigned Opc = PI->getOpcode(); |
2469 | |
2470 | if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { |
2471 | if (!PI->getFlag(MachineInstr::FrameDestroy) || |
2472 | (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 && |
2473 | Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 && |
2474 | Opc != X86::POP2P && Opc != X86::LEA64r)) |
2475 | break; |
2476 | FirstCSPop = PI; |
2477 | } |
2478 | |
2479 | --MBBI; |
2480 | } |
2481 | if (ArgBaseReg.isValid()) { |
2482 | // Restore argument base pointer. |
2483 | auto *MI = X86FI->getStackPtrSaveMI(); |
2484 | int FI = MI->getOperand(i: 1).getIndex(); |
2485 | unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm; |
2486 | // movl offset(%ebp), %basereg |
2487 | addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI) |
2488 | .setMIFlag(MachineInstr::FrameDestroy); |
2489 | } |
2490 | MBBI = FirstCSPop; |
2491 | |
2492 | if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) |
2493 | emitCatchRetReturnValue(MBB, MBBI: FirstCSPop, CatchRet: &*Terminator); |
2494 | |
2495 | if (MBBI != MBB.end()) |
2496 | DL = MBBI->getDebugLoc(); |
2497 | // If there is an ADD32ri or SUB32ri of ESP immediately before this |
2498 | // instruction, merge the two instructions. |
2499 | if (NumBytes || MFI.hasVarSizedObjects()) |
2500 | NumBytes += mergeSPUpdates(MBB, MBBI, doMergeWithPrevious: true); |
2501 | |
2502 | // If dynamic alloca is used, then reset esp to point to the last callee-saved |
2503 | // slot before popping them off! Same applies for the case, when stack was |
2504 | // realigned. Don't do this if this was a funclet epilogue, since the funclets |
2505 | // will not do realignment or dynamic stack allocation. |
2506 | if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) && |
2507 | !IsFunclet) { |
2508 | if (TRI->hasStackRealignment(MF)) |
2509 | MBBI = FirstCSPop; |
2510 | unsigned SEHFrameOffset = calculateSetFPREG(SPAdjust: SEHStackAllocAmt); |
2511 | uint64_t LEAAmount = |
2512 | IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; |
2513 | |
2514 | if (X86FI->hasSwiftAsyncContext()) |
2515 | LEAAmount -= 16; |
2516 | |
2517 | // There are only two legal forms of epilogue: |
2518 | // - add SEHAllocationSize, %rsp |
2519 | // - lea SEHAllocationSize(%FramePtr), %rsp |
2520 | // |
2521 | // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence. |
2522 | // However, we may use this sequence if we have a frame pointer because the |
2523 | // effects of the prologue can safely be undone. |
2524 | if (LEAAmount != 0) { |
2525 | unsigned Opc = getLEArOpcode(IsLP64: Uses64BitFramePtr); |
2526 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr, |
2527 | false, LEAAmount); |
2528 | --MBBI; |
2529 | } else { |
2530 | unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); |
2531 | BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr); |
2532 | --MBBI; |
2533 | } |
2534 | } else if (NumBytes) { |
2535 | // Adjust stack pointer back: ESP += numbytes. |
2536 | emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true); |
2537 | if (!HasFP && NeedsDwarfCFI) { |
2538 | // Define the current CFA rule to use the provided offset. |
2539 | BuildCFI(MBB, MBBI, DL, |
2540 | CFIInst: MCCFIInstruction::cfiDefCfaOffset( |
2541 | L: nullptr, Offset: CSSize + TailCallArgReserveSize + SlotSize), |
2542 | Flag: MachineInstr::FrameDestroy); |
2543 | } |
2544 | --MBBI; |
2545 | } |
2546 | |
2547 | // Windows unwinder will not invoke function's exception handler if IP is |
2548 | // either in prologue or in epilogue. This behavior causes a problem when a |
2549 | // call immediately precedes an epilogue, because the return address points |
2550 | // into the epilogue. To cope with that, we insert an epilogue marker here, |
2551 | // then replace it with a 'nop' if it ends up immediately after a CALL in the |
2552 | // final emitted code. |
2553 | if (NeedsWin64CFI && MF.hasWinCFI()) |
2554 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); |
2555 | |
2556 | if (!HasFP && NeedsDwarfCFI) { |
2557 | MBBI = FirstCSPop; |
2558 | int64_t Offset = -CSSize - SlotSize; |
2559 | // Mark callee-saved pop instruction. |
2560 | // Define the current CFA rule to use the provided offset. |
2561 | while (MBBI != MBB.end()) { |
2562 | MachineBasicBlock::iterator PI = MBBI; |
2563 | unsigned Opc = PI->getOpcode(); |
2564 | ++MBBI; |
2565 | if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r || |
2566 | Opc == X86::POP2 || Opc == X86::POP2P) { |
2567 | Offset += SlotSize; |
2568 | // Compared to pop, pop2 introduces more stack offset (one more |
2569 | // register). |
2570 | if (Opc == X86::POP2 || Opc == X86::POP2P) |
2571 | Offset += SlotSize; |
2572 | BuildCFI(MBB, MBBI, DL, |
2573 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -Offset), |
2574 | Flag: MachineInstr::FrameDestroy); |
2575 | } |
2576 | } |
2577 | } |
2578 | |
2579 | // Emit DWARF info specifying the restores of the callee-saved registers. |
2580 | // For epilogue with return inside or being other block without successor, |
2581 | // no need to generate .cfi_restore for callee-saved registers. |
2582 | if (NeedsDwarfCFI && !MBB.succ_empty()) |
2583 | emitCalleeSavedFrameMoves(MBB, MBBI: AfterPop, DL, IsPrologue: false); |
2584 | |
2585 | if (Terminator == MBB.end() || !isTailCallOpcode(Opc: Terminator->getOpcode())) { |
2586 | // Add the return addr area delta back since we are not tail calling. |
2587 | int Offset = -1 * X86FI->getTCReturnAddrDelta(); |
2588 | assert(Offset >= 0 && "TCDelta should never be positive" ); |
2589 | if (Offset) { |
2590 | // Check for possible merge with preceding ADD instruction. |
2591 | Offset += mergeSPUpdates(MBB, MBBI&: Terminator, doMergeWithPrevious: true); |
2592 | emitSPUpdate(MBB, MBBI&: Terminator, DL, NumBytes: Offset, /*InEpilogue=*/true); |
2593 | } |
2594 | } |
2595 | |
2596 | // Emit tilerelease for AMX kernel. |
2597 | if (X86FI->hasVirtualTileReg()) |
2598 | BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE)); |
2599 | } |
2600 | |
2601 | StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, |
2602 | int FI, |
2603 | Register &FrameReg) const { |
2604 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2605 | |
2606 | bool IsFixed = MFI.isFixedObjectIndex(ObjectIdx: FI); |
2607 | // We can't calculate offset from frame pointer if the stack is realigned, |
2608 | // so enforce usage of stack/base pointer. The base pointer is used when we |
2609 | // have dynamic allocas in addition to dynamic realignment. |
2610 | if (TRI->hasBasePointer(MF)) |
2611 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister(); |
2612 | else if (TRI->hasStackRealignment(MF)) |
2613 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister(); |
2614 | else |
2615 | FrameReg = TRI->getFrameRegister(MF); |
2616 | |
2617 | // Offset will hold the offset from the stack pointer at function entry to the |
2618 | // object. |
2619 | // We need to factor in additional offsets applied during the prologue to the |
2620 | // frame, base, and stack pointer depending on which is used. |
2621 | int Offset = MFI.getObjectOffset(ObjectIdx: FI) - getOffsetOfLocalArea(); |
2622 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2623 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2624 | uint64_t StackSize = MFI.getStackSize(); |
2625 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
2626 | int64_t FPDelta = 0; |
2627 | |
2628 | // In an x86 interrupt, remove the offset we added to account for the return |
2629 | // address from any stack object allocated in the caller's frame. Interrupts |
2630 | // do not have a standard return address. Fixed objects in the current frame, |
2631 | // such as SSE register spills, should not get this treatment. |
2632 | if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && |
2633 | Offset >= 0) { |
2634 | Offset += getOffsetOfLocalArea(); |
2635 | } |
2636 | |
2637 | if (IsWin64Prologue) { |
2638 | assert(!MFI.hasCalls() || (StackSize % 16) == 8); |
2639 | |
2640 | // Calculate required stack adjustment. |
2641 | uint64_t FrameSize = StackSize - SlotSize; |
2642 | // If required, include space for extra hidden slot for stashing base |
2643 | // pointer. |
2644 | if (X86FI->getRestoreBasePointer()) |
2645 | FrameSize += SlotSize; |
2646 | uint64_t NumBytes = FrameSize - CSSize; |
2647 | |
2648 | uint64_t SEHFrameOffset = calculateSetFPREG(SPAdjust: NumBytes); |
2649 | if (FI && FI == X86FI->getFAIndex()) |
2650 | return StackOffset::getFixed(Fixed: -SEHFrameOffset); |
2651 | |
2652 | // FPDelta is the offset from the "traditional" FP location of the old base |
2653 | // pointer followed by return address and the location required by the |
2654 | // restricted Win64 prologue. |
2655 | // Add FPDelta to all offsets below that go through the frame pointer. |
2656 | FPDelta = FrameSize - SEHFrameOffset; |
2657 | assert((!MFI.hasCalls() || (FPDelta % 16) == 0) && |
2658 | "FPDelta isn't aligned per the Win64 ABI!" ); |
2659 | } |
2660 | |
2661 | if (FrameReg == TRI->getFramePtr()) { |
2662 | // Skip saved EBP/RBP |
2663 | Offset += SlotSize; |
2664 | |
2665 | // Account for restricted Windows prologue. |
2666 | Offset += FPDelta; |
2667 | |
2668 | // Skip the RETADDR move area |
2669 | int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); |
2670 | if (TailCallReturnAddrDelta < 0) |
2671 | Offset -= TailCallReturnAddrDelta; |
2672 | |
2673 | return StackOffset::getFixed(Fixed: Offset); |
2674 | } |
2675 | |
2676 | // FrameReg is either the stack pointer or a base pointer. But the base is |
2677 | // located at the end of the statically known StackSize so the distinction |
2678 | // doesn't really matter. |
2679 | if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF)) |
2680 | assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); |
2681 | return StackOffset::getFixed(Fixed: Offset + StackSize); |
2682 | } |
2683 | |
2684 | int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, |
2685 | Register &FrameReg) const { |
2686 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2687 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2688 | const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2689 | const auto it = WinEHXMMSlotInfo.find(Val: FI); |
2690 | |
2691 | if (it == WinEHXMMSlotInfo.end()) |
2692 | return getFrameIndexReference(MF, FI, FrameReg).getFixed(); |
2693 | |
2694 | FrameReg = TRI->getStackRegister(); |
2695 | return alignDown(Value: MFI.getMaxCallFrameSize(), Align: getStackAlign().value()) + |
2696 | it->second; |
2697 | } |
2698 | |
2699 | StackOffset |
2700 | X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, |
2701 | Register &FrameReg, |
2702 | int Adjustment) const { |
2703 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2704 | FrameReg = TRI->getStackRegister(); |
2705 | return StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FI) - |
2706 | getOffsetOfLocalArea() + Adjustment); |
2707 | } |
2708 | |
2709 | StackOffset |
2710 | X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, |
2711 | int FI, Register &FrameReg, |
2712 | bool IgnoreSPUpdates) const { |
2713 | |
2714 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2715 | // Does not include any dynamic realign. |
2716 | const uint64_t StackSize = MFI.getStackSize(); |
2717 | // LLVM arranges the stack as follows: |
2718 | // ... |
2719 | // ARG2 |
2720 | // ARG1 |
2721 | // RETADDR |
2722 | // PUSH RBP <-- RBP points here |
2723 | // PUSH CSRs |
2724 | // ~~~~~~~ <-- possible stack realignment (non-win64) |
2725 | // ... |
2726 | // STACK OBJECTS |
2727 | // ... <-- RSP after prologue points here |
2728 | // ~~~~~~~ <-- possible stack realignment (win64) |
2729 | // |
2730 | // if (hasVarSizedObjects()): |
2731 | // ... <-- "base pointer" (ESI/RBX) points here |
2732 | // DYNAMIC ALLOCAS |
2733 | // ... <-- RSP points here |
2734 | // |
2735 | // Case 1: In the simple case of no stack realignment and no dynamic |
2736 | // allocas, both "fixed" stack objects (arguments and CSRs) are addressable |
2737 | // with fixed offsets from RSP. |
2738 | // |
2739 | // Case 2: In the case of stack realignment with no dynamic allocas, fixed |
2740 | // stack objects are addressed with RBP and regular stack objects with RSP. |
2741 | // |
2742 | // Case 3: In the case of dynamic allocas and stack realignment, RSP is used |
2743 | // to address stack arguments for outgoing calls and nothing else. The "base |
2744 | // pointer" points to local variables, and RBP points to fixed objects. |
2745 | // |
2746 | // In cases 2 and 3, we can only answer for non-fixed stack objects, and the |
2747 | // answer we give is relative to the SP after the prologue, and not the |
2748 | // SP in the middle of the function. |
2749 | |
2750 | if (MFI.isFixedObjectIndex(ObjectIdx: FI) && TRI->hasStackRealignment(MF) && |
2751 | !STI.isTargetWin64()) |
2752 | return getFrameIndexReference(MF, FI, FrameReg); |
2753 | |
2754 | // If !hasReservedCallFrame the function might have SP adjustement in the |
2755 | // body. So, even though the offset is statically known, it depends on where |
2756 | // we are in the function. |
2757 | if (!IgnoreSPUpdates && !hasReservedCallFrame(MF)) |
2758 | return getFrameIndexReference(MF, FI, FrameReg); |
2759 | |
2760 | // We don't handle tail calls, and shouldn't be seeing them either. |
2761 | assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 && |
2762 | "we don't handle this case!" ); |
2763 | |
2764 | // This is how the math works out: |
2765 | // |
2766 | // %rsp grows (i.e. gets lower) left to right. Each box below is |
2767 | // one word (eight bytes). Obj0 is the stack slot we're trying to |
2768 | // get to. |
2769 | // |
2770 | // ---------------------------------- |
2771 | // | BP | Obj0 | Obj1 | ... | ObjN | |
2772 | // ---------------------------------- |
2773 | // ^ ^ ^ ^ |
2774 | // A B C E |
2775 | // |
2776 | // A is the incoming stack pointer. |
2777 | // (B - A) is the local area offset (-8 for x86-64) [1] |
2778 | // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2] |
2779 | // |
2780 | // |(E - B)| is the StackSize (absolute value, positive). For a |
2781 | // stack that grown down, this works out to be (B - E). [3] |
2782 | // |
2783 | // E is also the value of %rsp after stack has been set up, and we |
2784 | // want (C - E) -- the value we can add to %rsp to get to Obj0. Now |
2785 | // (C - E) == (C - A) - (B - A) + (B - E) |
2786 | // { Using [1], [2] and [3] above } |
2787 | // == getObjectOffset - LocalAreaOffset + StackSize |
2788 | |
2789 | return getFrameIndexReferenceSP(MF, FI, FrameReg, Adjustment: StackSize); |
2790 | } |
2791 | |
2792 | bool X86FrameLowering::assignCalleeSavedSpillSlots( |
2793 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
2794 | std::vector<CalleeSavedInfo> &CSI) const { |
2795 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2796 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2797 | |
2798 | unsigned CalleeSavedFrameSize = 0; |
2799 | unsigned XMMCalleeSavedFrameSize = 0; |
2800 | auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2801 | int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); |
2802 | |
2803 | int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); |
2804 | |
2805 | if (TailCallReturnAddrDelta < 0) { |
2806 | // create RETURNADDR area |
2807 | // arg |
2808 | // arg |
2809 | // RETADDR |
2810 | // { ... |
2811 | // RETADDR area |
2812 | // ... |
2813 | // } |
2814 | // [EBP] |
2815 | MFI.CreateFixedObject(Size: -TailCallReturnAddrDelta, |
2816 | SPOffset: TailCallReturnAddrDelta - SlotSize, IsImmutable: true); |
2817 | } |
2818 | |
2819 | // Spill the BasePtr if it's used. |
2820 | if (this->TRI->hasBasePointer(MF)) { |
2821 | // Allocate a spill slot for EBP if we have a base pointer and EH funclets. |
2822 | if (MF.hasEHFunclets()) { |
2823 | int FI = MFI.CreateSpillStackObject(Size: SlotSize, Alignment: Align(SlotSize)); |
2824 | X86FI->setHasSEHFramePtrSave(true); |
2825 | X86FI->setSEHFramePtrSaveIndex(FI); |
2826 | } |
2827 | } |
2828 | |
2829 | if (hasFP(MF)) { |
2830 | // emitPrologue always spills frame register the first thing. |
2831 | SpillSlotOffset -= SlotSize; |
2832 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2833 | |
2834 | // The async context lives directly before the frame pointer, and we |
2835 | // allocate a second slot to preserve stack alignment. |
2836 | if (X86FI->hasSwiftAsyncContext()) { |
2837 | SpillSlotOffset -= SlotSize; |
2838 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2839 | SpillSlotOffset -= SlotSize; |
2840 | } |
2841 | |
2842 | // Since emitPrologue and emitEpilogue will handle spilling and restoring of |
2843 | // the frame register, we can delete it from CSI list and not have to worry |
2844 | // about avoiding it later. |
2845 | Register FPReg = TRI->getFrameRegister(MF); |
2846 | for (unsigned i = 0; i < CSI.size(); ++i) { |
2847 | if (TRI->regsOverlap(RegA: CSI[i].getReg(), RegB: FPReg)) { |
2848 | CSI.erase(position: CSI.begin() + i); |
2849 | break; |
2850 | } |
2851 | } |
2852 | } |
2853 | |
2854 | // Strategy: |
2855 | // 1. Use push2 when |
2856 | // a) number of CSR > 1 if no need padding |
2857 | // b) number of CSR > 2 if need padding |
2858 | // 2. When the number of CSR push is odd |
2859 | // a. Start to use push2 from the 1st push if stack is 16B aligned. |
2860 | // b. Start to use push2 from the 2nd push if stack is not 16B aligned. |
2861 | // 3. When the number of CSR push is even, start to use push2 from the 1st |
2862 | // push and make the stack 16B aligned before the push |
2863 | unsigned NumRegsForPush2 = 0; |
2864 | if (STI.hasPush2Pop2()) { |
2865 | unsigned NumCSGPR = llvm::count_if(Range&: CSI, P: [](const CalleeSavedInfo &I) { |
2866 | return X86::GR64RegClass.contains(I.getReg()); |
2867 | }); |
2868 | bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0); |
2869 | bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1; |
2870 | X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2); |
2871 | NumRegsForPush2 = UsePush2Pop2 ? alignDown(Value: NumCSGPR, Align: 2) : 0; |
2872 | if (X86FI->padForPush2Pop2()) { |
2873 | SpillSlotOffset -= SlotSize; |
2874 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2875 | } |
2876 | } |
2877 | |
2878 | // Assign slots for GPRs. It increases frame size. |
2879 | for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
2880 | Register Reg = I.getReg(); |
2881 | |
2882 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
2883 | continue; |
2884 | |
2885 | // A CSR is a candidate for push2/pop2 when it's slot offset is 16B aligned |
2886 | // or only an odd number of registers in the candidates. |
2887 | if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 && |
2888 | (SpillSlotOffset % 16 == 0 || |
2889 | X86FI->getNumCandidatesForPush2Pop2() % 2)) |
2890 | X86FI->addCandidateForPush2Pop2(Reg); |
2891 | |
2892 | SpillSlotOffset -= SlotSize; |
2893 | CalleeSavedFrameSize += SlotSize; |
2894 | |
2895 | int SlotIndex = MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2896 | I.setFrameIdx(SlotIndex); |
2897 | } |
2898 | |
2899 | // Adjust the offset of spill slot as we know the accurate callee saved frame |
2900 | // size. |
2901 | if (X86FI->getRestoreBasePointer()) { |
2902 | SpillSlotOffset -= SlotSize; |
2903 | CalleeSavedFrameSize += SlotSize; |
2904 | |
2905 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2906 | // TODO: saving the slot index is better? |
2907 | X86FI->setRestoreBasePointer(CalleeSavedFrameSize); |
2908 | } |
2909 | assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 && |
2910 | "Expect even candidates for push2/pop2" ); |
2911 | if (X86FI->getNumCandidatesForPush2Pop2()) |
2912 | ++NumFunctionUsingPush2Pop2; |
2913 | X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); |
2914 | MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize); |
2915 | |
2916 | // Assign slots for XMMs. |
2917 | for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
2918 | Register Reg = I.getReg(); |
2919 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
2920 | continue; |
2921 | |
2922 | // If this is k-register make sure we lookup via the largest legal type. |
2923 | MVT VT = MVT::Other; |
2924 | if (X86::VK16RegClass.contains(Reg)) |
2925 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
2926 | |
2927 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
2928 | unsigned Size = TRI->getSpillSize(RC: *RC); |
2929 | Align Alignment = TRI->getSpillAlign(RC: *RC); |
2930 | // ensure alignment |
2931 | assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86" ); |
2932 | SpillSlotOffset = -alignTo(Size: -SpillSlotOffset, A: Alignment); |
2933 | |
2934 | // spill into slot |
2935 | SpillSlotOffset -= Size; |
2936 | int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SPOffset: SpillSlotOffset); |
2937 | I.setFrameIdx(SlotIndex); |
2938 | MFI.ensureMaxAlignment(Alignment); |
2939 | |
2940 | // Save the start offset and size of XMM in stack frame for funclets. |
2941 | if (X86::VR128RegClass.contains(Reg)) { |
2942 | WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; |
2943 | XMMCalleeSavedFrameSize += Size; |
2944 | } |
2945 | } |
2946 | |
2947 | return true; |
2948 | } |
2949 | |
2950 | bool X86FrameLowering::spillCalleeSavedRegisters( |
2951 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
2952 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
2953 | DebugLoc DL = MBB.findDebugLoc(MBBI: MI); |
2954 | |
2955 | // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI |
2956 | // for us, and there are no XMM CSRs on Win32. |
2957 | if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) |
2958 | return true; |
2959 | |
2960 | // Push GPRs. It increases frame size. |
2961 | const MachineFunction &MF = *MBB.getParent(); |
2962 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2963 | if (X86FI->padForPush2Pop2()) |
2964 | emitSPUpdate(MBB, MBBI&: MI, DL, NumBytes: -(int64_t)SlotSize, /*InEpilogue=*/false); |
2965 | |
2966 | // Update LiveIn of the basic block and decide whether we can add a kill flag |
2967 | // to the use. |
2968 | auto UpdateLiveInCheckCanKill = [&](Register Reg) { |
2969 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
2970 | // Do not set a kill flag on values that are also marked as live-in. This |
2971 | // happens with the @llvm-returnaddress intrinsic and with arguments |
2972 | // passed in callee saved registers. |
2973 | // Omitting the kill flags is conservatively correct even if the live-in |
2974 | // is not used after all. |
2975 | if (MRI.isLiveIn(Reg)) |
2976 | return false; |
2977 | MBB.addLiveIn(PhysReg: Reg); |
2978 | // Check if any subregister is live-in |
2979 | for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) |
2980 | if (MRI.isLiveIn(Reg: *AReg)) |
2981 | return false; |
2982 | return true; |
2983 | }; |
2984 | auto UpdateLiveInGetKillRegState = [&](Register Reg) { |
2985 | return getKillRegState(B: UpdateLiveInCheckCanKill(Reg)); |
2986 | }; |
2987 | |
2988 | for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) { |
2989 | Register Reg = RI->getReg(); |
2990 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
2991 | continue; |
2992 | |
2993 | if (X86FI->isCandidateForPush2Pop2(Reg)) { |
2994 | Register Reg2 = (++RI)->getReg(); |
2995 | BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(ST: STI))) |
2996 | .addReg(Reg, UpdateLiveInGetKillRegState(Reg)) |
2997 | .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2)) |
2998 | .setMIFlag(MachineInstr::FrameSetup); |
2999 | } else { |
3000 | BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(ST: STI))) |
3001 | .addReg(Reg, UpdateLiveInGetKillRegState(Reg)) |
3002 | .setMIFlag(MachineInstr::FrameSetup); |
3003 | } |
3004 | } |
3005 | |
3006 | if (X86FI->getRestoreBasePointer()) { |
3007 | unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; |
3008 | Register BaseReg = this->TRI->getBaseRegister(); |
3009 | BuildMI(MBB, MI, DL, TII.get(Opc)) |
3010 | .addReg(BaseReg, getKillRegState(B: true)) |
3011 | .setMIFlag(MachineInstr::FrameSetup); |
3012 | } |
3013 | |
3014 | // Make XMM regs spilled. X86 does not have ability of push/pop XMM. |
3015 | // It can be done by spilling XMMs to stack frame. |
3016 | for (const CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
3017 | Register Reg = I.getReg(); |
3018 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
3019 | continue; |
3020 | |
3021 | // If this is k-register make sure we lookup via the largest legal type. |
3022 | MVT VT = MVT::Other; |
3023 | if (X86::VK16RegClass.contains(Reg)) |
3024 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
3025 | |
3026 | // Add the callee-saved register as live-in. It's killed at the spill. |
3027 | MBB.addLiveIn(PhysReg: Reg); |
3028 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
3029 | |
3030 | TII.storeRegToStackSlot(MBB, MI, SrcReg: Reg, isKill: true, FrameIndex: I.getFrameIdx(), RC, TRI, |
3031 | VReg: Register()); |
3032 | --MI; |
3033 | MI->setFlag(MachineInstr::FrameSetup); |
3034 | ++MI; |
3035 | } |
3036 | |
3037 | return true; |
3038 | } |
3039 | |
3040 | void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB, |
3041 | MachineBasicBlock::iterator MBBI, |
3042 | MachineInstr *CatchRet) const { |
3043 | // SEH shouldn't use catchret. |
3044 | assert(!isAsynchronousEHPersonality(classifyEHPersonality( |
3045 | MBB.getParent()->getFunction().getPersonalityFn())) && |
3046 | "SEH should not use CATCHRET" ); |
3047 | const DebugLoc &DL = CatchRet->getDebugLoc(); |
3048 | MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(i: 0).getMBB(); |
3049 | |
3050 | // Fill EAX/RAX with the address of the target block. |
3051 | if (STI.is64Bit()) { |
3052 | // LEA64r CatchRetTarget(%rip), %rax |
3053 | BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX) |
3054 | .addReg(X86::RIP) |
3055 | .addImm(0) |
3056 | .addReg(0) |
3057 | .addMBB(CatchRetTarget) |
3058 | .addReg(0); |
3059 | } else { |
3060 | // MOV32ri $CatchRetTarget, %eax |
3061 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) |
3062 | .addMBB(CatchRetTarget); |
3063 | } |
3064 | |
3065 | // Record that we've taken the address of CatchRetTarget and no longer just |
3066 | // reference it in a terminator. |
3067 | CatchRetTarget->setMachineBlockAddressTaken(); |
3068 | } |
3069 | |
3070 | bool X86FrameLowering::restoreCalleeSavedRegisters( |
3071 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
3072 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
3073 | if (CSI.empty()) |
3074 | return false; |
3075 | |
3076 | if (MI != MBB.end() && isFuncletReturnInstr(MI&: *MI) && STI.isOSWindows()) { |
3077 | // Don't restore CSRs in 32-bit EH funclets. Matches |
3078 | // spillCalleeSavedRegisters. |
3079 | if (STI.is32Bit()) |
3080 | return true; |
3081 | // Don't restore CSRs before an SEH catchret. SEH except blocks do not form |
3082 | // funclets. emitEpilogue transforms these to normal jumps. |
3083 | if (MI->getOpcode() == X86::CATCHRET) { |
3084 | const Function &F = MBB.getParent()->getFunction(); |
3085 | bool IsSEH = isAsynchronousEHPersonality( |
3086 | Pers: classifyEHPersonality(Pers: F.getPersonalityFn())); |
3087 | if (IsSEH) |
3088 | return true; |
3089 | } |
3090 | } |
3091 | |
3092 | DebugLoc DL = MBB.findDebugLoc(MBBI: MI); |
3093 | |
3094 | // Reload XMMs from stack frame. |
3095 | for (const CalleeSavedInfo &I : CSI) { |
3096 | Register Reg = I.getReg(); |
3097 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
3098 | continue; |
3099 | |
3100 | // If this is k-register make sure we lookup via the largest legal type. |
3101 | MVT VT = MVT::Other; |
3102 | if (X86::VK16RegClass.contains(Reg)) |
3103 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
3104 | |
3105 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
3106 | TII.loadRegFromStackSlot(MBB, MI, DestReg: Reg, FrameIndex: I.getFrameIdx(), RC, TRI, |
3107 | VReg: Register()); |
3108 | } |
3109 | |
3110 | // Clear the stack slot for spill base pointer register. |
3111 | MachineFunction &MF = *MBB.getParent(); |
3112 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3113 | if (X86FI->getRestoreBasePointer()) { |
3114 | unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; |
3115 | Register BaseReg = this->TRI->getBaseRegister(); |
3116 | BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg) |
3117 | .setMIFlag(MachineInstr::FrameDestroy); |
3118 | } |
3119 | |
3120 | // POP GPRs. |
3121 | for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) { |
3122 | Register Reg = I->getReg(); |
3123 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
3124 | continue; |
3125 | |
3126 | if (X86FI->isCandidateForPush2Pop2(Reg)) |
3127 | BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(ST: STI)), Reg) |
3128 | .addReg((++I)->getReg(), RegState::Define) |
3129 | .setMIFlag(MachineInstr::FrameDestroy); |
3130 | else |
3131 | BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(ST: STI)), Reg) |
3132 | .setMIFlag(MachineInstr::FrameDestroy); |
3133 | } |
3134 | if (X86FI->padForPush2Pop2()) |
3135 | emitSPUpdate(MBB, MBBI&: MI, DL, NumBytes: SlotSize, /*InEpilogue=*/true); |
3136 | |
3137 | return true; |
3138 | } |
3139 | |
3140 | void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, |
3141 | BitVector &SavedRegs, |
3142 | RegScavenger *RS) const { |
3143 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
3144 | |
3145 | // Spill the BasePtr if it's used. |
3146 | if (TRI->hasBasePointer(MF)) { |
3147 | Register BasePtr = TRI->getBaseRegister(); |
3148 | if (STI.isTarget64BitILP32()) |
3149 | BasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 64); |
3150 | SavedRegs.set(BasePtr); |
3151 | } |
3152 | } |
3153 | |
3154 | static bool HasNestArgument(const MachineFunction *MF) { |
3155 | const Function &F = MF->getFunction(); |
3156 | for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; |
3157 | I++) { |
3158 | if (I->hasNestAttr() && !I->use_empty()) |
3159 | return true; |
3160 | } |
3161 | return false; |
3162 | } |
3163 | |
3164 | /// GetScratchRegister - Get a temp register for performing work in the |
3165 | /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform |
3166 | /// and the properties of the function either one or two registers will be |
3167 | /// needed. Set primary to true for the first register, false for the second. |
3168 | static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, |
3169 | const MachineFunction &MF, bool Primary) { |
3170 | CallingConv::ID CallingConvention = MF.getFunction().getCallingConv(); |
3171 | |
3172 | // Erlang stuff. |
3173 | if (CallingConvention == CallingConv::HiPE) { |
3174 | if (Is64Bit) |
3175 | return Primary ? X86::R14 : X86::R13; |
3176 | else |
3177 | return Primary ? X86::EBX : X86::EDI; |
3178 | } |
3179 | |
3180 | if (Is64Bit) { |
3181 | if (IsLP64) |
3182 | return Primary ? X86::R11 : X86::R12; |
3183 | else |
3184 | return Primary ? X86::R11D : X86::R12D; |
3185 | } |
3186 | |
3187 | bool IsNested = HasNestArgument(MF: &MF); |
3188 | |
3189 | if (CallingConvention == CallingConv::X86_FastCall || |
3190 | CallingConvention == CallingConv::Fast || |
3191 | CallingConvention == CallingConv::Tail) { |
3192 | if (IsNested) |
3193 | report_fatal_error(reason: "Segmented stacks does not support fastcall with " |
3194 | "nested function." ); |
3195 | return Primary ? X86::EAX : X86::ECX; |
3196 | } |
3197 | if (IsNested) |
3198 | return Primary ? X86::EDX : X86::EAX; |
3199 | return Primary ? X86::ECX : X86::EAX; |
3200 | } |
3201 | |
// Number of bytes by which the stack limit stored in the TCB sits above the
// actual stack limit.
static constexpr uint64_t kSplitStackAvailable = 256;
3205 | |
/// Insert the segmented-stack check in front of PrologueMBB.
///
/// Two new blocks are pushed to the front of MF. checkMBB compares the stack
/// pointer (or, when the frame is at least kSplitStackAvailable bytes, the
/// stack pointer minus the frame size) against a limit read through a
/// target-specific segment register and offset, and branches to PrologueMBB
/// when the stack pointer side is above it. allocMBB passes the frame size and
/// the argument stack size to __morestack and ends with a MORESTACK_RET
/// pseudo.
///
/// Vararg functions and unsupported platforms are rejected with
/// report_fatal_error. Nothing is emitted when MFI.needsSplitStackProlog()
/// is false.
3206 | void X86FrameLowering::adjustForSegmentedStacks( |
3207 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { |
3208 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3209 | uint64_t StackSize; |
3210 | unsigned TlsReg, TlsOffset; |
3211 | DebugLoc DL; |
3212 | |
3213 | // To support shrink-wrapping we would need to insert the new blocks |
3214 | // at the right place and update the branches to PrologueMBB. |
3215 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet" ); |
3216 | |
3217 | unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3218 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) && |
3219 | "Scratch register is live-in" ); |
3220 | |
3221 | if (MF.getFunction().isVarArg()) |
3222 | report_fatal_error(reason: "Segmented stacks do not support vararg functions." ); |
3223 | if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() && |
3224 | !STI.isTargetWin64() && !STI.isTargetFreeBSD() && |
3225 | !STI.isTargetDragonFly()) |
3226 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3227 | |
3228 | // Eventually StackSize will be calculated by a link-time pass; which will |
3229 | // also decide whether checking code needs to be injected into this particular |
3230 | // prologue. |
3231 | StackSize = MFI.getStackSize(); |
3232 | |
3233 | if (!MFI.needsSplitStackProlog()) |
3234 | return; |
3235 | |
3236 | MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); |
3237 | MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); |
3238 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3239 | bool IsNested = false; |
3240 | |
3241 | // We need to know if the function has a nest argument only in 64 bit mode. |
3242 | if (Is64Bit) |
3243 | IsNested = HasNestArgument(MF: &MF); |
3244 | |
3245 | // The MOV R10, RAX needs to be in a different block, since the RET we emit in |
3246 | // allocMBB needs to be last (terminating) instruction. |
3247 | |
// Both new blocks inherit every live-in of the original prologue block.
3248 | for (const auto &LI : PrologueMBB.liveins()) { |
3249 | allocMBB->addLiveIn(RegMaskPair: LI); |
3250 | checkMBB->addLiveIn(RegMaskPair: LI); |
3251 | } |
3252 | |
3253 | if (IsNested) |
3254 | allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D); |
3255 | |
// Two push_front calls: the resulting layout is checkMBB, allocMBB, and then
// the blocks that were already in the function.
3256 | MF.push_front(MBB: allocMBB); |
3257 | MF.push_front(MBB: checkMBB); |
3258 | |
3259 | // When the frame size is less than 256 we just compare the stack |
3260 | // boundary directly to the value of the stack pointer, per gcc. |
3261 | bool CompareStackPointer = StackSize < kSplitStackAvailable; |
3262 | |
3263 | // Read the limit off the current stacklet off the stack_guard location. |
3264 | if (Is64Bit) { |
3265 | if (STI.isTargetLinux()) { |
3266 | TlsReg = X86::FS; |
3267 | TlsOffset = IsLP64 ? 0x70 : 0x40; |
3268 | } else if (STI.isTargetDarwin()) { |
3269 | TlsReg = X86::GS; |
3270 | TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90. |
3271 | } else if (STI.isTargetWin64()) { |
3272 | TlsReg = X86::GS; |
3273 | TlsOffset = 0x28; // pvArbitrary, reserved for application use |
3274 | } else if (STI.isTargetFreeBSD()) { |
3275 | TlsReg = X86::FS; |
3276 | TlsOffset = 0x18; |
3277 | } else if (STI.isTargetDragonFly()) { |
3278 | TlsReg = X86::FS; |
3279 | TlsOffset = 0x20; // use tls_tcb.tcb_segstack |
3280 | } else { |
3281 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3282 | } |
3283 | |
// Small frames compare the stack pointer itself. Larger frames first compute
// SP - StackSize into the scratch register with an LEA.
3284 | if (CompareStackPointer) |
3285 | ScratchReg = IsLP64 ? X86::RSP : X86::ESP; |
3286 | else |
3287 | BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), |
3288 | ScratchReg) |
3289 | .addReg(X86::RSP) |
3290 | .addImm(1) |
3291 | .addReg(0) |
3292 | .addImm(-StackSize) |
3293 | .addReg(0); |
3294 | |
3295 | BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)) |
3296 | .addReg(ScratchReg) |
3297 | .addReg(0) |
3298 | .addImm(1) |
3299 | .addReg(0) |
3300 | .addImm(TlsOffset) |
3301 | .addReg(TlsReg); |
3302 | } else { |
3303 | if (STI.isTargetLinux()) { |
3304 | TlsReg = X86::GS; |
3305 | TlsOffset = 0x30; |
3306 | } else if (STI.isTargetDarwin()) { |
3307 | TlsReg = X86::GS; |
3308 | TlsOffset = 0x48 + 90 * 4; |
3309 | } else if (STI.isTargetWin32()) { |
3310 | TlsReg = X86::FS; |
3311 | TlsOffset = 0x14; // pvArbitrary, reserved for application use |
3312 | } else if (STI.isTargetDragonFly()) { |
3313 | TlsReg = X86::FS; |
3314 | TlsOffset = 0x10; // use tls_tcb.tcb_segstack |
3315 | } else if (STI.isTargetFreeBSD()) { |
3316 | report_fatal_error(reason: "Segmented stacks not supported on FreeBSD i386." ); |
3317 | } else { |
3318 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3319 | } |
3320 | |
3321 | if (CompareStackPointer) |
3322 | ScratchReg = X86::ESP; |
3323 | else |
3324 | BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg) |
3325 | .addReg(X86::ESP) |
3326 | .addImm(1) |
3327 | .addReg(0) |
3328 | .addImm(-StackSize) |
3329 | .addReg(0); |
3330 | |
3331 | if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() || |
3332 | STI.isTargetDragonFly()) { |
3333 | BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) |
3334 | .addReg(ScratchReg) |
3335 | .addReg(0) |
3336 | .addImm(0) |
3337 | .addReg(0) |
3338 | .addImm(TlsOffset) |
3339 | .addReg(TlsReg); |
3340 | } else if (STI.isTargetDarwin()) { |
3341 | |
3342 | // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. |
3343 | unsigned ScratchReg2; |
3344 | bool SaveScratch2; |
3345 | if (CompareStackPointer) { |
3346 | // The primary scratch register is available for holding the TLS offset. |
3347 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3348 | SaveScratch2 = false; |
3349 | } else { |
3350 | // Need to use a second register to hold the TLS offset |
3351 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: false); |
3352 | |
3353 | // Unfortunately, with fastcc the second scratch register may hold an |
3354 | // argument. |
3355 | SaveScratch2 = MF.getRegInfo().isLiveIn(Reg: ScratchReg2); |
3356 | } |
3357 | |
3358 | // If Scratch2 is live-in then it needs to be saved. |
3359 | assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && |
3360 | "Scratch register is live-in and not saved" ); |
3361 | |
3362 | if (SaveScratch2) |
3363 | BuildMI(checkMBB, DL, TII.get(X86::PUSH32r)) |
3364 | .addReg(ScratchReg2, RegState::Kill); |
3365 | |
// The TLS offset is materialized in ScratchReg2 and used as the base register
// of the segment-relative compare.
3366 | BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2) |
3367 | .addImm(TlsOffset); |
3368 | BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) |
3369 | .addReg(ScratchReg) |
3370 | .addReg(ScratchReg2) |
3371 | .addImm(1) |
3372 | .addReg(0) |
3373 | .addImm(0) |
3374 | .addReg(TlsReg); |
3375 | |
3376 | if (SaveScratch2) |
3377 | BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2); |
3378 | } |
3379 | } |
3380 | |
// NOTE(review): COND_A is a strict unsigned above, so the equal case falls
// through to allocMBB although the comment below says >=. Confirm which of
// the two is intended.
3381 | // This jump is taken if SP >= (Stacklet Limit + Stack Space required). |
3382 | // It jumps to normal execution of the function body. |
3383 | BuildMI(checkMBB, DL, TII.get(X86::JCC_1)) |
3384 | .addMBB(&PrologueMBB) |
3385 | .addImm(X86::COND_A); |
3386 | |
3387 | // On 32 bit we first push the arguments size and then the frame size. On 64 |
3388 | // bit, we pass the stack frame size in r10 and the argument size in r11. |
3389 | if (Is64Bit) { |
3390 | // Functions with nested arguments use R10, so it needs to be saved across |
3391 | // the call to _morestack |
3392 | |
3393 | const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX; |
3394 | const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D; |
3395 | const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D; |
3396 | const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr; |
3397 | |
3398 | if (IsNested) |
3399 | BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10); |
3400 | |
3401 | BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(Use64BitReg: IsLP64, Imm: StackSize)), Reg10) |
3402 | .addImm(StackSize); |
3403 | BuildMI(allocMBB, DL, |
3404 | TII.get(getMOVriOpcode(Use64BitReg: IsLP64, Imm: X86FI->getArgumentStackSize())), |
3405 | Reg11) |
3406 | .addImm(X86FI->getArgumentStackSize()); |
3407 | } else { |
3408 | BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)) |
3409 | .addImm(X86FI->getArgumentStackSize()); |
3410 | BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize); |
3411 | } |
3412 | |
3413 | // __morestack is in libgcc |
3414 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { |
3415 | // Under the large code model, we cannot assume that __morestack lives |
3416 | // within 2^31 bytes of the call site, so we cannot use pc-relative |
3417 | // addressing. We cannot perform the call via a temporary register, |
3418 | // as the rax register may be used to store the static chain, and all |
3419 | // other suitable registers may be either callee-save or used for |
3420 | // parameter passing. We cannot use the stack at this point either |
3421 | // because __morestack manipulates the stack directly. |
3422 | // |
3423 | // To avoid these issues, perform an indirect call via a read-only memory |
3424 | // location containing the address. |
3425 | // |
3426 | // This solution is not perfect, as it assumes that the .rodata section |
3427 | // is laid out within 2^31 bytes of each function body, but this seems |
3428 | // to be sufficient for JIT. |
3429 | // FIXME: Add retpoline support and remove the error here.. |
3430 | if (STI.useIndirectThunkCalls()) |
3431 | report_fatal_error(reason: "Emitting morestack calls on 64-bit with the large " |
3432 | "code model and thunks not yet implemented." ); |
3433 | BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) |
3434 | .addReg(X86::RIP) |
3435 | .addImm(0) |
3436 | .addReg(0) |
3437 | .addExternalSymbol("__morestack_addr" ) |
3438 | .addReg(0); |
3439 | } else { |
3440 | if (Is64Bit) |
3441 | BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) |
3442 | .addExternalSymbol("__morestack" ); |
3443 | else |
3444 | BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) |
3445 | .addExternalSymbol("__morestack" ); |
3446 | } |
3447 | |
// The block ends with a MORESTACK_RET pseudo. The RESTORE_R10 variant is used
// for functions with a nest argument, whose R10 was copied to RAX above.
3448 | if (IsNested) |
3449 | BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); |
3450 | else |
3451 | BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); |
3452 | |
3453 | allocMBB->addSuccessor(Succ: &PrologueMBB); |
3454 | |
// checkMBB either continues into allocMBB (given probability zero) or
// branches to the original prologue (given probability one).
3455 | checkMBB->addSuccessor(Succ: allocMBB, Prob: BranchProbability::getZero()); |
3456 | checkMBB->addSuccessor(Succ: &PrologueMBB, Prob: BranchProbability::getOne()); |
3457 | |
3458 | #ifdef EXPENSIVE_CHECKS |
3459 | MF.verify(); |
3460 | #endif |
3461 | } |
3462 | |
3463 | /// Lookup an ERTS parameter in the !hipe.literals named metadata node. |
3464 | /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets |
3465 | /// to fields it needs, through a named metadata node "hipe.literals" containing |
3466 | /// name-value pairs. |
3467 | static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, |
3468 | const StringRef LiteralName) { |
3469 | for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) { |
3470 | MDNode *Node = HiPELiteralsMD->getOperand(i); |
3471 | if (Node->getNumOperands() != 2) |
3472 | continue; |
3473 | MDString *NodeName = dyn_cast<MDString>(Val: Node->getOperand(I: 0)); |
3474 | ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Val: Node->getOperand(I: 1)); |
3475 | if (!NodeName || !NodeVal) |
3476 | continue; |
3477 | ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(Val: NodeVal->getValue()); |
3478 | if (ValConst && NodeName->getString() == LiteralName) { |
3479 | return ValConst->getZExtValue(); |
3480 | } |
3481 | } |
3482 | |
3483 | report_fatal_error(reason: "HiPE literal " + LiteralName + |
3484 | " required but not provided" ); |
3485 | } |
3486 | |
3487 | // Return true if there are no non-ehpad successors to MBB and there are no |
3488 | // non-meta instructions between MBBI and MBB.end(). |
3489 | static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, |
3490 | MachineBasicBlock::const_iterator MBBI) { |
3491 | return llvm::all_of( |
3492 | Range: MBB.successors(), |
3493 | P: [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && |
3494 | std::all_of(first: MBBI, last: MBB.end(), pred: [](const MachineInstr &MI) { |
3495 | return MI.isMetaInstruction(); |
3496 | }); |
3497 | } |
3498 | |
3499 | /// Erlang programs may need a special prologue to handle the stack size they |
3500 | /// might need at runtime. That is because Erlang/OTP does not implement a C |
3501 | /// stack but uses a custom implementation of hybrid stack/heap architecture. |
3502 | /// (for more information see Eric Stenman's Ph.D. thesis: |
3503 | /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) |
3504 | /// |
3505 | /// CheckStack: |
3506 | /// temp0 = sp - MaxStack |
3507 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart |
3508 | /// OldStart: |
3509 | /// ... |
3510 | /// IncStack: |
3511 | /// call inc_stack # doubles the stack space |
3512 | /// temp0 = sp - MaxStack |
3513 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart |
///
/// The two blocks are only created when the computed MaxStack exceeds the
/// guaranteed amount, i.e. the AMD64_LEAF_WORDS / X86_LEAF_WORDS literal
/// multiplied by SlotSize. The literals are read from the hipe.literals named
/// metadata; a missing node or literal is a fatal error.
3514 | void X86FrameLowering::adjustForHiPEPrologue( |
3515 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { |
3516 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3517 | DebugLoc DL; |
3518 | |
3519 | // To support shrink-wrapping we would need to insert the new blocks |
3520 | // at the right place and update the branches to PrologueMBB. |
3521 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet" ); |
3522 | |
3523 | // HiPE-specific values |
3524 | NamedMDNode *HiPELiteralsMD = |
3525 | MF.getMMI().getModule()->getNamedMetadata(Name: "hipe.literals" ); |
3526 | if (!HiPELiteralsMD) |
3527 | report_fatal_error( |
3528 | reason: "Can't generate HiPE prologue without runtime parameters" ); |
3529 | const unsigned HipeLeafWords = getHiPELiteral( |
3530 | HiPELiteralsMD, LiteralName: Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS" ); |
3531 | const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; |
3532 | const unsigned Guaranteed = HipeLeafWords * SlotSize; |
// Number of arguments beyond the CCRegisteredArgs threshold (zero if none).
3533 | unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs |
3534 | ? MF.getFunction().arg_size() - CCRegisteredArgs |
3535 | : 0; |
// Starting value: the frame size, one slot per argument counted above, and
// one additional slot.
3536 | unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize; |
3537 | |
3538 | assert(STI.isTargetLinux() && |
3539 | "HiPE prologue is only supported on Linux operating systems." ); |
3540 | |
3541 | // Compute the largest caller's frame that is needed to fit the callees' |
3542 | // frames. This 'MaxStack' is computed from: |
3543 | // |
3544 | // a) the fixed frame size, which is the space needed for all spilled temps, |
3545 | // b) outgoing on-stack parameter areas, and |
3546 | // c) the minimum stack space this function needs to make available for the |
3547 | // functions it calls (a tunable ABI property). |
3548 | if (MFI.hasCalls()) { |
3549 | unsigned MoreStackForCalls = 0; |
3550 | |
3551 | for (auto &MBB : MF) { |
3552 | for (auto &MI : MBB) { |
3553 | if (!MI.isCall()) |
3554 | continue; |
3555 | |
3556 | // Get callee operand. |
3557 | const MachineOperand &MO = MI.getOperand(i: 0); |
3558 | |
3559 | // Only take account of global function calls (no closures etc.). |
3560 | if (!MO.isGlobal()) |
3561 | continue; |
3562 | |
3563 | const Function *F = dyn_cast<Function>(Val: MO.getGlobal()); |
3564 | if (!F) |
3565 | continue; |
3566 | |
3567 | // Do not update 'MaxStack' for primitive and built-in functions |
3568 | // (encoded with names either starting with "erlang."/"bif_" or not |
3569 | // having a ".", such as a simple <Module>.<Function>.<Arity>, or an |
3570 | // "_", such as the BIF "suspend_0") as they are executed on another |
3571 | // stack. |
3572 | if (F->getName().contains(Other: "erlang." ) || F->getName().contains(Other: "bif_" ) || |
3573 | F->getName().find_first_of(Chars: "._" ) == StringRef::npos) |
3574 | continue; |
3575 | |
3576 | unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs |
3577 | ? F->arg_size() - CCRegisteredArgs |
3578 | : 0; |
// The guard keeps the unsigned subtraction below from wrapping around.
3579 | if (HipeLeafWords - 1 > CalleeStkArity) |
3580 | MoreStackForCalls = |
3581 | std::max(a: MoreStackForCalls, |
3582 | b: (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); |
3583 | } |
3584 | } |
3585 | MaxStack += MoreStackForCalls; |
3586 | } |
3587 | |
3588 | // If the stack frame needed is larger than the guaranteed then runtime checks |
3589 | // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue. |
3590 | if (MaxStack > Guaranteed) { |
3591 | MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); |
3592 | MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); |
3593 | |
3594 | for (const auto &LI : PrologueMBB.liveins()) { |
3595 | stackCheckMBB->addLiveIn(RegMaskPair: LI); |
3596 | incStackMBB->addLiveIn(RegMaskPair: LI); |
3597 | } |
3598 | |
// Two push_front calls: the resulting layout is stackCheckMBB, incStackMBB,
// and then the blocks that were already in the function.
3599 | MF.push_front(MBB: incStackMBB); |
3600 | MF.push_front(MBB: stackCheckMBB); |
3601 | |
3602 | unsigned ScratchReg, SPReg, PReg, SPLimitOffset; |
3603 | unsigned LEAop, CMPop, CALLop; |
3604 | SPLimitOffset = getHiPELiteral(HiPELiteralsMD, LiteralName: "P_NSP_LIMIT" ); |
3605 | if (Is64Bit) { |
3606 | SPReg = X86::RSP; |
3607 | PReg = X86::RBP; |
3608 | LEAop = X86::LEA64r; |
3609 | CMPop = X86::CMP64rm; |
3610 | CALLop = X86::CALL64pcrel32; |
3611 | } else { |
3612 | SPReg = X86::ESP; |
3613 | PReg = X86::EBP; |
3614 | LEAop = X86::LEA32r; |
3615 | CMPop = X86::CMP32rm; |
3616 | CALLop = X86::CALLpcrel32; |
3617 | } |
3618 | |
3619 | ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3620 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) && |
3621 | "HiPE prologue scratch register is live-in" ); |
3622 | |
3623 | // Create new MBB for StackCheck: |
3624 | addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg, |
3625 | false, -MaxStack); |
3626 | // SPLimitOffset is in a fixed heap location (pointed by BP). |
3627 | addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg), |
3628 | PReg, false, SPLimitOffset); |
// NOTE(review): this entry check branches on the unsigned condition AE while
// the retry loop below branches on the signed condition LE. Confirm that the
// asymmetry is intended.
3629 | BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)) |
3630 | .addMBB(&PrologueMBB) |
3631 | .addImm(X86::COND_AE); |
3632 | |
3633 | // Create new MBB for IncStack: |
3634 | BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0" ); |
3635 | addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg, |
3636 | false, -MaxStack); |
3637 | addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg), |
3638 | PReg, false, SPLimitOffset); |
3639 | BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)) |
3640 | .addMBB(incStackMBB) |
3641 | .addImm(X86::COND_LE); |
3642 | |
// Both new blocks get a 99/100 edge to the prologue and a 1/100 edge to
// incStackMBB.
3643 | stackCheckMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100}); |
3644 | stackCheckMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100}); |
3645 | incStackMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100}); |
3646 | incStackMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100}); |
3647 | } |
3648 | #ifdef EXPENSIVE_CHECKS |
3649 | MF.verify(); |
3650 | #endif |
3651 | } |
3652 | |
3653 | bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, |
3654 | MachineBasicBlock::iterator MBBI, |
3655 | const DebugLoc &DL, |
3656 | int Offset) const { |
3657 | if (Offset <= 0) |
3658 | return false; |
3659 | |
3660 | if (Offset % SlotSize) |
3661 | return false; |
3662 | |
3663 | int NumPops = Offset / SlotSize; |
3664 | // This is only worth it if we have at most 2 pops. |
3665 | if (NumPops != 1 && NumPops != 2) |
3666 | return false; |
3667 | |
3668 | // Handle only the trivial case where the adjustment directly follows |
3669 | // a call. This is the most common one, anyway. |
3670 | if (MBBI == MBB.begin()) |
3671 | return false; |
3672 | MachineBasicBlock::iterator Prev = std::prev(x: MBBI); |
3673 | if (!Prev->isCall() || !Prev->getOperand(i: 1).isRegMask()) |
3674 | return false; |
3675 | |
3676 | unsigned Regs[2]; |
3677 | unsigned FoundRegs = 0; |
3678 | |
3679 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
3680 | const MachineOperand &RegMask = Prev->getOperand(i: 1); |
3681 | |
3682 | auto &RegClass = |
3683 | Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; |
3684 | // Try to find up to NumPops free registers. |
3685 | for (auto Candidate : RegClass) { |
3686 | // Poor man's liveness: |
3687 | // Since we're immediately after a call, any register that is clobbered |
3688 | // by the call and not defined by it can be considered dead. |
3689 | if (!RegMask.clobbersPhysReg(Candidate)) |
3690 | continue; |
3691 | |
3692 | // Don't clobber reserved registers |
3693 | if (MRI.isReserved(Candidate)) |
3694 | continue; |
3695 | |
3696 | bool IsDef = false; |
3697 | for (const MachineOperand &MO : Prev->implicit_operands()) { |
3698 | if (MO.isReg() && MO.isDef() && |
3699 | TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) { |
3700 | IsDef = true; |
3701 | break; |
3702 | } |
3703 | } |
3704 | |
3705 | if (IsDef) |
3706 | continue; |
3707 | |
3708 | Regs[FoundRegs++] = Candidate; |
3709 | if (FoundRegs == (unsigned)NumPops) |
3710 | break; |
3711 | } |
3712 | |
3713 | if (FoundRegs == 0) |
3714 | return false; |
3715 | |
3716 | // If we found only one free register, but need two, reuse the same one twice. |
3717 | while (FoundRegs < (unsigned)NumPops) |
3718 | Regs[FoundRegs++] = Regs[0]; |
3719 | |
3720 | for (int i = 0; i < NumPops; ++i) |
3721 | BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), |
3722 | Regs[i]); |
3723 | |
3724 | return true; |
3725 | } |
3726 | |
3727 | MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr( |
3728 | MachineFunction &MF, MachineBasicBlock &MBB, |
3729 | MachineBasicBlock::iterator I) const { |
3730 | bool reserveCallFrame = hasReservedCallFrame(MF); |
3731 | unsigned Opcode = I->getOpcode(); |
3732 | bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); |
3733 | DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased. |
3734 | uint64_t Amount = TII.getFrameSize(*I); |
3735 | uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(I: *I) : 0; |
3736 | I = MBB.erase(I); |
3737 | auto InsertPos = skipDebugInstructionsForward(It: I, End: MBB.end()); |
3738 | |
3739 | // Try to avoid emitting dead SP adjustments if the block end is unreachable, |
3740 | // typically because the function is marked noreturn (abort, throw, |
3741 | // assert_fail, etc). |
3742 | if (isDestroy && blockEndIsUnreachable(MBB, MBBI: I)) |
3743 | return I; |
3744 | |
3745 | if (!reserveCallFrame) { |
3746 | // If the stack pointer can be changed after prologue, turn the |
3747 | // adjcallstackup instruction into a 'sub ESP, <amt>' and the |
3748 | // adjcallstackdown instruction into 'add ESP, <amt>' |
3749 | |
3750 | // We need to keep the stack aligned properly. To do this, we round the |
3751 | // amount of space needed for the outgoing arguments up to the next |
3752 | // alignment boundary. |
3753 | Amount = alignTo(Size: Amount, A: getStackAlign()); |
3754 | |
3755 | const Function &F = MF.getFunction(); |
3756 | bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
3757 | bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves(); |
3758 | |
3759 | // If we have any exception handlers in this function, and we adjust |
3760 | // the SP before calls, we may need to indicate this to the unwinder |
3761 | // using GNU_ARGS_SIZE. Note that this may be necessary even when |
3762 | // Amount == 0, because the preceding function may have set a non-0 |
3763 | // GNU_ARGS_SIZE. |
3764 | // TODO: We don't need to reset this between subsequent functions, |
3765 | // if it didn't change. |
3766 | bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty(); |
3767 | |
3768 | if (HasDwarfEHHandlers && !isDestroy && |
3769 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) |
3770 | BuildCFI(MBB, MBBI: InsertPos, DL, |
3771 | CFIInst: MCCFIInstruction::createGnuArgsSize(L: nullptr, Size: Amount)); |
3772 | |
3773 | if (Amount == 0) |
3774 | return I; |
3775 | |
3776 | // Factor out the amount that gets handled inside the sequence |
3777 | // (Pushes of argument for frame setup, callee pops for frame destroy) |
3778 | Amount -= InternalAmt; |
3779 | |
3780 | // TODO: This is needed only if we require precise CFA. |
3781 | // If this is a callee-pop calling convention, emit a CFA adjust for |
3782 | // the amount the callee popped. |
3783 | if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) |
3784 | BuildCFI(MBB, MBBI: InsertPos, DL, |
3785 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: -InternalAmt)); |
3786 | |
3787 | // Add Amount to SP to destroy a frame, or subtract to setup. |
3788 | int64_t StackAdjustment = isDestroy ? Amount : -Amount; |
3789 | |
3790 | if (StackAdjustment) { |
3791 | // Merge with any previous or following adjustment instruction. Note: the |
3792 | // instructions merged with here do not have CFI, so their stack |
3793 | // adjustments do not feed into CfaAdjustment. |
3794 | StackAdjustment += mergeSPUpdates(MBB, MBBI&: InsertPos, doMergeWithPrevious: true); |
3795 | StackAdjustment += mergeSPUpdates(MBB, MBBI&: InsertPos, doMergeWithPrevious: false); |
3796 | |
3797 | if (StackAdjustment) { |
3798 | if (!(F.hasMinSize() && |
3799 | adjustStackWithPops(MBB, MBBI: InsertPos, DL, Offset: StackAdjustment))) |
3800 | BuildStackAdjustment(MBB, MBBI: InsertPos, DL, Offset: StackAdjustment, |
3801 | /*InEpilogue=*/false); |
3802 | } |
3803 | } |
3804 | |
3805 | if (DwarfCFI && !hasFP(MF)) { |
3806 | // If we don't have FP, but need to generate unwind information, |
3807 | // we need to set the correct CFA offset after the stack adjustment. |
3808 | // How much we adjust the CFA offset depends on whether we're emitting |
3809 | // CFI only for EH purposes or for debugging. EH only requires the CFA |
3810 | // offset to be correct at each call site, while for debugging we want |
3811 | // it to be more precise. |
3812 | |
3813 | int64_t CfaAdjustment = -StackAdjustment; |
3814 | // TODO: When not using precise CFA, we also need to adjust for the |
3815 | // InternalAmt here. |
3816 | if (CfaAdjustment) { |
3817 | BuildCFI( |
3818 | MBB, MBBI: InsertPos, DL, |
3819 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: CfaAdjustment)); |
3820 | } |
3821 | } |
3822 | |
3823 | return I; |
3824 | } |
3825 | |
3826 | if (InternalAmt) { |
3827 | MachineBasicBlock::iterator CI = I; |
3828 | MachineBasicBlock::iterator B = MBB.begin(); |
3829 | while (CI != B && !std::prev(x: CI)->isCall()) |
3830 | --CI; |
3831 | BuildStackAdjustment(MBB, MBBI: CI, DL, Offset: -InternalAmt, /*InEpilogue=*/false); |
3832 | } |
3833 | |
3834 | return I; |
3835 | } |
3836 | |
3837 | bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { |
3838 | assert(MBB.getParent() && "Block is not attached to a function!" ); |
3839 | const MachineFunction &MF = *MBB.getParent(); |
3840 | if (!MBB.isLiveIn(X86::EFLAGS)) |
3841 | return true; |
3842 | |
3843 | // If stack probes have to loop inline or call, that will clobber EFLAGS. |
3844 | // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock. |
3845 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
3846 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
3847 | if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF)) |
3848 | return false; |
3849 | |
3850 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3851 | return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext(); |
3852 | } |
3853 | |
3854 | bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
3855 | assert(MBB.getParent() && "Block is not attached to a function!" ); |
3856 | |
3857 | // Win64 has strict requirements in terms of epilogue and we are |
3858 | // not taking a chance at messing with them. |
3859 | // I.e., unless this block is already an exit block, we can't use |
3860 | // it as an epilogue. |
3861 | if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) |
3862 | return false; |
3863 | |
3864 | // Swift async context epilogue has a BTR instruction that clobbers parts of |
3865 | // EFLAGS. |
3866 | const MachineFunction &MF = *MBB.getParent(); |
3867 | if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext()) |
3868 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); |
3869 | |
3870 | if (canUseLEAForSPInEpilogue(MF: *MBB.getParent())) |
3871 | return true; |
3872 | |
3873 | // If we cannot use LEA to adjust SP, we may need to use ADD, which |
3874 | // clobbers the EFLAGS. Check that we do not need to preserve it, |
3875 | // otherwise, conservatively assume this is not |
3876 | // safe to insert the epilogue here. |
3877 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); |
3878 | } |
3879 | |
3880 | bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { |
3881 | // If we may need to emit frameless compact unwind information, give |
3882 | // up as this is currently broken: PR25614. |
3883 | bool CompactUnwind = |
3884 | MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() != |
3885 | nullptr; |
3886 | return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) || |
3887 | !CompactUnwind) && |
3888 | // The lowering of segmented stack and HiPE only support entry |
3889 | // blocks as prologue blocks: PR26107. This limitation may be |
3890 | // lifted if we fix: |
3891 | // - adjustForSegmentedStacks |
3892 | // - adjustForHiPEPrologue |
3893 | MF.getFunction().getCallingConv() != CallingConv::HiPE && |
3894 | !MF.shouldSplitStack(); |
3895 | } |
3896 | |
3897 | MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( |
3898 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
3899 | const DebugLoc &DL, bool RestoreSP) const { |
3900 | assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env" ); |
3901 | assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32" ); |
3902 | assert(STI.is32Bit() && !Uses64BitFramePtr && |
3903 | "restoring EBP/ESI on non-32-bit target" ); |
3904 | |
3905 | MachineFunction &MF = *MBB.getParent(); |
3906 | Register FramePtr = TRI->getFrameRegister(MF); |
3907 | Register BasePtr = TRI->getBaseRegister(); |
3908 | WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); |
3909 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3910 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3911 | |
3912 | // FIXME: Don't set FrameSetup flag in catchret case. |
3913 | |
3914 | int FI = FuncInfo.EHRegNodeFrameIndex; |
3915 | int EHRegSize = MFI.getObjectSize(ObjectIdx: FI); |
3916 | |
3917 | if (RestoreSP) { |
3918 | // MOV32rm -EHRegSize(%ebp), %esp |
3919 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP), |
3920 | X86::EBP, true, -EHRegSize) |
3921 | .setMIFlag(MachineInstr::FrameSetup); |
3922 | } |
3923 | |
3924 | Register UsedReg; |
3925 | int EHRegOffset = getFrameIndexReference(MF, FI, FrameReg&: UsedReg).getFixed(); |
3926 | int EndOffset = -EHRegOffset - EHRegSize; |
3927 | FuncInfo.EHRegNodeEndOffset = EndOffset; |
3928 | |
3929 | if (UsedReg == FramePtr) { |
3930 | // ADD $offset, %ebp |
3931 | unsigned ADDri = getADDriOpcode(IsLP64: false); |
3932 | BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) |
3933 | .addReg(FramePtr) |
3934 | .addImm(EndOffset) |
3935 | .setMIFlag(MachineInstr::FrameSetup) |
3936 | ->getOperand(3) |
3937 | .setIsDead(); |
3938 | assert(EndOffset >= 0 && |
3939 | "end of registration object above normal EBP position!" ); |
3940 | } else if (UsedReg == BasePtr) { |
3941 | // LEA offset(%ebp), %esi |
3942 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), |
3943 | FramePtr, false, EndOffset) |
3944 | .setMIFlag(MachineInstr::FrameSetup); |
3945 | // MOV32rm SavedEBPOffset(%esi), %ebp |
3946 | assert(X86FI->getHasSEHFramePtrSave()); |
3947 | int Offset = |
3948 | getFrameIndexReference(MF, FI: X86FI->getSEHFramePtrSaveIndex(), FrameReg&: UsedReg) |
3949 | .getFixed(); |
3950 | assert(UsedReg == BasePtr); |
3951 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr), |
3952 | UsedReg, true, Offset) |
3953 | .setMIFlag(MachineInstr::FrameSetup); |
3954 | } else { |
3955 | llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr" ); |
3956 | } |
3957 | return MBBI; |
3958 | } |
3959 | |
3960 | int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { |
3961 | return TRI->getSlotSize(); |
3962 | } |
3963 | |
3964 | Register |
3965 | X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const { |
3966 | return StackPtr; |
3967 | } |
3968 | |
3969 | TargetFrameLowering::DwarfFrameBase |
3970 | X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { |
3971 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
3972 | Register FrameRegister = RI->getFrameRegister(MF); |
3973 | if (getInitialCFARegister(MF) == FrameRegister && |
3974 | MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) { |
3975 | DwarfFrameBase FrameBase; |
3976 | FrameBase.Kind = DwarfFrameBase::CFA; |
3977 | FrameBase.Location.Offset = |
3978 | -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF); |
3979 | return FrameBase; |
3980 | } |
3981 | |
3982 | return DwarfFrameBase{.Kind: DwarfFrameBase::Register, .Location: {.Reg: FrameRegister}}; |
3983 | } |
3984 | |
3985 | namespace { |
3986 | // Struct used by orderFrameObjects to help sort the stack objects. |
3987 | struct X86FrameSortingObject { |
3988 | bool IsValid = false; // true if we care about this Object. |
3989 | unsigned ObjectIndex = 0; // Index of Object into MFI list. |
3990 | unsigned ObjectSize = 0; // Size of Object in bytes. |
3991 | Align ObjectAlignment = Align(1); // Alignment of Object in bytes. |
3992 | unsigned ObjectNumUses = 0; // Object static number of uses. |
3993 | }; |
3994 | |
3995 | // The comparison function we use for std::sort to order our local |
3996 | // stack symbols. The current algorithm is to use an estimated |
3997 | // "density". This takes into consideration the size and number of |
3998 | // uses each object has in order to roughly minimize code size. |
3999 | // So, for example, an object of size 16B that is referenced 5 times |
4000 | // will get higher priority than 4 4B objects referenced 1 time each. |
4001 | // It's not perfect and we may be able to squeeze a few more bytes out of |
4002 | // it (for example : 0(esp) requires fewer bytes, symbols allocated at the |
4003 | // fringe end can have special consideration, given their size is less |
4004 | // important, etc.), but the algorithmic complexity grows too much to be |
4005 | // worth the extra gains we get. This gets us pretty close. |
4006 | // The final order leaves us with objects with highest priority going |
4007 | // at the end of our list. |
4008 | struct X86FrameSortingComparator { |
4009 | inline bool operator()(const X86FrameSortingObject &A, |
4010 | const X86FrameSortingObject &B) const { |
4011 | uint64_t DensityAScaled, DensityBScaled; |
4012 | |
4013 | // For consistency in our comparison, all invalid objects are placed |
4014 | // at the end. This also allows us to stop walking when we hit the |
4015 | // first invalid item after it's all sorted. |
4016 | if (!A.IsValid) |
4017 | return false; |
4018 | if (!B.IsValid) |
4019 | return true; |
4020 | |
4021 | // The density is calculated by doing : |
4022 | // (double)DensityA = A.ObjectNumUses / A.ObjectSize |
4023 | // (double)DensityB = B.ObjectNumUses / B.ObjectSize |
4024 | // Since this approach may cause inconsistencies in |
4025 | // the floating point <, >, == comparisons, depending on the floating |
4026 | // point model with which the compiler was built, we're going |
4027 | // to scale both sides by multiplying with |
4028 | // A.ObjectSize * B.ObjectSize. This ends up factoring away |
4029 | // the division and, with it, the need for any floating point |
4030 | // arithmetic. |
4031 | DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) * |
4032 | static_cast<uint64_t>(B.ObjectSize); |
4033 | DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) * |
4034 | static_cast<uint64_t>(A.ObjectSize); |
4035 | |
4036 | // If the two densities are equal, prioritize highest alignment |
4037 | // objects. This allows for similar alignment objects |
4038 | // to be packed together (given the same density). |
4039 | // There's room for improvement here, also, since we can pack |
4040 | // similar alignment (different density) objects next to each |
4041 | // other to save padding. This will also require further |
4042 | // complexity/iterations, and the overall gain isn't worth it, |
4043 | // in general. Something to keep in mind, though. |
4044 | if (DensityAScaled == DensityBScaled) |
4045 | return A.ObjectAlignment < B.ObjectAlignment; |
4046 | |
4047 | return DensityAScaled < DensityBScaled; |
4048 | } |
4049 | }; |
4050 | } // namespace |
4051 | |
4052 | // Order the symbols in the local stack. |
4053 | // We want to place the local stack objects in some sort of sensible order. |
4054 | // The heuristic we use is to try and pack them according to static number |
4055 | // of uses and size of object in order to minimize code size. |
4056 | void X86FrameLowering::orderFrameObjects( |
4057 | const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { |
4058 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
4059 | |
4060 | // Don't waste time if there's nothing to do. |
4061 | if (ObjectsToAllocate.empty()) |
4062 | return; |
4063 | |
4064 | // Create an array of all MFI objects. We won't need all of these |
4065 | // objects, but we're going to create a full array of them to make |
4066 | // it easier to index into when we're counting "uses" down below. |
4067 | // We want to be able to easily/cheaply access an object by simply |
4068 | // indexing into it, instead of having to search for it every time. |
4069 | std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd()); |
4070 | |
4071 | // Walk the objects we care about and mark them as such in our working |
4072 | // struct. |
4073 | for (auto &Obj : ObjectsToAllocate) { |
4074 | SortingObjects[Obj].IsValid = true; |
4075 | SortingObjects[Obj].ObjectIndex = Obj; |
4076 | SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(ObjectIdx: Obj); |
4077 | // Set the size. |
4078 | int ObjectSize = MFI.getObjectSize(ObjectIdx: Obj); |
4079 | if (ObjectSize == 0) |
4080 | // Variable size. Just use 4. |
4081 | SortingObjects[Obj].ObjectSize = 4; |
4082 | else |
4083 | SortingObjects[Obj].ObjectSize = ObjectSize; |
4084 | } |
4085 | |
4086 | // Count the number of uses for each object. |
4087 | for (auto &MBB : MF) { |
4088 | for (auto &MI : MBB) { |
4089 | if (MI.isDebugInstr()) |
4090 | continue; |
4091 | for (const MachineOperand &MO : MI.operands()) { |
4092 | // Check to see if it's a local stack symbol. |
4093 | if (!MO.isFI()) |
4094 | continue; |
4095 | int Index = MO.getIndex(); |
4096 | // Check to see if it falls within our range, and is tagged |
4097 | // to require ordering. |
4098 | if (Index >= 0 && Index < MFI.getObjectIndexEnd() && |
4099 | SortingObjects[Index].IsValid) |
4100 | SortingObjects[Index].ObjectNumUses++; |
4101 | } |
4102 | } |
4103 | } |
4104 | |
4105 | // Sort the objects using X86FrameSortingAlgorithm (see its comment for |
4106 | // info). |
4107 | llvm::stable_sort(Range&: SortingObjects, C: X86FrameSortingComparator()); |
4108 | |
4109 | // Now modify the original list to represent the final order that |
4110 | // we want. The order will depend on whether we're going to access them |
4111 | // from the stack pointer or the frame pointer. For SP, the list should |
4112 | // end up with the END containing objects that we want with smaller offsets. |
4113 | // For FP, it should be flipped. |
4114 | int i = 0; |
4115 | for (auto &Obj : SortingObjects) { |
4116 | // All invalid items are sorted at the end, so it's safe to stop. |
4117 | if (!Obj.IsValid) |
4118 | break; |
4119 | ObjectsToAllocate[i++] = Obj.ObjectIndex; |
4120 | } |
4121 | |
4122 | // Flip it if we're accessing off of the FP. |
4123 | if (!TRI->hasStackRealignment(MF) && hasFP(MF)) |
4124 | std::reverse(first: ObjectsToAllocate.begin(), last: ObjectsToAllocate.end()); |
4125 | } |
4126 | |
4127 | unsigned |
4128 | X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { |
4129 | // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. |
4130 | unsigned Offset = 16; |
4131 | // RBP is immediately pushed. |
4132 | Offset += SlotSize; |
4133 | // All callee-saved registers are then pushed. |
4134 | Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); |
4135 | // Every funclet allocates enough stack space for the largest outgoing call. |
4136 | Offset += getWinEHFuncletFrameSize(MF); |
4137 | return Offset; |
4138 | } |
4139 | |
4140 | void X86FrameLowering::processFunctionBeforeFrameFinalized( |
4141 | MachineFunction &MF, RegScavenger *RS) const { |
4142 | // Mark the function as not having WinCFI. We will set it back to true in |
4143 | // emitPrologue if it gets called and emits CFI. |
4144 | MF.setHasWinCFI(false); |
4145 | |
4146 | // If we are using Windows x64 CFI, ensure that the stack is always 8 byte |
4147 | // aligned. The format doesn't support misaligned stack adjustments. |
4148 | if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) |
4149 | MF.getFrameInfo().ensureMaxAlignment(Alignment: Align(SlotSize)); |
4150 | |
4151 | // If this function isn't doing Win64-style C++ EH, we don't need to do |
4152 | // anything. |
4153 | if (STI.is64Bit() && MF.hasEHFunclets() && |
4154 | classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn()) == |
4155 | EHPersonality::MSVC_CXX) { |
4156 | adjustFrameForMsvcCxxEh(MF); |
4157 | } |
4158 | } |
4159 | |
4160 | void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { |
4161 | // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset |
4162 | // relative to RSP after the prologue. Find the offset of the last fixed |
4163 | // object, so that we can allocate a slot immediately following it. If there |
4164 | // were no fixed objects, use offset -SlotSize, which is immediately after the |
4165 | // return address. Fixed objects have negative frame indices. |
4166 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4167 | WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); |
4168 | int64_t MinFixedObjOffset = -SlotSize; |
4169 | for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) |
4170 | MinFixedObjOffset = std::min(a: MinFixedObjOffset, b: MFI.getObjectOffset(ObjectIdx: I)); |
4171 | |
4172 | for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { |
4173 | for (WinEHHandlerType &H : TBME.HandlerArray) { |
4174 | int FrameIndex = H.CatchObj.FrameIndex; |
4175 | if (FrameIndex != INT_MAX) { |
4176 | // Ensure alignment. |
4177 | unsigned Align = MFI.getObjectAlign(ObjectIdx: FrameIndex).value(); |
4178 | MinFixedObjOffset -= std::abs(i: MinFixedObjOffset) % Align; |
4179 | MinFixedObjOffset -= MFI.getObjectSize(ObjectIdx: FrameIndex); |
4180 | MFI.setObjectOffset(ObjectIdx: FrameIndex, SPOffset: MinFixedObjOffset); |
4181 | } |
4182 | } |
4183 | } |
4184 | |
4185 | // Ensure alignment. |
4186 | MinFixedObjOffset -= std::abs(i: MinFixedObjOffset) % 8; |
4187 | int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; |
4188 | int UnwindHelpFI = |
4189 | MFI.CreateFixedObject(Size: SlotSize, SPOffset: UnwindHelpOffset, /*IsImmutable=*/false); |
4190 | EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; |
4191 | |
4192 | // Store -2 into UnwindHelp on function entry. We have to scan forwards past |
4193 | // other frame setup instructions. |
4194 | MachineBasicBlock &MBB = MF.front(); |
4195 | auto MBBI = MBB.begin(); |
4196 | while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup)) |
4197 | ++MBBI; |
4198 | |
4199 | DebugLoc DL = MBB.findDebugLoc(MBBI); |
4200 | addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)), |
4201 | UnwindHelpFI) |
4202 | .addImm(-2); |
4203 | } |
4204 | |
4205 | void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( |
4206 | MachineFunction &MF, RegScavenger *RS) const { |
4207 | auto *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
4208 | |
4209 | if (STI.is32Bit() && MF.hasEHFunclets()) |
4210 | restoreWinEHStackPointersInParent(MF); |
4211 | // We have emitted prolog and epilog. Don't need stack pointer saving |
4212 | // instruction any more. |
4213 | if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) { |
4214 | MI->eraseFromParent(); |
4215 | X86FI->setStackPtrSaveMI(nullptr); |
4216 | } |
4217 | } |
4218 | |
4219 | void X86FrameLowering::restoreWinEHStackPointersInParent( |
4220 | MachineFunction &MF) const { |
4221 | // 32-bit functions have to restore stack pointers when control is transferred |
4222 | // back to the parent function. These blocks are identified as eh pads that |
4223 | // are not funclet entries. |
4224 | bool IsSEH = isAsynchronousEHPersonality( |
4225 | Pers: classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn())); |
4226 | for (MachineBasicBlock &MBB : MF) { |
4227 | bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry(); |
4228 | if (NeedsRestore) |
4229 | restoreWin32EHStackPointers(MBB, MBBI: MBB.begin(), DL: DebugLoc(), |
4230 | /*RestoreSP=*/IsSEH); |
4231 | } |
4232 | } |
4233 | |