1 | //===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This pass is responsible for finalizing the function's frame layout, saving
10 | // callee saved registers, and for emitting prolog & epilog code for the |
11 | // function. |
12 | // |
13 | // This pass must be run after register allocation. After this pass is |
14 | // executed, it is illegal to construct MO_FrameIndex operands. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "llvm/ADT/ArrayRef.h" |
19 | #include "llvm/ADT/BitVector.h" |
20 | #include "llvm/ADT/STLExtras.h" |
21 | #include "llvm/ADT/SetVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallSet.h" |
24 | #include "llvm/ADT/SmallVector.h" |
25 | #include "llvm/ADT/Statistic.h" |
26 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
27 | #include "llvm/CodeGen/MachineBasicBlock.h" |
28 | #include "llvm/CodeGen/MachineDominators.h" |
29 | #include "llvm/CodeGen/MachineFrameInfo.h" |
30 | #include "llvm/CodeGen/MachineFunction.h" |
31 | #include "llvm/CodeGen/MachineFunctionPass.h" |
32 | #include "llvm/CodeGen/MachineInstr.h" |
33 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
34 | #include "llvm/CodeGen/MachineLoopInfo.h" |
35 | #include "llvm/CodeGen/MachineModuleInfo.h" |
36 | #include "llvm/CodeGen/MachineOperand.h" |
37 | #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" |
38 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
39 | #include "llvm/CodeGen/RegisterScavenging.h" |
40 | #include "llvm/CodeGen/TargetFrameLowering.h" |
41 | #include "llvm/CodeGen/TargetInstrInfo.h" |
42 | #include "llvm/CodeGen/TargetOpcodes.h" |
43 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
44 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
45 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
46 | #include "llvm/IR/Attributes.h" |
47 | #include "llvm/IR/CallingConv.h" |
48 | #include "llvm/IR/DebugInfoMetadata.h" |
49 | #include "llvm/IR/DiagnosticInfo.h" |
50 | #include "llvm/IR/Function.h" |
51 | #include "llvm/IR/InlineAsm.h" |
52 | #include "llvm/IR/LLVMContext.h" |
53 | #include "llvm/InitializePasses.h" |
54 | #include "llvm/MC/MCRegisterInfo.h" |
55 | #include "llvm/Pass.h" |
56 | #include "llvm/Support/CodeGen.h" |
57 | #include "llvm/Support/Debug.h" |
58 | #include "llvm/Support/ErrorHandling.h" |
59 | #include "llvm/Support/FormatVariadic.h" |
60 | #include "llvm/Support/raw_ostream.h" |
61 | #include "llvm/Target/TargetMachine.h" |
62 | #include "llvm/Target/TargetOptions.h" |
63 | #include <algorithm> |
64 | #include <cassert> |
65 | #include <cstdint> |
66 | #include <functional> |
67 | #include <limits> |
68 | #include <utility> |
69 | #include <vector> |
70 | |
71 | using namespace llvm; |
72 | |
73 | #define DEBUG_TYPE "prologepilog" |
74 | |
75 | using MBBVector = SmallVector<MachineBasicBlock *, 4>; |
76 | |
77 | STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs" ); |
78 | STATISTIC(NumFuncSeen, "Number of functions seen in PEI" ); |
79 | |
80 | |
81 | namespace { |
82 | |
83 | class PEI : public MachineFunctionPass { |
84 | public: |
85 | static char ID; |
86 | |
87 | PEI() : MachineFunctionPass(ID) { |
88 | initializePEIPass(*PassRegistry::getPassRegistry()); |
89 | } |
90 | |
91 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
92 | |
93 | /// runOnMachineFunction - Insert prolog/epilog code and replace abstract |
94 | /// frame indexes with appropriate references. |
95 | bool runOnMachineFunction(MachineFunction &MF) override; |
96 | |
97 | private: |
98 | RegScavenger *RS = nullptr; |
99 | |
100 | // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved |
101 | // stack frame indexes. |
102 | unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max(); |
103 | unsigned MaxCSFrameIndex = 0; |
104 | |
105 | // Save and Restore blocks of the current function. Typically there is a |
106 | // single save block, unless Windows EH funclets are involved. |
107 | MBBVector SaveBlocks; |
108 | MBBVector RestoreBlocks; |
109 | |
110 | // Flag to control whether to use the register scavenger to resolve |
111 | // frame index materialization registers. Set according to |
112 | // TRI->requiresFrameIndexScavenging() for the current function. |
113 | bool FrameIndexVirtualScavenging = false; |
114 | |
115 | // Flag to control whether the scavenger should be passed even though |
116 | // FrameIndexVirtualScavenging is used. |
117 | bool FrameIndexEliminationScavenging = false; |
118 | |
119 | // Emit remarks. |
120 | MachineOptimizationRemarkEmitter *ORE = nullptr; |
121 | |
122 | void calculateCallFrameInfo(MachineFunction &MF); |
123 | void calculateSaveRestoreBlocks(MachineFunction &MF); |
124 | void spillCalleeSavedRegs(MachineFunction &MF); |
125 | |
126 | void calculateFrameObjectOffsets(MachineFunction &MF); |
127 | void replaceFrameIndices(MachineFunction &MF); |
128 | void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, |
129 | int &SPAdj); |
130 | // Frame indices in debug values are encoded in a target independent |
131 | // way with simply the frame index and offset rather than any |
132 | // target-specific addressing mode. |
133 | bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI, |
134 | unsigned OpIdx, int SPAdj = 0); |
135 | // Does same as replaceFrameIndices but using the backward MIR walk and |
136 | // backward register scavenger walk. |
137 | void replaceFrameIndicesBackward(MachineFunction &MF); |
138 | void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF, |
139 | int &SPAdj); |
140 | |
141 | void insertPrologEpilogCode(MachineFunction &MF); |
142 | void insertZeroCallUsedRegs(MachineFunction &MF); |
143 | }; |
144 | |
145 | } // end anonymous namespace |
146 | |
147 | char PEI::ID = 0; |
148 | |
149 | char &llvm::PrologEpilogCodeInserterID = PEI::ID; |
150 | |
151 | INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion" , false, |
152 | false) |
153 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) |
154 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
155 | INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) |
156 | INITIALIZE_PASS_END(PEI, DEBUG_TYPE, |
157 | "Prologue/Epilogue Insertion & Frame Finalization" , false, |
158 | false) |
159 | |
160 | MachineFunctionPass *llvm::createPrologEpilogInserterPass() { |
161 | return new PEI(); |
162 | } |
163 | |
164 | STATISTIC(NumBytesStackSpace, |
165 | "Number of bytes used for stack in all functions" ); |
166 | |
167 | void PEI::getAnalysisUsage(AnalysisUsage &AU) const { |
168 | AU.setPreservesCFG(); |
169 | AU.addPreserved<MachineLoopInfo>(); |
170 | AU.addPreserved<MachineDominatorTree>(); |
171 | AU.addRequired<MachineOptimizationRemarkEmitterPass>(); |
172 | MachineFunctionPass::getAnalysisUsage(AU); |
173 | } |
174 | |
175 | /// StackObjSet - A set of stack object indexes |
176 | using StackObjSet = SmallSetVector<int, 8>; |
177 | |
178 | using SavedDbgValuesMap = |
179 | SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>; |
180 | |
181 | /// Stash DBG_VALUEs that describe parameters and which are placed at the start |
182 | /// of the block. Later on, after the prologue code has been emitted, the |
183 | /// stashed DBG_VALUEs will be reinserted at the start of the block. |
184 | static void stashEntryDbgValues(MachineBasicBlock &MBB, |
185 | SavedDbgValuesMap &EntryDbgValues) { |
186 | SmallVector<const MachineInstr *, 4> FrameIndexValues; |
187 | |
188 | for (auto &MI : MBB) { |
189 | if (!MI.isDebugInstr()) |
190 | break; |
191 | if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter()) |
192 | continue; |
193 | if (any_of(Range: MI.debug_operands(), |
194 | P: [](const MachineOperand &MO) { return MO.isFI(); })) { |
195 | // We can only emit valid locations for frame indices after the frame |
196 | // setup, so do not stash away them. |
197 | FrameIndexValues.push_back(Elt: &MI); |
198 | continue; |
199 | } |
200 | const DILocalVariable *Var = MI.getDebugVariable(); |
201 | const DIExpression *Expr = MI.getDebugExpression(); |
202 | auto Overlaps = [Var, Expr](const MachineInstr *DV) { |
203 | return Var == DV->getDebugVariable() && |
204 | Expr->fragmentsOverlap(Other: DV->getDebugExpression()); |
205 | }; |
206 | // See if the debug value overlaps with any preceding debug value that will |
207 | // not be stashed. If that is the case, then we can't stash this value, as |
208 | // we would then reorder the values at reinsertion. |
209 | if (llvm::none_of(Range&: FrameIndexValues, P: Overlaps)) |
210 | EntryDbgValues[&MBB].push_back(Elt: &MI); |
211 | } |
212 | |
213 | // Remove stashed debug values from the block. |
214 | if (EntryDbgValues.count(Val: &MBB)) |
215 | for (auto *MI : EntryDbgValues[&MBB]) |
216 | MI->removeFromParent(); |
217 | } |
218 | |
219 | /// runOnMachineFunction - Insert prolog/epilog code and replace abstract |
220 | /// frame indexes with appropriate references. |
221 | bool PEI::runOnMachineFunction(MachineFunction &MF) { |
222 | NumFuncSeen++; |
223 | const Function &F = MF.getFunction(); |
224 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
225 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
226 | |
227 | RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr; |
228 | FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); |
229 | ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); |
230 | |
231 | // Calculate the MaxCallFrameSize value for the function's frame |
232 | // information. Also eliminates call frame pseudo instructions. |
233 | calculateCallFrameInfo(MF); |
234 | |
235 | // Determine placement of CSR spill/restore code and prolog/epilog code: |
236 | // place all spills in the entry block, all restores in return blocks. |
237 | calculateSaveRestoreBlocks(MF); |
238 | |
239 | // Stash away DBG_VALUEs that should not be moved by insertion of prolog code. |
240 | SavedDbgValuesMap EntryDbgValues; |
241 | for (MachineBasicBlock *SaveBlock : SaveBlocks) |
242 | stashEntryDbgValues(MBB&: *SaveBlock, EntryDbgValues); |
243 | |
244 | // Handle CSR spilling and restoring, for targets that need it. |
245 | if (MF.getTarget().usesPhysRegsForValues()) |
246 | spillCalleeSavedRegs(MF); |
247 | |
248 | // Allow the target machine to make final modifications to the function |
249 | // before the frame layout is finalized. |
250 | TFI->processFunctionBeforeFrameFinalized(MF, RS); |
251 | |
252 | // Calculate actual frame offsets for all abstract stack objects... |
253 | calculateFrameObjectOffsets(MF); |
254 | |
255 | // Add prolog and epilog code to the function. This function is required |
256 | // to align the stack frame as necessary for any stack variables or |
257 | // called functions. Because of this, calculateCalleeSavedRegisters() |
258 | // must be called before this function in order to set the AdjustsStack |
259 | // and MaxCallFrameSize variables. |
260 | if (!F.hasFnAttribute(Attribute::Naked)) |
261 | insertPrologEpilogCode(MF); |
262 | |
263 | // Reinsert stashed debug values at the start of the entry blocks. |
264 | for (auto &I : EntryDbgValues) |
265 | I.first->insert(I: I.first->begin(), S: I.second.begin(), E: I.second.end()); |
266 | |
267 | // Allow the target machine to make final modifications to the function |
268 | // before the frame layout is finalized. |
269 | TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS); |
270 | |
271 | // Replace all MO_FrameIndex operands with physical register references |
272 | // and actual offsets. |
273 | if (TFI->needsFrameIndexResolution(MF)) { |
274 | // Allow the target to determine this after knowing the frame size. |
275 | FrameIndexEliminationScavenging = |
276 | (RS && !FrameIndexVirtualScavenging) || |
277 | TRI->requiresFrameIndexReplacementScavenging(MF); |
278 | |
279 | if (TRI->eliminateFrameIndicesBackwards()) |
280 | replaceFrameIndicesBackward(MF); |
281 | else |
282 | replaceFrameIndices(MF); |
283 | } |
284 | |
285 | // If register scavenging is needed, as we've enabled doing it as a |
286 | // post-pass, scavenge the virtual registers that frame index elimination |
287 | // inserted. |
288 | if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging) |
289 | scavengeFrameVirtualRegs(MF, RS&: *RS); |
290 | |
291 | // Warn on stack size when we exceeds the given limit. |
292 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
293 | uint64_t StackSize = MFI.getStackSize(); |
294 | |
295 | uint64_t Threshold = TFI->getStackThreshold(); |
296 | if (MF.getFunction().hasFnAttribute(Kind: "warn-stack-size" )) { |
297 | bool Failed = MF.getFunction() |
298 | .getFnAttribute(Kind: "warn-stack-size" ) |
299 | .getValueAsString() |
300 | .getAsInteger(Radix: 10, Result&: Threshold); |
301 | // Verifier should have caught this. |
302 | assert(!Failed && "Invalid warn-stack-size fn attr value" ); |
303 | (void)Failed; |
304 | } |
305 | uint64_t UnsafeStackSize = MFI.getUnsafeStackSize(); |
306 | if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) |
307 | StackSize += UnsafeStackSize; |
308 | |
309 | if (StackSize > Threshold) { |
310 | DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning); |
311 | F.getContext().diagnose(DI: DiagStackSize); |
312 | int64_t SpillSize = 0; |
313 | for (int Idx = MFI.getObjectIndexBegin(), End = MFI.getObjectIndexEnd(); |
314 | Idx != End; ++Idx) { |
315 | if (MFI.isSpillSlotObjectIndex(ObjectIdx: Idx)) |
316 | SpillSize += MFI.getObjectSize(ObjectIdx: Idx); |
317 | } |
318 | |
319 | [[maybe_unused]] float SpillPct = |
320 | static_cast<float>(SpillSize) / static_cast<float>(StackSize); |
321 | LLVM_DEBUG( |
322 | dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables" , |
323 | SpillSize, StackSize, StackSize - SpillSize, SpillPct, |
324 | 1.0f - SpillPct)); |
325 | if (UnsafeStackSize != 0) { |
326 | LLVM_DEBUG(dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack" , |
327 | UnsafeStackSize, |
328 | static_cast<float>(UnsafeStackSize) / |
329 | static_cast<float>(StackSize), |
330 | StackSize)); |
331 | } |
332 | LLVM_DEBUG(dbgs() << "\n" ); |
333 | } |
334 | |
335 | ORE->emit(RemarkBuilder: [&]() { |
336 | return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize" , |
337 | MF.getFunction().getSubprogram(), |
338 | &MF.front()) |
339 | << ore::NV("NumStackBytes" , StackSize) |
340 | << " stack bytes in function '" |
341 | << ore::NV("Function" , MF.getFunction().getName()) << "'" ; |
342 | }); |
343 | |
344 | delete RS; |
345 | SaveBlocks.clear(); |
346 | RestoreBlocks.clear(); |
347 | MFI.setSavePoint(nullptr); |
348 | MFI.setRestorePoint(nullptr); |
349 | return true; |
350 | } |
351 | |
352 | /// Calculate the MaxCallFrameSize variable for the function's frame |
353 | /// information and eliminate call frame pseudo instructions. |
354 | void PEI::calculateCallFrameInfo(MachineFunction &MF) { |
355 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
356 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
357 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
358 | |
359 | // Get the function call frame set-up and tear-down instruction opcode |
360 | unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); |
361 | unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); |
362 | |
363 | // Early exit for targets which have no call frame setup/destroy pseudo |
364 | // instructions. |
365 | if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) |
366 | return; |
367 | |
368 | // (Re-)Compute the MaxCallFrameSize. |
369 | [[maybe_unused]] uint32_t MaxCFSIn = |
370 | MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : UINT32_MAX; |
371 | std::vector<MachineBasicBlock::iterator> FrameSDOps; |
372 | MFI.computeMaxCallFrameSize(MF, FrameSDOps: &FrameSDOps); |
373 | assert(MFI.getMaxCallFrameSize() <= MaxCFSIn && |
374 | "Recomputing MaxCFS gave a larger value." ); |
375 | assert((FrameSDOps.empty() || MF.getFrameInfo().adjustsStack()) && |
376 | "AdjustsStack not set in presence of a frame pseudo instruction." ); |
377 | |
378 | if (TFI->canSimplifyCallFramePseudos(MF)) { |
379 | // If call frames are not being included as part of the stack frame, and |
380 | // the target doesn't indicate otherwise, remove the call frame pseudos |
381 | // here. The sub/add sp instruction pairs are still inserted, but we don't |
382 | // need to track the SP adjustment for frame index elimination. |
383 | for (MachineBasicBlock::iterator I : FrameSDOps) |
384 | TFI->eliminateCallFramePseudoInstr(MF, MBB&: *I->getParent(), MI: I); |
385 | |
386 | // We can't track the call frame size after call frame pseudos have been |
387 | // eliminated. Set it to zero everywhere to keep MachineVerifier happy. |
388 | for (MachineBasicBlock &MBB : MF) |
389 | MBB.setCallFrameSize(0); |
390 | } |
391 | } |
392 | |
393 | /// Compute the sets of entry and return blocks for saving and restoring |
394 | /// callee-saved registers, and placing prolog and epilog code. |
395 | void PEI::calculateSaveRestoreBlocks(MachineFunction &MF) { |
396 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
397 | |
398 | // Even when we do not change any CSR, we still want to insert the |
399 | // prologue and epilogue of the function. |
400 | // So set the save points for those. |
401 | |
402 | // Use the points found by shrink-wrapping, if any. |
403 | if (MFI.getSavePoint()) { |
404 | SaveBlocks.push_back(Elt: MFI.getSavePoint()); |
405 | assert(MFI.getRestorePoint() && "Both restore and save must be set" ); |
406 | MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); |
407 | // If RestoreBlock does not have any successor and is not a return block |
408 | // then the end point is unreachable and we do not need to insert any |
409 | // epilogue. |
410 | if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) |
411 | RestoreBlocks.push_back(Elt: RestoreBlock); |
412 | return; |
413 | } |
414 | |
415 | // Save refs to entry and return blocks. |
416 | SaveBlocks.push_back(Elt: &MF.front()); |
417 | for (MachineBasicBlock &MBB : MF) { |
418 | if (MBB.isEHFuncletEntry()) |
419 | SaveBlocks.push_back(Elt: &MBB); |
420 | if (MBB.isReturnBlock()) |
421 | RestoreBlocks.push_back(Elt: &MBB); |
422 | } |
423 | } |
424 | |
425 | static void assignCalleeSavedSpillSlots(MachineFunction &F, |
426 | const BitVector &SavedRegs, |
427 | unsigned &MinCSFrameIndex, |
428 | unsigned &MaxCSFrameIndex) { |
429 | if (SavedRegs.empty()) |
430 | return; |
431 | |
432 | const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); |
433 | const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs(); |
434 | BitVector CSMask(SavedRegs.size()); |
435 | |
436 | for (unsigned i = 0; CSRegs[i]; ++i) |
437 | CSMask.set(CSRegs[i]); |
438 | |
439 | std::vector<CalleeSavedInfo> CSI; |
440 | for (unsigned i = 0; CSRegs[i]; ++i) { |
441 | unsigned Reg = CSRegs[i]; |
442 | if (SavedRegs.test(Idx: Reg)) { |
443 | bool SavedSuper = false; |
444 | for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) { |
445 | // Some backends set all aliases for some registers as saved, such as |
446 | // Mips's $fp, so they appear in SavedRegs but not CSRegs. |
447 | if (SavedRegs.test(Idx: SuperReg) && CSMask.test(Idx: SuperReg)) { |
448 | SavedSuper = true; |
449 | break; |
450 | } |
451 | } |
452 | |
453 | if (!SavedSuper) |
454 | CSI.push_back(x: CalleeSavedInfo(Reg)); |
455 | } |
456 | } |
457 | |
458 | const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); |
459 | MachineFrameInfo &MFI = F.getFrameInfo(); |
460 | if (!TFI->assignCalleeSavedSpillSlots(MF&: F, TRI: RegInfo, CSI, MinCSFrameIndex, |
461 | MaxCSFrameIndex)) { |
462 | // If target doesn't implement this, use generic code. |
463 | |
464 | if (CSI.empty()) |
465 | return; // Early exit if no callee saved registers are modified! |
466 | |
467 | unsigned NumFixedSpillSlots; |
468 | const TargetFrameLowering::SpillSlot *FixedSpillSlots = |
469 | TFI->getCalleeSavedSpillSlots(NumEntries&: NumFixedSpillSlots); |
470 | |
471 | // Now that we know which registers need to be saved and restored, allocate |
472 | // stack slots for them. |
473 | for (auto &CS : CSI) { |
474 | // If the target has spilled this register to another register, we don't |
475 | // need to allocate a stack slot. |
476 | if (CS.isSpilledToReg()) |
477 | continue; |
478 | |
479 | unsigned Reg = CS.getReg(); |
480 | const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); |
481 | |
482 | int FrameIdx; |
483 | if (RegInfo->hasReservedSpillSlot(MF: F, Reg, FrameIdx)) { |
484 | CS.setFrameIdx(FrameIdx); |
485 | continue; |
486 | } |
487 | |
488 | // Check to see if this physreg must be spilled to a particular stack slot |
489 | // on this target. |
490 | const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; |
491 | while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && |
492 | FixedSlot->Reg != Reg) |
493 | ++FixedSlot; |
494 | |
495 | unsigned Size = RegInfo->getSpillSize(RC: *RC); |
496 | if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { |
497 | // Nope, just spill it anywhere convenient. |
498 | Align Alignment = RegInfo->getSpillAlign(RC: *RC); |
499 | // We may not be able to satisfy the desired alignment specification of |
500 | // the TargetRegisterClass if the stack alignment is smaller. Use the |
501 | // min. |
502 | Alignment = std::min(a: Alignment, b: TFI->getStackAlign()); |
503 | FrameIdx = MFI.CreateStackObject(Size, Alignment, isSpillSlot: true); |
504 | if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; |
505 | if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; |
506 | } else { |
507 | // Spill it to the stack where we must. |
508 | FrameIdx = MFI.CreateFixedSpillStackObject(Size, SPOffset: FixedSlot->Offset); |
509 | } |
510 | |
511 | CS.setFrameIdx(FrameIdx); |
512 | } |
513 | } |
514 | |
515 | MFI.setCalleeSavedInfo(CSI); |
516 | } |
517 | |
518 | /// Helper function to update the liveness information for the callee-saved |
519 | /// registers. |
520 | static void updateLiveness(MachineFunction &MF) { |
521 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
522 | // Visited will contain all the basic blocks that are in the region |
523 | // where the callee saved registers are alive: |
524 | // - Anything that is not Save or Restore -> LiveThrough. |
525 | // - Save -> LiveIn. |
526 | // - Restore -> LiveOut. |
527 | // The live-out is not attached to the block, so no need to keep |
528 | // Restore in this set. |
529 | SmallPtrSet<MachineBasicBlock *, 8> Visited; |
530 | SmallVector<MachineBasicBlock *, 8> WorkList; |
531 | MachineBasicBlock *Entry = &MF.front(); |
532 | MachineBasicBlock *Save = MFI.getSavePoint(); |
533 | |
534 | if (!Save) |
535 | Save = Entry; |
536 | |
537 | if (Entry != Save) { |
538 | WorkList.push_back(Elt: Entry); |
539 | Visited.insert(Ptr: Entry); |
540 | } |
541 | Visited.insert(Ptr: Save); |
542 | |
543 | MachineBasicBlock *Restore = MFI.getRestorePoint(); |
544 | if (Restore) |
545 | // By construction Restore cannot be visited, otherwise it |
546 | // means there exists a path to Restore that does not go |
547 | // through Save. |
548 | WorkList.push_back(Elt: Restore); |
549 | |
550 | while (!WorkList.empty()) { |
551 | const MachineBasicBlock *CurBB = WorkList.pop_back_val(); |
552 | // By construction, the region that is after the save point is |
553 | // dominated by the Save and post-dominated by the Restore. |
554 | if (CurBB == Save && Save != Restore) |
555 | continue; |
556 | // Enqueue all the successors not already visited. |
557 | // Those are by construction either before Save or after Restore. |
558 | for (MachineBasicBlock *SuccBB : CurBB->successors()) |
559 | if (Visited.insert(Ptr: SuccBB).second) |
560 | WorkList.push_back(Elt: SuccBB); |
561 | } |
562 | |
563 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
564 | |
565 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
566 | for (const CalleeSavedInfo &I : CSI) { |
567 | for (MachineBasicBlock *MBB : Visited) { |
568 | MCPhysReg Reg = I.getReg(); |
569 | // Add the callee-saved register as live-in. |
570 | // It's killed at the spill. |
571 | if (!MRI.isReserved(PhysReg: Reg) && !MBB->isLiveIn(Reg)) |
572 | MBB->addLiveIn(PhysReg: Reg); |
573 | } |
574 | // If callee-saved register is spilled to another register rather than |
575 | // spilling to stack, the destination register has to be marked as live for |
576 | // each MBB between the prologue and epilogue so that it is not clobbered |
577 | // before it is reloaded in the epilogue. The Visited set contains all |
578 | // blocks outside of the region delimited by prologue/epilogue. |
579 | if (I.isSpilledToReg()) { |
580 | for (MachineBasicBlock &MBB : MF) { |
581 | if (Visited.count(Ptr: &MBB)) |
582 | continue; |
583 | MCPhysReg DstReg = I.getDstReg(); |
584 | if (!MBB.isLiveIn(Reg: DstReg)) |
585 | MBB.addLiveIn(PhysReg: DstReg); |
586 | } |
587 | } |
588 | } |
589 | } |
590 | |
591 | /// Insert spill code for the callee-saved registers used in the function. |
592 | static void insertCSRSaves(MachineBasicBlock &SaveBlock, |
593 | ArrayRef<CalleeSavedInfo> CSI) { |
594 | MachineFunction &MF = *SaveBlock.getParent(); |
595 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
596 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
597 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
598 | |
599 | MachineBasicBlock::iterator I = SaveBlock.begin(); |
600 | if (!TFI->spillCalleeSavedRegisters(MBB&: SaveBlock, MI: I, CSI, TRI)) { |
601 | for (const CalleeSavedInfo &CS : CSI) { |
602 | // Insert the spill to the stack frame. |
603 | unsigned Reg = CS.getReg(); |
604 | |
605 | if (CS.isSpilledToReg()) { |
606 | BuildMI(BB&: SaveBlock, I, MIMD: DebugLoc(), |
607 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: CS.getDstReg()) |
608 | .addReg(RegNo: Reg, flags: getKillRegState(B: true)); |
609 | } else { |
610 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
611 | TII.storeRegToStackSlot(MBB&: SaveBlock, MI: I, SrcReg: Reg, isKill: true, FrameIndex: CS.getFrameIdx(), RC, |
612 | TRI, VReg: Register()); |
613 | } |
614 | } |
615 | } |
616 | } |
617 | |
618 | /// Insert restore code for the callee-saved registers used in the function. |
619 | static void insertCSRRestores(MachineBasicBlock &RestoreBlock, |
620 | std::vector<CalleeSavedInfo> &CSI) { |
621 | MachineFunction &MF = *RestoreBlock.getParent(); |
622 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
623 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
624 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
625 | |
626 | // Restore all registers immediately before the return and any |
627 | // terminators that precede it. |
628 | MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); |
629 | |
630 | if (!TFI->restoreCalleeSavedRegisters(MBB&: RestoreBlock, MI: I, CSI, TRI)) { |
631 | for (const CalleeSavedInfo &CI : reverse(C&: CSI)) { |
632 | unsigned Reg = CI.getReg(); |
633 | if (CI.isSpilledToReg()) { |
634 | BuildMI(BB&: RestoreBlock, I, MIMD: DebugLoc(), MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: Reg) |
635 | .addReg(RegNo: CI.getDstReg(), flags: getKillRegState(B: true)); |
636 | } else { |
637 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
638 | TII.loadRegFromStackSlot(MBB&: RestoreBlock, MI: I, DestReg: Reg, FrameIndex: CI.getFrameIdx(), RC, |
639 | TRI, VReg: Register()); |
640 | assert(I != RestoreBlock.begin() && |
641 | "loadRegFromStackSlot didn't insert any code!" ); |
642 | // Insert in reverse order. loadRegFromStackSlot can insert |
643 | // multiple instructions. |
644 | } |
645 | } |
646 | } |
647 | } |
648 | |
649 | void PEI::spillCalleeSavedRegs(MachineFunction &MF) { |
650 | // We can't list this requirement in getRequiredProperties because some |
651 | // targets (WebAssembly) use virtual registers past this point, and the pass |
652 | // pipeline is set up without giving the passes a chance to look at the |
653 | // TargetMachine. |
654 | // FIXME: Find a way to express this in getRequiredProperties. |
655 | assert(MF.getProperties().hasProperty( |
656 | MachineFunctionProperties::Property::NoVRegs)); |
657 | |
658 | const Function &F = MF.getFunction(); |
659 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
660 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
661 | MinCSFrameIndex = std::numeric_limits<unsigned>::max(); |
662 | MaxCSFrameIndex = 0; |
663 | |
664 | // Determine which of the registers in the callee save list should be saved. |
665 | BitVector SavedRegs; |
666 | TFI->determineCalleeSaves(MF, SavedRegs, RS); |
667 | |
668 | // Assign stack slots for any callee-saved registers that must be spilled. |
669 | assignCalleeSavedSpillSlots(F&: MF, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); |
670 | |
671 | // Add the code to save and restore the callee saved registers. |
672 | if (!F.hasFnAttribute(Attribute::Naked)) { |
673 | MFI.setCalleeSavedInfoValid(true); |
674 | |
675 | std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
676 | if (!CSI.empty()) { |
677 | if (!MFI.hasCalls()) |
678 | NumLeafFuncWithSpills++; |
679 | |
680 | for (MachineBasicBlock *SaveBlock : SaveBlocks) |
681 | insertCSRSaves(SaveBlock&: *SaveBlock, CSI); |
682 | |
683 | // Update the live-in information of all the blocks up to the save point. |
684 | updateLiveness(MF); |
685 | |
686 | for (MachineBasicBlock *RestoreBlock : RestoreBlocks) |
687 | insertCSRRestores(RestoreBlock&: *RestoreBlock, CSI); |
688 | } |
689 | } |
690 | } |
691 | |
692 | /// AdjustStackOffset - Helper function used to adjust the stack frame offset. |
693 | static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, |
694 | bool StackGrowsDown, int64_t &Offset, |
695 | Align &MaxAlign) { |
696 | // If the stack grows down, add the object size to find the lowest address. |
697 | if (StackGrowsDown) |
698 | Offset += MFI.getObjectSize(ObjectIdx: FrameIdx); |
699 | |
700 | Align Alignment = MFI.getObjectAlign(ObjectIdx: FrameIdx); |
701 | |
702 | // If the alignment of this object is greater than that of the stack, then |
703 | // increase the stack alignment to match. |
704 | MaxAlign = std::max(a: MaxAlign, b: Alignment); |
705 | |
706 | // Adjust to alignment boundary. |
707 | Offset = alignTo(Size: Offset, A: Alignment); |
708 | |
709 | if (StackGrowsDown) { |
710 | LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset |
711 | << "]\n" ); |
712 | MFI.setObjectOffset(ObjectIdx: FrameIdx, SPOffset: -Offset); // Set the computed offset |
713 | } else { |
714 | LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset |
715 | << "]\n" ); |
716 | MFI.setObjectOffset(ObjectIdx: FrameIdx, SPOffset: Offset); |
717 | Offset += MFI.getObjectSize(ObjectIdx: FrameIdx); |
718 | } |
719 | } |
720 | |
721 | /// Compute which bytes of fixed and callee-save stack area are unused and keep |
722 | /// track of them in StackBytesFree. |
723 | static inline void |
724 | computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, |
725 | unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, |
726 | int64_t FixedCSEnd, BitVector &StackBytesFree) { |
727 | // Avoid undefined int64_t -> int conversion below in extreme case. |
728 | if (FixedCSEnd > std::numeric_limits<int>::max()) |
729 | return; |
730 | |
731 | StackBytesFree.resize(N: FixedCSEnd, t: true); |
732 | |
733 | SmallVector<int, 16> AllocatedFrameSlots; |
734 | // Add fixed objects. |
735 | for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) |
736 | // StackSlot scavenging is only implemented for the default stack. |
737 | if (MFI.getStackID(ObjectIdx: i) == TargetStackID::Default) |
738 | AllocatedFrameSlots.push_back(Elt: i); |
739 | // Add callee-save objects if there are any. |
740 | if (MinCSFrameIndex <= MaxCSFrameIndex) { |
741 | for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) |
742 | if (MFI.getStackID(ObjectIdx: i) == TargetStackID::Default) |
743 | AllocatedFrameSlots.push_back(Elt: i); |
744 | } |
745 | |
746 | for (int i : AllocatedFrameSlots) { |
747 | // These are converted from int64_t, but they should always fit in int |
748 | // because of the FixedCSEnd check above. |
749 | int ObjOffset = MFI.getObjectOffset(ObjectIdx: i); |
750 | int ObjSize = MFI.getObjectSize(ObjectIdx: i); |
751 | int ObjStart, ObjEnd; |
752 | if (StackGrowsDown) { |
753 | // ObjOffset is negative when StackGrowsDown is true. |
754 | ObjStart = -ObjOffset - ObjSize; |
755 | ObjEnd = -ObjOffset; |
756 | } else { |
757 | ObjStart = ObjOffset; |
758 | ObjEnd = ObjOffset + ObjSize; |
759 | } |
760 | // Ignore fixed holes that are in the previous stack frame. |
761 | if (ObjEnd > 0) |
762 | StackBytesFree.reset(I: ObjStart, E: ObjEnd); |
763 | } |
764 | } |
765 | |
766 | /// Assign frame object to an unused portion of the stack in the fixed stack |
767 | /// object range. Return true if the allocation was successful. |
768 | static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, |
769 | bool StackGrowsDown, Align MaxAlign, |
770 | BitVector &StackBytesFree) { |
771 | if (MFI.isVariableSizedObjectIndex(ObjectIdx: FrameIdx)) |
772 | return false; |
773 | |
774 | if (StackBytesFree.none()) { |
775 | // clear it to speed up later scavengeStackSlot calls to |
776 | // StackBytesFree.none() |
777 | StackBytesFree.clear(); |
778 | return false; |
779 | } |
780 | |
781 | Align ObjAlign = MFI.getObjectAlign(ObjectIdx: FrameIdx); |
782 | if (ObjAlign > MaxAlign) |
783 | return false; |
784 | |
785 | int64_t ObjSize = MFI.getObjectSize(ObjectIdx: FrameIdx); |
786 | int FreeStart; |
787 | for (FreeStart = StackBytesFree.find_first(); FreeStart != -1; |
788 | FreeStart = StackBytesFree.find_next(Prev: FreeStart)) { |
789 | |
790 | // Check that free space has suitable alignment. |
791 | unsigned ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart; |
792 | if (alignTo(Size: ObjStart, A: ObjAlign) != ObjStart) |
793 | continue; |
794 | |
795 | if (FreeStart + ObjSize > StackBytesFree.size()) |
796 | return false; |
797 | |
798 | bool AllBytesFree = true; |
799 | for (unsigned Byte = 0; Byte < ObjSize; ++Byte) |
800 | if (!StackBytesFree.test(Idx: FreeStart + Byte)) { |
801 | AllBytesFree = false; |
802 | break; |
803 | } |
804 | if (AllBytesFree) |
805 | break; |
806 | } |
807 | |
808 | if (FreeStart == -1) |
809 | return false; |
810 | |
811 | if (StackGrowsDown) { |
812 | int ObjStart = -(FreeStart + ObjSize); |
813 | LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" |
814 | << ObjStart << "]\n" ); |
815 | MFI.setObjectOffset(ObjectIdx: FrameIdx, SPOffset: ObjStart); |
816 | } else { |
817 | LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" |
818 | << FreeStart << "]\n" ); |
819 | MFI.setObjectOffset(ObjectIdx: FrameIdx, SPOffset: FreeStart); |
820 | } |
821 | |
822 | StackBytesFree.reset(I: FreeStart, E: FreeStart + ObjSize); |
823 | return true; |
824 | } |
825 | |
826 | /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., |
827 | /// those required to be close to the Stack Protector) to stack offsets. |
828 | static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, |
829 | SmallSet<int, 16> &ProtectedObjs, |
830 | MachineFrameInfo &MFI, bool StackGrowsDown, |
831 | int64_t &Offset, Align &MaxAlign) { |
832 | |
833 | for (int i : UnassignedObjs) { |
834 | AdjustStackOffset(MFI, FrameIdx: i, StackGrowsDown, Offset, MaxAlign); |
835 | ProtectedObjs.insert(V: i); |
836 | } |
837 | } |
838 | |
839 | /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the |
840 | /// abstract stack objects. |
841 | void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { |
842 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
843 | |
844 | bool StackGrowsDown = |
845 | TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; |
846 | |
847 | // Loop over all of the stack objects, assigning sequential addresses... |
848 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
849 | |
850 | // Start at the beginning of the local area. |
851 | // The Offset is the distance from the stack top in the direction |
852 | // of stack growth -- so it's always nonnegative. |
853 | int LocalAreaOffset = TFI.getOffsetOfLocalArea(); |
854 | if (StackGrowsDown) |
855 | LocalAreaOffset = -LocalAreaOffset; |
856 | assert(LocalAreaOffset >= 0 |
857 | && "Local area offset should be in direction of stack growth" ); |
858 | int64_t Offset = LocalAreaOffset; |
859 | |
860 | #ifdef EXPENSIVE_CHECKS |
861 | for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) |
862 | if (!MFI.isDeadObjectIndex(i) && |
863 | MFI.getStackID(i) == TargetStackID::Default) |
864 | assert(MFI.getObjectAlign(i) <= MFI.getMaxAlign() && |
865 | "MaxAlignment is invalid" ); |
866 | #endif |
867 | |
868 | // If there are fixed sized objects that are preallocated in the local area, |
869 | // non-fixed objects can't be allocated right at the start of local area. |
870 | // Adjust 'Offset' to point to the end of last fixed sized preallocated |
871 | // object. |
872 | for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { |
873 | // Only allocate objects on the default stack. |
874 | if (MFI.getStackID(ObjectIdx: i) != TargetStackID::Default) |
875 | continue; |
876 | |
877 | int64_t FixedOff; |
878 | if (StackGrowsDown) { |
879 | // The maximum distance from the stack pointer is at lower address of |
880 | // the object -- which is given by offset. For down growing stack |
881 | // the offset is negative, so we negate the offset to get the distance. |
882 | FixedOff = -MFI.getObjectOffset(ObjectIdx: i); |
883 | } else { |
884 | // The maximum distance from the start pointer is at the upper |
885 | // address of the object. |
886 | FixedOff = MFI.getObjectOffset(ObjectIdx: i) + MFI.getObjectSize(ObjectIdx: i); |
887 | } |
888 | if (FixedOff > Offset) Offset = FixedOff; |
889 | } |
890 | |
891 | Align MaxAlign = MFI.getMaxAlign(); |
892 | // First assign frame offsets to stack objects that are used to spill |
893 | // callee saved registers. |
894 | if (MaxCSFrameIndex >= MinCSFrameIndex) { |
895 | for (unsigned i = 0; i <= MaxCSFrameIndex - MinCSFrameIndex; ++i) { |
896 | unsigned FrameIndex = |
897 | StackGrowsDown ? MinCSFrameIndex + i : MaxCSFrameIndex - i; |
898 | |
899 | // Only allocate objects on the default stack. |
900 | if (MFI.getStackID(ObjectIdx: FrameIndex) != TargetStackID::Default) |
901 | continue; |
902 | |
903 | // TODO: should this just be if (MFI.isDeadObjectIndex(FrameIndex)) |
904 | if (!StackGrowsDown && MFI.isDeadObjectIndex(ObjectIdx: FrameIndex)) |
905 | continue; |
906 | |
907 | AdjustStackOffset(MFI, FrameIdx: FrameIndex, StackGrowsDown, Offset, MaxAlign); |
908 | } |
909 | } |
910 | |
911 | assert(MaxAlign == MFI.getMaxAlign() && |
912 | "MFI.getMaxAlign should already account for all callee-saved " |
913 | "registers without a fixed stack slot" ); |
914 | |
915 | // FixedCSEnd is the stack offset to the end of the fixed and callee-save |
916 | // stack area. |
917 | int64_t FixedCSEnd = Offset; |
918 | |
919 | // Make sure the special register scavenging spill slot is closest to the |
920 | // incoming stack pointer if a frame pointer is required and is closer |
921 | // to the incoming rather than the final stack pointer. |
922 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); |
923 | bool EarlyScavengingSlots = TFI.allocateScavengingFrameIndexesNearIncomingSP(MF); |
924 | if (RS && EarlyScavengingSlots) { |
925 | SmallVector<int, 2> SFIs; |
926 | RS->getScavengingFrameIndices(A&: SFIs); |
927 | for (int SFI : SFIs) |
928 | AdjustStackOffset(MFI, FrameIdx: SFI, StackGrowsDown, Offset, MaxAlign); |
929 | } |
930 | |
931 | // FIXME: Once this is working, then enable flag will change to a target |
932 | // check for whether the frame is large enough to want to use virtual |
933 | // frame index registers. Functions which don't want/need this optimization |
934 | // will continue to use the existing code path. |
935 | if (MFI.getUseLocalStackAllocationBlock()) { |
936 | Align Alignment = MFI.getLocalFrameMaxAlign(); |
937 | |
938 | // Adjust to alignment boundary. |
939 | Offset = alignTo(Size: Offset, A: Alignment); |
940 | |
941 | LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n" ); |
942 | |
943 | // Resolve offsets for objects in the local block. |
944 | for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) { |
945 | std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i); |
946 | int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; |
947 | LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset |
948 | << "]\n" ); |
949 | MFI.setObjectOffset(ObjectIdx: Entry.first, SPOffset: FIOffset); |
950 | } |
951 | // Allocate the local block |
952 | Offset += MFI.getLocalFrameSize(); |
953 | |
954 | MaxAlign = std::max(a: Alignment, b: MaxAlign); |
955 | } |
956 | |
957 | // Retrieve the Exception Handler registration node. |
958 | int EHRegNodeFrameIndex = std::numeric_limits<int>::max(); |
959 | if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo()) |
960 | EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; |
961 | |
962 | // Make sure that the stack protector comes before the local variables on the |
963 | // stack. |
964 | SmallSet<int, 16> ProtectedObjs; |
965 | if (MFI.hasStackProtectorIndex()) { |
966 | int StackProtectorFI = MFI.getStackProtectorIndex(); |
967 | StackObjSet LargeArrayObjs; |
968 | StackObjSet SmallArrayObjs; |
969 | StackObjSet AddrOfObjs; |
970 | |
971 | // If we need a stack protector, we need to make sure that |
972 | // LocalStackSlotPass didn't already allocate a slot for it. |
973 | // If we are told to use the LocalStackAllocationBlock, the stack protector |
974 | // is expected to be already pre-allocated. |
975 | if (MFI.getStackID(ObjectIdx: StackProtectorFI) != TargetStackID::Default) { |
976 | // If the stack protector isn't on the default stack then it's up to the |
977 | // target to set the stack offset. |
978 | assert(MFI.getObjectOffset(StackProtectorFI) != 0 && |
979 | "Offset of stack protector on non-default stack expected to be " |
980 | "already set." ); |
981 | assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) && |
982 | "Stack protector on non-default stack expected to not be " |
983 | "pre-allocated by LocalStackSlotPass." ); |
984 | } else if (!MFI.getUseLocalStackAllocationBlock()) { |
985 | AdjustStackOffset(MFI, FrameIdx: StackProtectorFI, StackGrowsDown, Offset, |
986 | MaxAlign); |
987 | } else if (!MFI.isObjectPreAllocated(ObjectIdx: MFI.getStackProtectorIndex())) { |
988 | llvm_unreachable( |
989 | "Stack protector not pre-allocated by LocalStackSlotPass." ); |
990 | } |
991 | |
992 | // Assign large stack objects first. |
993 | for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { |
994 | if (MFI.isObjectPreAllocated(ObjectIdx: i) && MFI.getUseLocalStackAllocationBlock()) |
995 | continue; |
996 | if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) |
997 | continue; |
998 | if (RS && RS->isScavengingFrameIndex(FI: (int)i)) |
999 | continue; |
1000 | if (MFI.isDeadObjectIndex(ObjectIdx: i)) |
1001 | continue; |
1002 | if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i) |
1003 | continue; |
1004 | // Only allocate objects on the default stack. |
1005 | if (MFI.getStackID(ObjectIdx: i) != TargetStackID::Default) |
1006 | continue; |
1007 | |
1008 | switch (MFI.getObjectSSPLayout(ObjectIdx: i)) { |
1009 | case MachineFrameInfo::SSPLK_None: |
1010 | continue; |
1011 | case MachineFrameInfo::SSPLK_SmallArray: |
1012 | SmallArrayObjs.insert(X: i); |
1013 | continue; |
1014 | case MachineFrameInfo::SSPLK_AddrOf: |
1015 | AddrOfObjs.insert(X: i); |
1016 | continue; |
1017 | case MachineFrameInfo::SSPLK_LargeArray: |
1018 | LargeArrayObjs.insert(X: i); |
1019 | continue; |
1020 | } |
1021 | llvm_unreachable("Unexpected SSPLayoutKind." ); |
1022 | } |
1023 | |
1024 | // We expect **all** the protected stack objects to be pre-allocated by |
1025 | // LocalStackSlotPass. If it turns out that PEI still has to allocate some |
1026 | // of them, we may end up messing up the expected order of the objects. |
1027 | if (MFI.getUseLocalStackAllocationBlock() && |
1028 | !(LargeArrayObjs.empty() && SmallArrayObjs.empty() && |
1029 | AddrOfObjs.empty())) |
1030 | llvm_unreachable("Found protected stack objects not pre-allocated by " |
1031 | "LocalStackSlotPass." ); |
1032 | |
1033 | AssignProtectedObjSet(UnassignedObjs: LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, |
1034 | Offset, MaxAlign); |
1035 | AssignProtectedObjSet(UnassignedObjs: SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, |
1036 | Offset, MaxAlign); |
1037 | AssignProtectedObjSet(UnassignedObjs: AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, |
1038 | Offset, MaxAlign); |
1039 | } |
1040 | |
1041 | SmallVector<int, 8> ObjectsToAllocate; |
1042 | |
1043 | // Then prepare to assign frame offsets to stack objects that are not used to |
1044 | // spill callee saved registers. |
1045 | for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { |
1046 | if (MFI.isObjectPreAllocated(ObjectIdx: i) && MFI.getUseLocalStackAllocationBlock()) |
1047 | continue; |
1048 | if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) |
1049 | continue; |
1050 | if (RS && RS->isScavengingFrameIndex(FI: (int)i)) |
1051 | continue; |
1052 | if (MFI.isDeadObjectIndex(ObjectIdx: i)) |
1053 | continue; |
1054 | if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i) |
1055 | continue; |
1056 | if (ProtectedObjs.count(V: i)) |
1057 | continue; |
1058 | // Only allocate objects on the default stack. |
1059 | if (MFI.getStackID(ObjectIdx: i) != TargetStackID::Default) |
1060 | continue; |
1061 | |
1062 | // Add the objects that we need to allocate to our working set. |
1063 | ObjectsToAllocate.push_back(Elt: i); |
1064 | } |
1065 | |
1066 | // Allocate the EH registration node first if one is present. |
1067 | if (EHRegNodeFrameIndex != std::numeric_limits<int>::max()) |
1068 | AdjustStackOffset(MFI, FrameIdx: EHRegNodeFrameIndex, StackGrowsDown, Offset, |
1069 | MaxAlign); |
1070 | |
1071 | // Give the targets a chance to order the objects the way they like it. |
1072 | if (MF.getTarget().getOptLevel() != CodeGenOptLevel::None && |
1073 | MF.getTarget().Options.StackSymbolOrdering) |
1074 | TFI.orderFrameObjects(MF, objectsToAllocate&: ObjectsToAllocate); |
1075 | |
1076 | // Keep track of which bytes in the fixed and callee-save range are used so we |
1077 | // can use the holes when allocating later stack objects. Only do this if |
1078 | // stack protector isn't being used and the target requests it and we're |
1079 | // optimizing. |
1080 | BitVector StackBytesFree; |
1081 | if (!ObjectsToAllocate.empty() && |
1082 | MF.getTarget().getOptLevel() != CodeGenOptLevel::None && |
1083 | MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF)) |
1084 | computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex, |
1085 | FixedCSEnd, StackBytesFree); |
1086 | |
1087 | // Now walk the objects and actually assign base offsets to them. |
1088 | for (auto &Object : ObjectsToAllocate) |
1089 | if (!scavengeStackSlot(MFI, FrameIdx: Object, StackGrowsDown, MaxAlign, |
1090 | StackBytesFree)) |
1091 | AdjustStackOffset(MFI, FrameIdx: Object, StackGrowsDown, Offset, MaxAlign); |
1092 | |
1093 | // Make sure the special register scavenging spill slot is closest to the |
1094 | // stack pointer. |
1095 | if (RS && !EarlyScavengingSlots) { |
1096 | SmallVector<int, 2> SFIs; |
1097 | RS->getScavengingFrameIndices(A&: SFIs); |
1098 | for (int SFI : SFIs) |
1099 | AdjustStackOffset(MFI, FrameIdx: SFI, StackGrowsDown, Offset, MaxAlign); |
1100 | } |
1101 | |
1102 | if (!TFI.targetHandlesStackFrameRounding()) { |
1103 | // If we have reserved argument space for call sites in the function |
1104 | // immediately on entry to the current function, count it as part of the |
1105 | // overall stack size. |
1106 | if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF)) |
1107 | Offset += MFI.getMaxCallFrameSize(); |
1108 | |
1109 | // Round up the size to a multiple of the alignment. If the function has |
1110 | // any calls or alloca's, align to the target's StackAlignment value to |
1111 | // ensure that the callee's frame or the alloca data is suitably aligned; |
1112 | // otherwise, for leaf functions, align to the TransientStackAlignment |
1113 | // value. |
1114 | Align StackAlign; |
1115 | if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || |
1116 | (RegInfo->hasStackRealignment(MF) && MFI.getObjectIndexEnd() != 0)) |
1117 | StackAlign = TFI.getStackAlign(); |
1118 | else |
1119 | StackAlign = TFI.getTransientStackAlign(); |
1120 | |
1121 | // If the frame pointer is eliminated, all frame offsets will be relative to |
1122 | // SP not FP. Align to MaxAlign so this works. |
1123 | StackAlign = std::max(a: StackAlign, b: MaxAlign); |
1124 | int64_t OffsetBeforeAlignment = Offset; |
1125 | Offset = alignTo(Size: Offset, A: StackAlign); |
1126 | |
1127 | // If we have increased the offset to fulfill the alignment constrants, |
1128 | // then the scavenging spill slots may become harder to reach from the |
1129 | // stack pointer, float them so they stay close. |
1130 | if (StackGrowsDown && OffsetBeforeAlignment != Offset && RS && |
1131 | !EarlyScavengingSlots) { |
1132 | SmallVector<int, 2> SFIs; |
1133 | RS->getScavengingFrameIndices(A&: SFIs); |
1134 | LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs() |
1135 | << "Adjusting emergency spill slots!\n" ;); |
1136 | int64_t Delta = Offset - OffsetBeforeAlignment; |
1137 | for (int SFI : SFIs) { |
1138 | LLVM_DEBUG(llvm::dbgs() |
1139 | << "Adjusting offset of emergency spill slot #" << SFI |
1140 | << " from " << MFI.getObjectOffset(SFI);); |
1141 | MFI.setObjectOffset(ObjectIdx: SFI, SPOffset: MFI.getObjectOffset(ObjectIdx: SFI) - Delta); |
1142 | LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(SFI) << "\n" ;); |
1143 | } |
1144 | } |
1145 | } |
1146 | |
1147 | // Update frame info to pretend that this is part of the stack... |
1148 | int64_t StackSize = Offset - LocalAreaOffset; |
1149 | MFI.setStackSize(StackSize); |
1150 | NumBytesStackSpace += StackSize; |
1151 | } |
1152 | |
1153 | /// insertPrologEpilogCode - Scan the function for modified callee saved |
1154 | /// registers, insert spill code for these callee saved registers, then add |
1155 | /// prolog and epilog code to the function. |
1156 | void PEI::insertPrologEpilogCode(MachineFunction &MF) { |
1157 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
1158 | |
1159 | // Add prologue to the function... |
1160 | for (MachineBasicBlock *SaveBlock : SaveBlocks) |
1161 | TFI.emitPrologue(MF, MBB&: *SaveBlock); |
1162 | |
1163 | // Add epilogue to restore the callee-save registers in each exiting block. |
1164 | for (MachineBasicBlock *RestoreBlock : RestoreBlocks) |
1165 | TFI.emitEpilogue(MF, MBB&: *RestoreBlock); |
1166 | |
1167 | // Zero call used registers before restoring callee-saved registers. |
1168 | insertZeroCallUsedRegs(MF); |
1169 | |
1170 | for (MachineBasicBlock *SaveBlock : SaveBlocks) |
1171 | TFI.inlineStackProbe(MF, PrologueMBB&: *SaveBlock); |
1172 | |
1173 | // Emit additional code that is required to support segmented stacks, if |
1174 | // we've been asked for it. This, when linked with a runtime with support |
1175 | // for segmented stacks (libgcc is one), will result in allocating stack |
1176 | // space in small chunks instead of one large contiguous block. |
1177 | if (MF.shouldSplitStack()) { |
1178 | for (MachineBasicBlock *SaveBlock : SaveBlocks) |
1179 | TFI.adjustForSegmentedStacks(MF, PrologueMBB&: *SaveBlock); |
1180 | } |
1181 | |
1182 | // Emit additional code that is required to explicitly handle the stack in |
1183 | // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The |
1184 | // approach is rather similar to that of Segmented Stacks, but it uses a |
1185 | // different conditional check and another BIF for allocating more stack |
1186 | // space. |
1187 | if (MF.getFunction().getCallingConv() == CallingConv::HiPE) |
1188 | for (MachineBasicBlock *SaveBlock : SaveBlocks) |
1189 | TFI.adjustForHiPEPrologue(MF, PrologueMBB&: *SaveBlock); |
1190 | } |
1191 | |
1192 | /// insertZeroCallUsedRegs - Zero out call used registers. |
1193 | void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { |
1194 | const Function &F = MF.getFunction(); |
1195 | |
1196 | if (!F.hasFnAttribute(Kind: "zero-call-used-regs" )) |
1197 | return; |
1198 | |
1199 | using namespace ZeroCallUsedRegs; |
1200 | |
1201 | ZeroCallUsedRegsKind ZeroRegsKind = |
1202 | StringSwitch<ZeroCallUsedRegsKind>( |
1203 | F.getFnAttribute(Kind: "zero-call-used-regs" ).getValueAsString()) |
1204 | .Case(S: "skip" , Value: ZeroCallUsedRegsKind::Skip) |
1205 | .Case(S: "used-gpr-arg" , Value: ZeroCallUsedRegsKind::UsedGPRArg) |
1206 | .Case(S: "used-gpr" , Value: ZeroCallUsedRegsKind::UsedGPR) |
1207 | .Case(S: "used-arg" , Value: ZeroCallUsedRegsKind::UsedArg) |
1208 | .Case(S: "used" , Value: ZeroCallUsedRegsKind::Used) |
1209 | .Case(S: "all-gpr-arg" , Value: ZeroCallUsedRegsKind::AllGPRArg) |
1210 | .Case(S: "all-gpr" , Value: ZeroCallUsedRegsKind::AllGPR) |
1211 | .Case(S: "all-arg" , Value: ZeroCallUsedRegsKind::AllArg) |
1212 | .Case(S: "all" , Value: ZeroCallUsedRegsKind::All); |
1213 | |
1214 | if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip) |
1215 | return; |
1216 | |
1217 | const bool OnlyGPR = static_cast<unsigned>(ZeroRegsKind) & ONLY_GPR; |
1218 | const bool OnlyUsed = static_cast<unsigned>(ZeroRegsKind) & ONLY_USED; |
1219 | const bool OnlyArg = static_cast<unsigned>(ZeroRegsKind) & ONLY_ARG; |
1220 | |
1221 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
1222 | const BitVector AllocatableSet(TRI.getAllocatableSet(MF)); |
1223 | |
1224 | // Mark all used registers. |
1225 | BitVector UsedRegs(TRI.getNumRegs()); |
1226 | if (OnlyUsed) |
1227 | for (const MachineBasicBlock &MBB : MF) |
1228 | for (const MachineInstr &MI : MBB) { |
1229 | // skip debug instructions |
1230 | if (MI.isDebugInstr()) |
1231 | continue; |
1232 | |
1233 | for (const MachineOperand &MO : MI.operands()) { |
1234 | if (!MO.isReg()) |
1235 | continue; |
1236 | |
1237 | MCRegister Reg = MO.getReg(); |
1238 | if (AllocatableSet[Reg] && !MO.isImplicit() && |
1239 | (MO.isDef() || MO.isUse())) |
1240 | UsedRegs.set(Reg); |
1241 | } |
1242 | } |
1243 | |
1244 | // Get a list of registers that are used. |
1245 | BitVector LiveIns(TRI.getNumRegs()); |
1246 | for (const MachineBasicBlock::RegisterMaskPair &LI : MF.front().liveins()) |
1247 | LiveIns.set(LI.PhysReg); |
1248 | |
1249 | BitVector RegsToZero(TRI.getNumRegs()); |
1250 | for (MCRegister Reg : AllocatableSet.set_bits()) { |
1251 | // Skip over fixed registers. |
1252 | if (TRI.isFixedRegister(MF, PhysReg: Reg)) |
1253 | continue; |
1254 | |
1255 | // Want only general purpose registers. |
1256 | if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, PhysReg: Reg)) |
1257 | continue; |
1258 | |
1259 | // Want only used registers. |
1260 | if (OnlyUsed && !UsedRegs[Reg]) |
1261 | continue; |
1262 | |
1263 | // Want only registers used for arguments. |
1264 | if (OnlyArg) { |
1265 | if (OnlyUsed) { |
1266 | if (!LiveIns[Reg]) |
1267 | continue; |
1268 | } else if (!TRI.isArgumentRegister(MF, PhysReg: Reg)) { |
1269 | continue; |
1270 | } |
1271 | } |
1272 | |
1273 | RegsToZero.set(Reg); |
1274 | } |
1275 | |
1276 | // Don't clear registers that are live when leaving the function. |
1277 | for (const MachineBasicBlock &MBB : MF) |
1278 | for (const MachineInstr &MI : MBB.terminators()) { |
1279 | if (!MI.isReturn()) |
1280 | continue; |
1281 | |
1282 | for (const auto &MO : MI.operands()) { |
1283 | if (!MO.isReg()) |
1284 | continue; |
1285 | |
1286 | MCRegister Reg = MO.getReg(); |
1287 | if (!Reg) |
1288 | continue; |
1289 | |
1290 | // This picks up sibling registers (e.q. %al -> %ah). |
1291 | for (MCRegUnit Unit : TRI.regunits(Reg)) |
1292 | RegsToZero.reset(Idx: Unit); |
1293 | |
1294 | for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg)) |
1295 | RegsToZero.reset(Idx: SReg); |
1296 | } |
1297 | } |
1298 | |
1299 | // Don't need to clear registers that are used/clobbered by terminating |
1300 | // instructions. |
1301 | for (const MachineBasicBlock &MBB : MF) { |
1302 | if (!MBB.isReturnBlock()) |
1303 | continue; |
1304 | |
1305 | MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); |
1306 | for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E; |
1307 | ++I) { |
1308 | for (const MachineOperand &MO : I->operands()) { |
1309 | if (!MO.isReg()) |
1310 | continue; |
1311 | |
1312 | MCRegister Reg = MO.getReg(); |
1313 | if (!Reg) |
1314 | continue; |
1315 | |
1316 | for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg)) |
1317 | RegsToZero.reset(Idx: Reg); |
1318 | } |
1319 | } |
1320 | } |
1321 | |
1322 | // Don't clear registers that must be preserved. |
1323 | for (const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF: &MF); |
1324 | MCPhysReg CSReg = *CSRegs; ++CSRegs) |
1325 | for (MCRegister Reg : TRI.sub_and_superregs_inclusive(Reg: CSReg)) |
1326 | RegsToZero.reset(Idx: Reg); |
1327 | |
1328 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
1329 | for (MachineBasicBlock &MBB : MF) |
1330 | if (MBB.isReturnBlock()) |
1331 | TFI.emitZeroCallUsedRegs(RegsToZero, MBB); |
1332 | } |
1333 | |
1334 | /// Replace all FrameIndex operands with physical register references and actual |
1335 | /// offsets. |
1336 | void PEI::replaceFrameIndicesBackward(MachineFunction &MF) { |
1337 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
1338 | |
1339 | for (auto &MBB : MF) { |
1340 | int SPAdj = 0; |
1341 | if (!MBB.succ_empty()) { |
1342 | // Get the SP adjustment for the end of MBB from the start of any of its |
1343 | // successors. They should all be the same. |
1344 | assert(all_of(MBB.successors(), [&MBB](const MachineBasicBlock *Succ) { |
1345 | return Succ->getCallFrameSize() == |
1346 | (*MBB.succ_begin())->getCallFrameSize(); |
1347 | })); |
1348 | const MachineBasicBlock &FirstSucc = **MBB.succ_begin(); |
1349 | SPAdj = TFI.alignSPAdjust(SPAdj: FirstSucc.getCallFrameSize()); |
1350 | if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) |
1351 | SPAdj = -SPAdj; |
1352 | } |
1353 | |
1354 | replaceFrameIndicesBackward(BB: &MBB, MF, SPAdj); |
1355 | |
1356 | // We can't track the call frame size after call frame pseudos have been |
1357 | // eliminated. Set it to zero everywhere to keep MachineVerifier happy. |
1358 | MBB.setCallFrameSize(0); |
1359 | } |
1360 | } |
1361 | |
1362 | /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical |
1363 | /// register references and actual offsets. |
1364 | void PEI::replaceFrameIndices(MachineFunction &MF) { |
1365 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
1366 | |
1367 | for (auto &MBB : MF) { |
1368 | int SPAdj = TFI.alignSPAdjust(SPAdj: MBB.getCallFrameSize()); |
1369 | if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) |
1370 | SPAdj = -SPAdj; |
1371 | |
1372 | replaceFrameIndices(BB: &MBB, MF, SPAdj); |
1373 | |
1374 | // We can't track the call frame size after call frame pseudos have been |
1375 | // eliminated. Set it to zero everywhere to keep MachineVerifier happy. |
1376 | MBB.setCallFrameSize(0); |
1377 | } |
1378 | } |
1379 | |
1380 | bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI, |
1381 | unsigned OpIdx, int SPAdj) { |
1382 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
1383 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
1384 | if (MI.isDebugValue()) { |
1385 | |
1386 | MachineOperand &Op = MI.getOperand(i: OpIdx); |
1387 | assert(MI.isDebugOperand(&Op) && |
1388 | "Frame indices can only appear as a debug operand in a DBG_VALUE*" |
1389 | " machine instruction" ); |
1390 | Register Reg; |
1391 | unsigned FrameIdx = Op.getIndex(); |
1392 | unsigned Size = MF.getFrameInfo().getObjectSize(ObjectIdx: FrameIdx); |
1393 | |
1394 | StackOffset Offset = TFI->getFrameIndexReference(MF, FI: FrameIdx, FrameReg&: Reg); |
1395 | Op.ChangeToRegister(Reg, isDef: false /*isDef*/); |
1396 | |
1397 | const DIExpression *DIExpr = MI.getDebugExpression(); |
1398 | |
1399 | // If we have a direct DBG_VALUE, and its location expression isn't |
1400 | // currently complex, then adding an offset will morph it into a |
1401 | // complex location that is interpreted as being a memory address. |
1402 | // This changes a pointer-valued variable to dereference that pointer, |
1403 | // which is incorrect. Fix by adding DW_OP_stack_value. |
1404 | |
1405 | if (MI.isNonListDebugValue()) { |
1406 | unsigned PrependFlags = DIExpression::ApplyOffset; |
1407 | if (!MI.isIndirectDebugValue() && !DIExpr->isComplex()) |
1408 | PrependFlags |= DIExpression::StackValue; |
1409 | |
1410 | // If we have DBG_VALUE that is indirect and has a Implicit location |
1411 | // expression need to insert a deref before prepending a Memory |
1412 | // location expression. Also after doing this we change the DBG_VALUE |
1413 | // to be direct. |
1414 | if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) { |
1415 | SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; |
1416 | bool WithStackValue = true; |
1417 | DIExpr = DIExpression::prependOpcodes(Expr: DIExpr, Ops, StackValue: WithStackValue); |
1418 | // Make the DBG_VALUE direct. |
1419 | MI.getDebugOffset().ChangeToRegister(Reg: 0, isDef: false); |
1420 | } |
1421 | DIExpr = TRI.prependOffsetExpression(Expr: DIExpr, PrependFlags, Offset); |
1422 | } else { |
1423 | // The debug operand at DebugOpIndex was a frame index at offset |
1424 | // `Offset`; now the operand has been replaced with the frame |
1425 | // register, we must add Offset with `register x, plus Offset`. |
1426 | unsigned DebugOpIndex = MI.getDebugOperandIndex(Op: &Op); |
1427 | SmallVector<uint64_t, 3> Ops; |
1428 | TRI.getOffsetOpcodes(Offset, Ops); |
1429 | DIExpr = DIExpression::appendOpsToArg(Expr: DIExpr, Ops, ArgNo: DebugOpIndex); |
1430 | } |
1431 | MI.getDebugExpressionOp().setMetadata(DIExpr); |
1432 | return true; |
1433 | } |
1434 | |
1435 | if (MI.isDebugPHI()) { |
1436 | // Allow stack ref to continue onwards. |
1437 | return true; |
1438 | } |
1439 | |
1440 | // TODO: This code should be commoned with the code for |
1441 | // PATCHPOINT. There's no good reason for the difference in |
1442 | // implementation other than historical accident. The only |
1443 | // remaining difference is the unconditional use of the stack |
1444 | // pointer as the base register. |
1445 | if (MI.getOpcode() == TargetOpcode::STATEPOINT) { |
1446 | assert((!MI.isDebugValue() || OpIdx == 0) && |
1447 | "Frame indicies can only appear as the first operand of a " |
1448 | "DBG_VALUE machine instruction" ); |
1449 | Register Reg; |
1450 | MachineOperand &Offset = MI.getOperand(i: OpIdx + 1); |
1451 | StackOffset refOffset = TFI->getFrameIndexReferencePreferSP( |
1452 | MF, FI: MI.getOperand(i: OpIdx).getIndex(), FrameReg&: Reg, /*IgnoreSPUpdates*/ false); |
1453 | assert(!refOffset.getScalable() && |
1454 | "Frame offsets with a scalable component are not supported" ); |
1455 | Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj); |
1456 | MI.getOperand(i: OpIdx).ChangeToRegister(Reg, isDef: false /*isDef*/); |
1457 | return true; |
1458 | } |
1459 | return false; |
1460 | } |
1461 | |
1462 | void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB, |
1463 | MachineFunction &MF, int &SPAdj) { |
1464 | assert(MF.getSubtarget().getRegisterInfo() && |
1465 | "getRegisterInfo() must be implemented!" ); |
1466 | |
1467 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
1468 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
1469 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
1470 | |
1471 | RegScavenger *LocalRS = FrameIndexEliminationScavenging ? RS : nullptr; |
1472 | if (LocalRS) |
1473 | LocalRS->enterBasicBlockEnd(MBB&: *BB); |
1474 | |
1475 | for (MachineBasicBlock::iterator I = BB->end(); I != BB->begin();) { |
1476 | MachineInstr &MI = *std::prev(x: I); |
1477 | |
1478 | if (TII.isFrameInstr(I: MI)) { |
1479 | SPAdj -= TII.getSPAdjust(MI); |
1480 | TFI.eliminateCallFramePseudoInstr(MF, MBB&: *BB, MI: &MI); |
1481 | continue; |
1482 | } |
1483 | |
1484 | // Step backwards to get the liveness state at (immedately after) MI. |
1485 | if (LocalRS) |
1486 | LocalRS->backward(I); |
1487 | |
1488 | bool RemovedMI = false; |
1489 | for (const auto &[Idx, Op] : enumerate(First: MI.operands())) { |
1490 | if (!Op.isFI()) |
1491 | continue; |
1492 | |
1493 | if (replaceFrameIndexDebugInstr(MF, MI, OpIdx: Idx, SPAdj)) |
1494 | continue; |
1495 | |
1496 | // Eliminate this FrameIndex operand. |
1497 | RemovedMI = TRI.eliminateFrameIndex(MI, SPAdj, FIOperandNum: Idx, RS: LocalRS); |
1498 | if (RemovedMI) |
1499 | break; |
1500 | } |
1501 | |
1502 | if (!RemovedMI) |
1503 | --I; |
1504 | } |
1505 | } |
1506 | |
1507 | void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, |
1508 | int &SPAdj) { |
1509 | assert(MF.getSubtarget().getRegisterInfo() && |
1510 | "getRegisterInfo() must be implemented!" ); |
1511 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
1512 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
1513 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
1514 | |
1515 | bool InsideCallSequence = false; |
1516 | |
1517 | for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { |
1518 | if (TII.isFrameInstr(I: *I)) { |
1519 | InsideCallSequence = TII.isFrameSetup(I: *I); |
1520 | SPAdj += TII.getSPAdjust(MI: *I); |
1521 | I = TFI->eliminateCallFramePseudoInstr(MF, MBB&: *BB, MI: I); |
1522 | continue; |
1523 | } |
1524 | |
1525 | MachineInstr &MI = *I; |
1526 | bool DoIncr = true; |
1527 | bool DidFinishLoop = true; |
1528 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
1529 | if (!MI.getOperand(i).isFI()) |
1530 | continue; |
1531 | |
1532 | if (replaceFrameIndexDebugInstr(MF, MI, OpIdx: i, SPAdj)) |
1533 | continue; |
1534 | |
1535 | // Some instructions (e.g. inline asm instructions) can have |
1536 | // multiple frame indices and/or cause eliminateFrameIndex |
1537 | // to insert more than one instruction. We need the register |
1538 | // scavenger to go through all of these instructions so that |
1539 | // it can update its register information. We keep the |
1540 | // iterator at the point before insertion so that we can |
1541 | // revisit them in full. |
1542 | bool AtBeginning = (I == BB->begin()); |
1543 | if (!AtBeginning) --I; |
1544 | |
1545 | // If this instruction has a FrameIndex operand, we need to |
1546 | // use that target machine register info object to eliminate |
1547 | // it. |
1548 | TRI.eliminateFrameIndex(MI, SPAdj, FIOperandNum: i); |
1549 | |
1550 | // Reset the iterator if we were at the beginning of the BB. |
1551 | if (AtBeginning) { |
1552 | I = BB->begin(); |
1553 | DoIncr = false; |
1554 | } |
1555 | |
1556 | DidFinishLoop = false; |
1557 | break; |
1558 | } |
1559 | |
1560 | // If we are looking at a call sequence, we need to keep track of |
1561 | // the SP adjustment made by each instruction in the sequence. |
1562 | // This includes both the frame setup/destroy pseudos (handled above), |
1563 | // as well as other instructions that have side effects w.r.t the SP. |
1564 | // Note that this must come after eliminateFrameIndex, because |
1565 | // if I itself referred to a frame index, we shouldn't count its own |
1566 | // adjustment. |
1567 | if (DidFinishLoop && InsideCallSequence) |
1568 | SPAdj += TII.getSPAdjust(MI); |
1569 | |
1570 | if (DoIncr && I != BB->end()) |
1571 | ++I; |
1572 | } |
1573 | } |
1574 | |