//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <numeric>
#include <optional>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

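// Scans every store in the function looking for callee-saved register
// spills. For each surviving candidate we record the save instruction, its
// stack offset, and the dynamic cost of saving in the prologue; registers
// whose slot is aliased, reloaded into another register, or saved more than
// once are blacklisted.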
void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed on the stack -- do not
        // mess with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}

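// Pairs each candidate callee-saved register with its restores: simple loads
// from the same stack offset as the save that are not followed by a local
// use of the register.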
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If the stack offsets of the save and the restore don't agree, we
        // don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}

std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}

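// Region bookkeeping: blacklisted regions are stack ranges we must not
// touch, while available regions are ranges we know how to relocate. An
// access that overlaps an available region without matching it exactly
// invalidates that region.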
void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}

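// If an instruction initializes the frame pointer with something other than
// the current SP value, we do not recognize the frame setup; blacklisting a
// dummy region makes all later canCollapseRegion/canInsertRegion queries
// fail.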
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}

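// Detects SP definitions computed from the frame pointer. If SP is being
// restored to an older value through FP, the instruction is tagged with a
// slot annotation so its offset can be fixed after layout changes; an
// FP-based allocation disables layout changes entirely.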
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) {
    return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer();
  });
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same as FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}

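// Visits every instruction and classifies each recognized stack access:
// accesses that are not simple, that belong to stack-passed parameters, or
// that conflict with other chunks are blacklisted; everything else becomes
// an available region keyed by stack offset.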
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed on the stack -- do not
      // mess with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}

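// Replays the function's CFI stream, tracking the CFA register and offset
// (including remember/restore state), so offset CFIs relative to SP or FP
// can be tagged with the stack slot they describe. Any other CFA register
// makes the function non-simple.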
void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        [[fallthrough]];
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}

void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}

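// A region can be collapsed (its allocation removed along with a deleted
// push) only in simple functions with no blacklisted regions; affected
// accesses below the region are compensated via scheduled offset
// adjustments.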
bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}

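// Inserting a region grows the stack at program point P. RegionAddr is the
// SP value right before the new allocation, and every affected access
// dominated by P is scheduled for the opposite adjustment (-RegionSz).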
bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via the stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in
  // collapseRegion: it is the value of SP before the allocation the user
  // wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}

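// Applies all scheduled worklist items: CFI offsets are rewritten at most
// once per CFI index, while loads/stores (and FP-based SP updates) are
// re-created with their adjusted stack offsets.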
void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
                                       StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
                                  Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
    }
  }
}

void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}

std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

using BBIterTy = BinaryBasicBlock::iterator;

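// Records, for each callee-saved register, the set of instructions that use
// it other than its own save and restore. When the function keeps a frame
// pointer, calls count as uses of FP-aliased CSRs.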
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}

void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because we found no uses "
                    "of it: "
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores: "
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}

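// A basic block is a candidate save position for a CSR if it is outside any
// loop, its stack state is known, and its first instruction dominates every
// use of the register. Candidates are kept sorted from hottest to coldest,
// so the best (coldest) position sits at the back of BestSavePos.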
void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
                     [&](const MCInst *A, const MCInst *B) {
                       const BinaryBasicBlock *BBA = InsnToBB[A];
                       const BinaryBasicBlock *BBB = InsnToBB[B];
                       const uint64_t CountA = BBA->getKnownExecutionCount();
                       const uint64_t CountB = BBB->getKnownExecutionCount();
                       return CountB < CountA;
                     });

    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}

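// Orders registers by the dominance relation of their chosen save
// positions: registers whose save position dominates another's come first,
// and registers without any candidate position go last.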
void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    Order.push_back(I);

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}

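// The coldest candidate position is only profitable if its execution count
// falls below ShrinkWrappingThreshold percent of the current prologue
// saving cost; the difference is reported as the estimated win.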
bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
         "save position vectors out of sync");
  if (BestSaveCount[CSR].empty())
    return false;

  const uint64_t BestCount = BestSaveCount[CSR].back();
  BestPosSave = BestSavePos[CSR].back();
  if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
    return false;

  LLVM_DEBUG({
    auto &InsnToBB = Info.getInsnToBBMap();
    dbgs() << "Better position for saves found in func " << BF.getPrintName()
           << " count: " << BF.getKnownExecutionCount() << "\n";
    dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
           << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
  });

  TotalEstimatedWin = CurSavingCost - BestCount;
  return true;
}

/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to remember the
  // original size of the Frontier, as we may append to it while splitting
  // critical edges originating from blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split the edge for every destination BB
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert a dummy instruction so this BB is never empty (we need this
      // for PredictiveStackPointerTracking to work, since it annotates
      // instructions and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update the frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier in place
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append the new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}

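// Restores are placed on the dominance frontier of the save position.
// Frontier points that sit on critical edges are split into fresh blocks,
// and the placement is aborted if a frontier terminator or invoke uses the
// CSR.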
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // on edges transitioning to the dominance frontier; otherwise we would
  // pull these restores into the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which
    // indicates the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid.
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}

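// Push/pop mode rewrites spills as push/pop pairs, which moves SP. It is
// only legal when the frame needs no alignment, no stack address is taken,
// every save's region can be collapsed, every restore is already a pop, and
// a region can be inserted at the new save point.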
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  if (FA.hasStackArithmetic(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}

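// A pop only restores the right value if SP at the insertion point matches
// the offset the register was saved at. Slide each restore point to a
// position in its block where the SP offset equals SaveOffset and the CSR
// has no reaching uses; if none exists, give up on push/pop mode.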
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (SPT.getStateBefore(Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}

void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}

bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}

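// Schedules insertion of the new save at BestPosSave and of one restore per
// restore point. Restores are pulled before the block's terminator when one
// is in the way, or redirected to the end of the block when the chosen
// point uses the CSR or follows an instruction prefix.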
void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}

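// Per-function driver: for each CSR, pick a colder save position, place
// restores on its dominance frontier, choose between push/pop and
// load/store mode, and schedule removal of the old save/restore pairs.
// Push/pop mode is all-or-nothing per function and is reverted for every
// CSR if it fails for any of them.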
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (MCInst &Inst : llvm::reverse(BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}

namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace

bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().starts_with(".LSplitEdge"))
      continue;
    for (BinaryBasicBlock &RBB : llvm::reverse(BF)) {
      if (&RBB == &BB)
        break;
      if (!RBB.getName().starts_with(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}

namespace {

// A special StackPointerTracking that compensates for our future plans
// in removing/adding insns.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace

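// Re-emits the deleted save/restore CFI for CSR around the region dominated
// by the new save point: CFI is inserted at every transition into or out of
// the dominated zone, including fall-through between blocks and the
// hot/cold split point.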
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}

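// Deletes all existing OpDefCfaOffset CFIs and re-emits them from the
// recomputed stack pointer state, inserting extra CFIs at block boundaries
// and at the hot/cold split point where the SP offset changes.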
void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}

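// Create a load (restore) or store (save) instruction for the stack slot
// described by FIE. Prefer addressing the slot relative to the stack pointer
// when its value is known at this point; otherwise fall back to the frame
// pointer, which the assertion below requires to be valid. When
// CreatePushOrPop is set, the SP-based access is converted into an actual
// push/pop instruction.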
Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                                   const FrameIndexEntry &FIE,
                                                   bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size);
    } else {
      BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                FIE.StackOffset - SPVal, FIE.RegOrImm,
                                FIE.Size);
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size);
  } else {
    BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                              FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
  }
  return NewInst;
}

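// Rewrite the offset of a reinserted CFA-related CFI to NewOffset, at most
// once per CFI instruction, as tracked by the UpdatedCFIs set.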
void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}

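// Reinsert at Pos the CFIs that were deleted together with the push (or pop)
// of Reg: the pseudo CFIs recorded in DeletedPushCFIs/DeletedPopCFIs plus,
// if an OpOffset CFI was dropped, a fresh .cfi_offset for a push or a
// .cfi_same_value for a pop. Returns the position past the inserted CFIs.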
BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}

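// Materialize a single scheduled insertion at InsertionPoint in CurBB,
// creating the save/restore (or push/pop) described by Item. Also records
// the push/pop offset per affected register, which later triggers CFI
// reconstruction for that CSR in rebuildCFI().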
Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                                    BinaryBasicBlock *CurBB,
                                                    const WorklistItem &Item,
                                                    int64_t SPVal,
                                                    int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  Expected<MCInst> NewInstOrErr =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (auto E = NewInstOrErr.takeError())
    return Error(std::move(E));
  MCInst &NewInst = *NewInstOrErr;
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

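// Apply all insertion items scheduled for a single program point. SPVal and
// FPVal describe the stack state at that point and are updated as push/pop
// insertions are materialized, so each new instruction is created against
// the stack offset it will actually observe.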
Expected<BBIterTy> ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location; we want the SP value as it
  // was before our insertions.
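  // For example, if we are about to insert an 8-byte push at the end of this
  // block, PSPT already reports the post-push SP (8 bytes lower), so adding
  // the push size back recovers the SP value the new push will actually see.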
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
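  // The comparator keeps all non-POP items in front (in their original order,
  // since the sort is stable) and orders POP insertions by decreasing
  // DomOrder of the restored register, so restores are emitted in the
  // reverse order of their dominance-ordered saves.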
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    auto InsertionPointOrErr =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (auto E = InsertionPointOrErr.takeError())
      return Error(std::move(E));
    InsertionPoint = *InsertionPointOrErr;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

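// Walk all annotated program points and materialize their insertion lists.
// PredictiveStackPointerTracking supplies the SP/FP values each point will
// have once the scheduled changes are applied, accounting for the
// stack-pointer effects of the insertions themselves. Returns true if any
// instruction was inserted.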
Expected<bool> ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      auto IterOrErr =
          processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
      if (auto E = IterOrErr.takeError())
        return Error(std::move(E));
      I = *IterOrErr;
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      if (auto E = processInsertionsList(BB.end(), &BB, WRI->second,
                                         SPTState.first, SPTState.second)
                       .takeError())
        return Error(std::move(E));
      Changes = true;
    }
  }
  return Changes;
}

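// Apply the destructive part of the worklist: erase spills scheduled for
// removal or, for ChangeToAdjustment items, rewrite a push/pop into an
// explicit stack pointer increment/decrement, avoiding flag-clobbering
// arithmetic when the flags register is live at that point.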
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is flag reg alive across this func?
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

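// Top-level CFI repair after shrink wrapping: if the function has no frame
// pointer, SP-based CFA offsets are rebuilt first; then, for every register
// whose save/restore was moved in push/pop mode (both offsets recorded), the
// CFIs deleted along with the old push/pop pair are reinserted at the new
// locations.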
void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

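// Pass driver for a single function: gather dynamic instruction and store
// counts for statistics; bail out on functions without frame information, on
// cold functions (in HotOnly mode), and on ambiguous jump tables; then
// compute callee-saved register usage, pick cheaper save/restore placements,
// apply the scheduled insertions and deletions, and finally repair CFI and
// branches. Returns true if the function was modified.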
Expected<bool> ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      if (BC.MIB->mayStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost: duplicating potentially large jump tables could regress
    // dcache misses. Moreover, ambiguous JTs are rare and typically come from
    // code written in assembly. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  auto ModifiedOrErr = processInsertions();
  if (auto E = ModifiedOrErr.takeError())
    return Error(std::move(E));
  const bool Modified = *ModifiedOrErr;
  if (!Modified)
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

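// Report aggregated shrink-wrapping statistics across all processed
// functions, expressing moved and failed-to-move spill executions as a
// fraction of the dynamic instruction and store counts gathered in perform().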
void ShrinkWrapping::printStats(BinaryContext &BC) {
  BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
            << " spills inserting load/stores and " << SpillsMovedPushPopMode
            << " spills inserting push/pops\n";
  if (!InstrDynamicCount || !StoreDynamicCount)
    return;
  BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
            << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
  BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
            << SpillsFailedDynamicCount << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
}


// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm