//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <numeric>
#include <optional>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

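// Look for saves of callee-saved registers: simple stores from a register
// into a negative-offset stack slot. A register only survives the
// blacklisting checks below if it is saved exactly once and its slot is not
// otherwise read or written; surviving candidates are recorded together with
// their saving cost (the execution count of the saving block).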
void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed on the stack -- do not
        // mess with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere into another reg, we
        // can't update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}

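// Walk each block backwards to pair every candidate save with its restores:
// simple negative-offset loads from the same stack slot. Candidates whose
// register has a local use reaching the restore, or whose restore offset
// does not match the save offset, are dropped.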
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If the stack offsets of the save and the restore don't agree with
        // each other, we don't completely understand what's happening here.
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}

std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}

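// Bookkeeping for stack regions: BlacklistedRegions holds slots the pass
// must not touch, while AvailableRegions tracks slots we know how to move.
// The helpers below keep the two sets disjoint by evicting available
// regions that overlap a newly blacklisted one.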
void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}

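// If this instruction initializes the frame pointer, check that it simply
// copies the current SP value. Anything fancier is a frame setup we do not
// model, so give up: blacklistRegion(0, 0) makes BlacklistedRegions
// non-empty, which in turn blocks canCollapseRegion/canInsertRegion for the
// whole function.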
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}

void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) {
    return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer();
  });
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same as FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}

void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed on the stack -- do not
      // mess with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}

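// Track the CFA definition through the function, starting from the x86-64
// entry-point default (CFA = RSP + 8, i.e. DWARF register 7 and offset -8 in
// our SP-relative convention), and annotate CFA-related CFIs with the stack
// slot they describe so they can be adjusted later. Functions whose CFA is
// based on a register other than SP or FP are marked not simple.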
void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        [[fallthrough]];
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}

void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}

bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}

bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

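// Schedule offset adjustments for all annotated stack accesses dominated by
// P to make room for a new RegionSz-byte allocation at that point. This is
// the inverse of collapseRegion: offsets move by -RegionSz instead of
// +RegionSz.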
bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in
  // collapseRegion: it is the value of SP before the allocation the user
  // wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}

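// Apply all scheduled WorklistItem annotations: rewrite CFI offsets that
// need adjusting and re-create loads/stores (or patch SP arithmetic via
// addToImm) with their adjusted stack offsets.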
void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
                                       StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
                                  Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
    }
  }
}

void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}

std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

using BBIterTy = BinaryBasicBlock::iterator;

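// Record, per callee-saved register, the set of instructions that touch it
// other than its own save and restore. When the function sets up a frame
// pointer, calls are additionally counted as uses of any callee-saved regs
// aliased with FP.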
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}

void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
          << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores:"
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}

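// For each CSR, collect candidate save positions: the first instruction of
// every block that is not inside a loop, has a known stack state, and
// dominates all uses of the register. Candidates are then sorted from
// hottest to coldest block, so the best (coldest) candidate sits at the
// back of BestSavePos/BestSaveCount.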
void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as a valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know the stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    llvm::stable_sort(BestSavePos[I], [&](const MCInst *A, const MCInst *B) {
      const BinaryBasicBlock *BBA = InsnToBB[A];
      const BinaryBasicBlock *BBB = InsnToBB[B];
      const uint64_t CountA = BBA->getKnownExecutionCount();
      const uint64_t CountB = BBB->getKnownExecutionCount();
      return CountB < CountA;
    });

    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}

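// Order registers by the dominance relation of their best save positions,
// so registers whose save points dominate those of others come first; ties
// and registers without candidates fall back to register-number order.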
void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    Order.push_back(I);
  }

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}

bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
         "save position vectors out of sync");
  if (BestSaveCount[CSR].empty())
    return false;

  const uint64_t BestCount = BestSaveCount[CSR].back();
  BestPosSave = BestSavePos[CSR].back();
  if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
    return false;

  LLVM_DEBUG({
    auto &InsnToBB = Info.getInsnToBBMap();
    dbgs() << "Better position for saves found in func " << BF.getPrintName()
           << " count: " << BF.getKnownExecutionCount() << "\n";
    dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
           << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
  });

  TotalEstimatedWin = CurSavingCost - BestCount;
  return true;
}

/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating from blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split the edge for every destination BB
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert a dummy instruction so this BB is never empty (we need this
      // for PredictiveStackPointerTracking to work, since it annotates
      // instructions and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier in place
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append the new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}

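// Compute restore positions for a save placed at BestPosSave: the dominance
// frontier of the save point. Gives up (returns an empty vector) when a
// frontier point is a terminator that uses the CSR or an invoke (meaning the
// landing pad is not dominated), and splits critical edges so restores can
// be hosted on them.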
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // on edges transitioning to the dominance frontier; otherwise we pull
  // these restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which
    // indicate the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect empty BBs now and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << " BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << " Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}

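// Decide whether saves/restores for this CSR can be implemented as push/pop
// instructions. This requires: no function alignment constraints, no stack
// arithmetic, all save regions collapsible, all existing restores already
// pops, and a safe insertion point for the new push (in particular, not the
// entry offset -8 when the function sets up a frame pointer).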
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  if (FA.hasStackArithmetic(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}

SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (SPT.getStateBefore(Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}

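// Schedule removal (push/pop mode) or conversion into plain SP adjustments
// (load/store mode) of the original save and restore instructions for CSR,
// along with the CFIs attached to them. Deleted push/pop CFIs are remembered
// so they can be replayed at the new locations.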
void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}

bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}

void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}

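// Main driver for moving spills: for each CSR with a profitable (colder)
// save position, compute restore placements, pick push/pop or load/store
// mode, and schedule removal of the old saves/restores plus insertion of
// the new ones. Push/pop mode is all-or-nothing per function and is
// reverted if any CSR cannot use it.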
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know the stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (MCInst &Inst : llvm::reverse(BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}

namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace

bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().starts_with(".LSplitEdge"))
      continue;
    for (BinaryBasicBlock &RBB : llvm::reverse(BF)) {
      if (&RBB == &BB)
        break;
      if (!RBB.getName().starts_with(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from the CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}

namespace {

// A special StackPointerTracking that compensates for our future plans
// in removing/adding insns.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace

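// Re-emit CFI information for a moved CSR save: find the new save point and
// insert the remembered push/pop CFIs at every boundary where control
// enters or leaves the region dominated by it, using SPValPush/SPValPop as
// the CFA offsets.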
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}

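// Rebuild all OpDefCfaOffset CFIs from scratch: mark the old ones for
// deletion, then walk the layout emitting a new cfiDefCfaOffset wherever
// the SP value tracked by StackPointerTracking changes, including at block
// and hot-cold boundaries.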
void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}

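// Build an MCInst for the frame access described by FIE. The access is
// emitted relative to the stack pointer whenever the SP value is known at
// this point, and relative to the frame pointer otherwise; CreatePushOrPop
// requests conversion of an SP-based access into an actual push/pop.
// Illustrative example (values are hypothetical): a store with
// FIE.StackOffset == -16 while SPVal == -16 is emitted at offset 0 from
// the current SP.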
Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                                   const FrameIndexEntry &FIE,
                                                   bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size);
    } else {
      BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                FIE.StackOffset - SPVal, FIE.RegOrImm,
                                FIE.Size);
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size);
  } else {
    BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                              FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
  }
  return NewInst;
}

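// Adjust the offset of a re-inserted CFA-related CFI to the offset valid
// at its new location. The UpdatedCFIs set ensures each CFI is rewritten
// at most once, even if it is visited from multiple insertion points.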
void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}

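// Replay the CFIs that were deleted along with a moved push or pop of
// register Reg. For a push, the deleted push CFIs are re-inserted and a
// new OpOffset records the save slot at NewOffset; for a pop, the deleted
// pop CFIs are re-inserted and a SameValue CFI signals that Reg again
// holds its original value.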
BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}

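// Materialize a single queued insertion at InsertionPoint. When the
// original CFIs for this CSR were deleted, the SP offset of the new push
// or pop is recorded in PushOffsetByReg/PopOffsetByReg, which is what
// later causes rebuildCFI() to reconstruct unwind info for this register.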
Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                                    BinaryBasicBlock *CurBB,
                                                    const WorklistItem &Item,
                                                    int64_t SPVal,
                                                    int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  Expected<MCInst> NewInstOrErr =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (auto E = NewInstOrErr.takeError())
    return Error(std::move(E));
  MCInst &NewInst = *NewInstOrErr;
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

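// Materialize all queued insertions for one program point, given the
// tracked SP/FP values there. For end-of-block insertions the effect of
// the queued pushes/pops (already simulated by predictive stack-pointer
// tracking) is undone first, so the accesses are emitted against the
// pre-insertion SP value; e.g., a queued push of size 8 means the tracked
// SPVal is 8 lower than the value at emission time (illustrative numbers).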
Expected<BBIterTy> ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location; we want the SP value
  // before insertions.
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    auto InsertionPointOrErr =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (auto E = InsertionPointOrErr.takeError())
      return Error(std::move(E));
    InsertionPoint = *InsertionPointOrErr;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

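// Drain the insertion work lists: visit every annotated instruction and
// every block-level Todo entry, using PredictiveStackPointerTracking to
// obtain the SP/FP state at each insertion point. Returns true if any new
// instruction was inserted.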
Expected<bool> ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      auto IterOrErr =
          processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
      if (auto E = IterOrErr.takeError())
        return Error(std::move(E));
      I = *IterOrErr;
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      if (auto E = processInsertionsList(BB.end(), &BB, WRI->second,
                                         SPTState.first, SPTState.second)
                       .takeError())
        return Error(std::move(E));
      Changes = true;
    }
  }
  return Changes;
}

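// Drain the deletion work lists: erase relocated saves/restores, and turn
// a push/pop whose memory operation became redundant into a plain SP
// adjustment, avoiding flag-clobbering encodings when liveness analysis
// reports the flags register live at that point.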
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is flag reg alive across this func?
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

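// Entry point for CFI repair after the pass has moved code around: rebuild
// the SP-based CFA chain when the function has no frame pointer, then
// replay saved-register CFIs for every CSR whose push and pop were both
// relocated (i.e., both recorded offsets are non-zero).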
void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

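// Run shrink wrapping over the current function and return true if it was
// modified. The dynamic instruction and store counters are accumulated
// before any early exit so that the pass statistics also cover functions
// that end up not being transformed.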
Expected<bool> ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      if (BC.MIB->mayStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could
    // regress dcache misses). Moreover, ambiguous JTs are rare and usually
    // come from code written in assembly language. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  auto ModifiedOrErr = processInsertions();
  if (auto E = ModifiedOrErr.takeError())
    return Error(std::move(E));
  const bool Modified = *ModifiedOrErr;
  if (!Modified)
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

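// Print aggregate pass statistics, reporting moved and failed spill
// executions both as absolute counts and as percentages of the dynamic
// instruction and store counts gathered in perform().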
void ShrinkWrapping::printStats(BinaryContext &BC) {
  BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
            << " spills inserting load/stores and " << SpillsMovedPushPopMode
            << " spills inserting push/pops\n";
  if (!InstrDynamicCount || !StoreDynamicCount)
    return;
  BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
            << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
  BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
            << SpillsFailedDynamicCount << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
}

// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm