//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//
12 | |
13 | #include "bolt/Passes/ShrinkWrapping.h" |
14 | #include "bolt/Passes/DataflowInfoManager.h" |
15 | #include "bolt/Passes/MCF.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include <numeric> |
18 | #include <optional> |
19 | #include <stack> |
20 | |
21 | #define DEBUG_TYPE "shrinkwrapping" |
22 | |
23 | using namespace llvm; |
24 | |
25 | namespace opts { |
26 | |
27 | extern cl::opt<bool> TimeOpts; |
28 | extern cl::OptionCategory BoltOptCategory; |
29 | |
30 | static cl::opt<unsigned> ShrinkWrappingThreshold( |
31 | "shrink-wrapping-threshold" , |
32 | cl::desc("Percentage of prologue execution count to use as threshold when" |
33 | " evaluating whether a block is cold enough to be profitable to" |
34 | " move eligible spills there" ), |
35 | cl::init(Val: 30), cl::ZeroOrMore, cl::cat(BoltOptCategory)); |
36 | } // namespace opts |
37 | |
38 | namespace llvm { |
39 | namespace bolt { |
40 | |
41 | void CalleeSavedAnalysis::analyzeSaves() { |
42 | ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs(); |
43 | StackReachingUses &SRU = Info.getStackReachingUses(); |
44 | auto &InsnToBB = Info.getInsnToBBMap(); |
45 | BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false); |
46 | |
47 | LLVM_DEBUG(dbgs() << "Checking spill locations\n" ); |
48 | for (BinaryBasicBlock &BB : BF) { |
49 | LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n" ); |
50 | const MCInst *Prev = nullptr; |
51 | for (MCInst &Inst : BB) { |
52 | if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { |
53 | // Blacklist weird stores we don't understand |
54 | if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore && |
55 | FIE->IsStoreFromReg) { |
56 | BlacklistedRegs.set(FIE->RegOrImm); |
57 | CalleeSaved.reset(Idx: FIE->RegOrImm); |
58 | Prev = &Inst; |
59 | continue; |
60 | } |
61 | |
62 | if (!FIE->IsStore || !FIE->IsStoreFromReg || |
63 | BlacklistedRegs[FIE->RegOrImm]) { |
64 | Prev = &Inst; |
65 | continue; |
66 | } |
67 | |
68 | // If this reg is defined locally, it is not a callee-saved reg |
69 | if (RD.isReachedBy(Reg: FIE->RegOrImm, |
70 | Candidates: Prev ? RD.expr_begin(Point: *Prev) : RD.expr_begin(Point: BB))) { |
71 | BlacklistedRegs.set(FIE->RegOrImm); |
72 | CalleeSaved.reset(Idx: FIE->RegOrImm); |
73 | Prev = &Inst; |
74 | continue; |
75 | } |
76 | |
77 | // If this stack position is accessed in another function, we are |
78 | // probably dealing with a parameter passed in a stack -- do not mess |
79 | // with it |
80 | if (SRU.isStoreUsed(StoreFIE: *FIE, |
81 | Candidates: Prev ? SRU.expr_begin(Point: *Prev) : SRU.expr_begin(Point: BB)), |
82 | /*IncludeLocalAccesses=*/false) { |
83 | BlacklistedRegs.set(FIE->RegOrImm); |
84 | CalleeSaved.reset(Idx: FIE->RegOrImm); |
85 | Prev = &Inst; |
86 | continue; |
87 | } |
88 | |
89 | // If this stack position is loaded elsewhere in another reg, we can't |
90 | // update it, so blacklist it. |
91 | if (SRU.isLoadedInDifferentReg(StoreFIE: *FIE, Candidates: Prev ? SRU.expr_begin(Point: *Prev) |
92 | : SRU.expr_begin(Point: BB))) { |
93 | BlacklistedRegs.set(FIE->RegOrImm); |
94 | CalleeSaved.reset(Idx: FIE->RegOrImm); |
95 | Prev = &Inst; |
96 | continue; |
97 | } |
98 | |
99 | // Ignore regs with multiple saves |
100 | if (CalleeSaved[FIE->RegOrImm]) { |
101 | BlacklistedRegs.set(FIE->RegOrImm); |
102 | CalleeSaved.reset(Idx: FIE->RegOrImm); |
103 | Prev = &Inst; |
104 | continue; |
105 | } |
106 | |
107 | CalleeSaved.set(FIE->RegOrImm); |
108 | SaveFIEByReg[FIE->RegOrImm] = &*FIE; |
109 | SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount(); |
110 | BC.MIB->addAnnotation(Inst, Index: getSaveTag(), Val: FIE->RegOrImm, AllocatorId); |
111 | OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset; |
112 | LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: " |
113 | << FIE->RegOrImm << "\n" ); |
114 | } |
115 | Prev = &Inst; |
116 | } |
117 | } |
118 | } |
119 | |
120 | void CalleeSavedAnalysis::analyzeRestores() { |
121 | ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses(); |
122 | |
123 | // Now compute all restores of these callee-saved regs |
124 | for (BinaryBasicBlock &BB : BF) { |
125 | const MCInst *Prev = nullptr; |
126 | for (MCInst &Inst : llvm::reverse(C&: BB)) { |
127 | if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { |
128 | if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) { |
129 | Prev = &Inst; |
130 | continue; |
131 | } |
132 | |
133 | // If this reg is used locally after a restore, then we are probably |
134 | // not dealing with a callee-saved reg. Except if this use is by |
135 | // another store, but we don't cover this case yet. |
136 | // Also not callee-saved if this load accesses caller stack or isn't |
137 | // simple. |
138 | if (!FIE->IsSimple || FIE->StackOffset >= 0 || |
139 | RU.isReachedBy(Reg: FIE->RegOrImm, |
140 | Candidates: Prev ? RU.expr_begin(Point: *Prev) : RU.expr_begin(Point: BB))) { |
141 | CalleeSaved.reset(Idx: FIE->RegOrImm); |
142 | Prev = &Inst; |
143 | continue; |
144 | } |
145 | // If stack offsets between saves/store don't agree with each other, |
146 | // we don't completely understand what's happening here |
147 | if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) { |
148 | CalleeSaved.reset(Idx: FIE->RegOrImm); |
149 | LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a " |
150 | "mismatching restore: " |
151 | << FIE->RegOrImm << "\n" ); |
152 | Prev = &Inst; |
153 | continue; |
154 | } |
155 | |
156 | LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm |
157 | << "\n" ); |
158 | if (LoadFIEByReg[FIE->RegOrImm] == nullptr) |
159 | LoadFIEByReg[FIE->RegOrImm] = &*FIE; |
160 | BC.MIB->addAnnotation(Inst, Index: getRestoreTag(), Val: FIE->RegOrImm, |
161 | AllocatorId); |
162 | HasRestores.set(FIE->RegOrImm); |
163 | } |
164 | Prev = &Inst; |
165 | } |
166 | } |
167 | } |
168 | |
169 | std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) { |
170 | std::vector<MCInst *> Results; |
171 | for (BinaryBasicBlock &BB : BF) |
172 | for (MCInst &Inst : BB) |
173 | if (getSavedReg(Inst) == Reg) |
174 | Results.push_back(x: &Inst); |
175 | return Results; |
176 | } |
177 | |
178 | std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) { |
179 | std::vector<MCInst *> Results; |
180 | for (BinaryBasicBlock &BB : BF) |
181 | for (MCInst &Inst : BB) |
182 | if (getRestoredReg(Inst) == Reg) |
183 | Results.push_back(x: &Inst); |
184 | return Results; |
185 | } |
186 | |
187 | CalleeSavedAnalysis::~CalleeSavedAnalysis() { |
188 | for (BinaryBasicBlock &BB : BF) { |
189 | for (MCInst &Inst : BB) { |
190 | BC.MIB->removeAnnotation(Inst, Index: getSaveTag()); |
191 | BC.MIB->removeAnnotation(Inst, Index: getRestoreTag()); |
192 | } |
193 | } |
194 | } |
195 | |
196 | void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) { |
197 | if (BlacklistedRegions[Offset] < Size) |
198 | BlacklistedRegions[Offset] = Size; |
199 | } |
200 | |
201 | bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) { |
202 | for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions) |
203 | if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second) |
204 | return true; |
205 | return false; |
206 | } |
207 | |
208 | bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset, |
209 | int64_t Size) { |
210 | bool HasConflict = false; |
211 | for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) { |
212 | std::pair<const int64_t, int64_t> &Elem = *Iter; |
213 | if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second && |
214 | (Offset != Elem.first || Size != Elem.second)) { |
215 | Iter = AvailableRegions.erase(position: Iter); |
216 | HasConflict = true; |
217 | continue; |
218 | } |
219 | ++Iter; |
220 | } |
221 | if (HasConflict) { |
222 | blacklistRegion(Offset, Size); |
223 | return true; |
224 | } |
225 | return false; |
226 | } |
227 | |
228 | void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { |
229 | StackPointerTracking &SPT = Info.getStackPointerTracking(); |
230 | if (!BC.MII->get(Opcode: Point.getOpcode()) |
231 | .hasDefOfPhysReg(MI: Point, Reg: BC.MIB->getFramePointer(), RI: *BC.MRI)) |
232 | return; |
233 | |
234 | int SPVal, FPVal; |
235 | std::tie(args&: SPVal, args&: FPVal) = *SPT.getStateBefore(Point); |
236 | std::pair<MCPhysReg, int64_t> FP; |
237 | |
238 | if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) |
239 | FP = std::make_pair(x: BC.MIB->getFramePointer(), y&: FPVal); |
240 | else |
241 | FP = std::make_pair(x: 0, y: 0); |
242 | std::pair<MCPhysReg, int64_t> SP; |
243 | |
244 | if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) |
245 | SP = std::make_pair(x: BC.MIB->getStackPointer(), y&: SPVal); |
246 | else |
247 | SP = std::make_pair(x: 0, y: 0); |
248 | |
249 | int64_t Output; |
250 | if (!BC.MIB->evaluateStackOffsetExpr(Inst: Point, Output, Input1: SP, Input2: FP)) |
251 | return; |
252 | |
253 | // Not your regular frame pointer initialization... bail |
254 | if (Output != SPVal) |
255 | blacklistRegion(Offset: 0, Size: 0); |
256 | } |
257 | |
258 | void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { |
259 | StackPointerTracking &SPT = Info.getStackPointerTracking(); |
260 | if (!BC.MII->get(Opcode: Point.getOpcode()) |
261 | .hasDefOfPhysReg(MI: Point, Reg: BC.MIB->getStackPointer(), RI: *BC.MRI)) |
262 | return; |
263 | // Check if the definition of SP comes from FP -- in this case, this |
264 | // value may need to be updated depending on our stack layout changes |
265 | bool UsesFP = llvm::any_of(Range: BC.MIB->useOperands(Inst&: Point), P: [&](MCOperand &Op) { |
266 | return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer(); |
267 | }); |
268 | if (!UsesFP) |
269 | return; |
270 | |
271 | // Setting up evaluation |
272 | int SPVal, FPVal; |
273 | std::tie(args&: SPVal, args&: FPVal) = *SPT.getStateBefore(Point); |
274 | std::pair<MCPhysReg, int64_t> FP; |
275 | |
276 | if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) |
277 | FP = std::make_pair(x: BC.MIB->getFramePointer(), y&: FPVal); |
278 | else |
279 | FP = std::make_pair(x: 0, y: 0); |
280 | std::pair<MCPhysReg, int64_t> SP; |
281 | |
282 | if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) |
283 | SP = std::make_pair(x: BC.MIB->getStackPointer(), y&: SPVal); |
284 | else |
285 | SP = std::make_pair(x: 0, y: 0); |
286 | |
287 | int64_t Output; |
288 | if (!BC.MIB->evaluateStackOffsetExpr(Inst: Point, Output, Input1: SP, Input2: FP)) |
289 | return; |
290 | |
291 | // If the value is the same of FP, no need to adjust it |
292 | if (Output == FPVal) |
293 | return; |
294 | |
295 | // If an allocation happened through FP, bail |
296 | if (Output <= SPVal) { |
297 | blacklistRegion(Offset: 0, Size: 0); |
298 | return; |
299 | } |
300 | |
301 | // We are restoring SP to an old value based on FP. Mark it as a stack |
302 | // access to be fixed later. |
303 | BC.MIB->addAnnotation(Inst&: Point, Index: getSlotTag(), Val: Output, AllocatorId); |
304 | } |
305 | |
306 | void StackLayoutModifier::classifyStackAccesses() { |
307 | // Understand when stack slots are being used non-locally |
308 | StackReachingUses &SRU = Info.getStackReachingUses(); |
309 | |
310 | for (BinaryBasicBlock &BB : BF) { |
311 | const MCInst *Prev = nullptr; |
312 | for (MCInst &Inst : llvm::reverse(C&: BB)) { |
313 | checkFramePointerInitialization(Point&: Inst); |
314 | checkStackPointerRestore(Point&: Inst); |
315 | ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); |
316 | if (!FIEX) { |
317 | Prev = &Inst; |
318 | continue; |
319 | } |
320 | if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) { |
321 | blacklistRegion(Offset: FIEX->StackOffset, Size: FIEX->Size); |
322 | Prev = &Inst; |
323 | continue; |
324 | } |
325 | // If this stack position is accessed in another function, we are |
326 | // probably dealing with a parameter passed in a stack -- do not mess |
327 | // with it |
328 | if (SRU.isStoreUsed(StoreFIE: *FIEX, |
329 | Candidates: Prev ? SRU.expr_begin(Point: *Prev) : SRU.expr_begin(Point: BB), |
330 | /*IncludeLocalAccesses=*/false)) { |
331 | blacklistRegion(Offset: FIEX->StackOffset, Size: FIEX->Size); |
332 | Prev = &Inst; |
333 | continue; |
334 | } |
335 | // Now we have a clear stack slot access. Check if its blacklisted or if |
336 | // it conflicts with another chunk. |
337 | if (isRegionBlacklisted(Offset: FIEX->StackOffset, Size: FIEX->Size) || |
338 | blacklistAllInConflictWith(Offset: FIEX->StackOffset, Size: FIEX->Size)) { |
339 | Prev = &Inst; |
340 | continue; |
341 | } |
342 | // We are free to go. Add it as available stack slot which we know how |
343 | // to move it. |
344 | AvailableRegions[FIEX->StackOffset] = FIEX->Size; |
345 | BC.MIB->addAnnotation(Inst, Index: getSlotTag(), Val: FIEX->StackOffset, AllocatorId); |
346 | RegionToRegMap[FIEX->StackOffset].insert(x: FIEX->RegOrImm); |
347 | RegToRegionMap[FIEX->RegOrImm].insert(x: FIEX->StackOffset); |
348 | LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size " |
349 | << (int)FIEX->Size << "\n" ); |
350 | } |
351 | } |
352 | } |
353 | |
354 | void StackLayoutModifier::classifyCFIs() { |
355 | std::stack<std::pair<int64_t, uint16_t>> CFIStack; |
356 | int64_t CfaOffset = -8; |
357 | uint16_t CfaReg = 7; |
358 | |
359 | auto recordAccess = [&](MCInst *Inst, int64_t Offset) { |
360 | const uint16_t Reg = *BC.MRI->getLLVMRegNum(RegNum: CfaReg, /*isEH=*/false); |
361 | if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) { |
362 | BC.MIB->addAnnotation(Inst&: *Inst, Index: getSlotTag(), Val: Offset, AllocatorId); |
363 | LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n" ); |
364 | } else { |
365 | IsSimple = false; |
366 | return; |
367 | } |
368 | }; |
369 | |
370 | for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
371 | for (MCInst &Inst : *BB) { |
372 | if (!BC.MIB->isCFI(Inst)) |
373 | continue; |
374 | const MCCFIInstruction *CFI = BF.getCFIFor(Instr: Inst); |
375 | switch (CFI->getOperation()) { |
376 | case MCCFIInstruction::OpDefCfa: |
377 | CfaOffset = -CFI->getOffset(); |
378 | recordAccess(&Inst, CfaOffset); |
379 | [[fallthrough]]; |
380 | case MCCFIInstruction::OpDefCfaRegister: |
381 | CfaReg = CFI->getRegister(); |
382 | break; |
383 | case MCCFIInstruction::OpDefCfaOffset: |
384 | CfaOffset = -CFI->getOffset(); |
385 | recordAccess(&Inst, CfaOffset); |
386 | break; |
387 | case MCCFIInstruction::OpOffset: |
388 | recordAccess(&Inst, CFI->getOffset()); |
389 | BC.MIB->addAnnotation(Inst, Index: getOffsetCFIRegTag(), |
390 | Val: BC.MRI->getLLVMRegNum(RegNum: CFI->getRegister(), |
391 | /*isEH=*/false), |
392 | AllocatorId); |
393 | break; |
394 | case MCCFIInstruction::OpSameValue: |
395 | BC.MIB->addAnnotation(Inst, Index: getOffsetCFIRegTag(), |
396 | Val: BC.MRI->getLLVMRegNum(RegNum: CFI->getRegister(), |
397 | /*isEH=*/false), |
398 | AllocatorId); |
399 | break; |
400 | case MCCFIInstruction::OpRememberState: |
401 | CFIStack.push(x: std::make_pair(x&: CfaOffset, y&: CfaReg)); |
402 | break; |
403 | case MCCFIInstruction::OpRestoreState: { |
404 | assert(!CFIStack.empty() && "Corrupt CFI stack" ); |
405 | std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); |
406 | CFIStack.pop(); |
407 | CfaOffset = Elem.first; |
408 | CfaReg = Elem.second; |
409 | break; |
410 | } |
411 | case MCCFIInstruction::OpRelOffset: |
412 | case MCCFIInstruction::OpAdjustCfaOffset: |
413 | llvm_unreachable("Unhandled AdjustCfaOffset" ); |
414 | break; |
415 | default: |
416 | break; |
417 | } |
418 | } |
419 | } |
420 | } |
421 | |
422 | void StackLayoutModifier::scheduleChange( |
423 | MCInst &Inst, StackLayoutModifier::WorklistItem Item) { |
424 | auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>( |
425 | Inst, Index: getTodoTag(), AllocatorId); |
426 | WList.push_back(x: Item); |
427 | } |
428 | |
429 | bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) { |
430 | if (!IsSimple || !BC.MIB->isPush(Inst: *DeletedPush)) |
431 | return false; |
432 | |
433 | ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst: *DeletedPush); |
434 | if (!FIE) |
435 | return false; |
436 | |
437 | return canCollapseRegion(RegionAddr: FIE->StackOffset); |
438 | } |
439 | |
440 | bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) { |
441 | if (!IsInitialized) |
442 | initialize(); |
443 | if (!IsSimple) |
444 | return false; |
445 | |
446 | if (CollapsedRegions.count(V: RegionAddr)) |
447 | return true; |
448 | |
449 | // Check if it is possible to readjust all accesses below RegionAddr |
450 | if (!BlacklistedRegions.empty()) |
451 | return false; |
452 | |
453 | return true; |
454 | } |
455 | |
456 | bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) { |
457 | ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst: *DeletedPush); |
458 | if (!FIE) |
459 | return false; |
460 | int64_t RegionAddr = FIE->StackOffset; |
461 | int64_t RegionSz = FIE->Size; |
462 | return collapseRegion(Alloc: DeletedPush, RegionAddr, RegionSize: RegionSz); |
463 | } |
464 | |
465 | bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr, |
466 | int64_t RegionSz) { |
467 | if (!canCollapseRegion(RegionAddr)) |
468 | return false; |
469 | |
470 | assert(IsInitialized); |
471 | StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis(); |
472 | |
473 | for (BinaryBasicBlock &BB : BF) { |
474 | for (MCInst &Inst : BB) { |
475 | if (!BC.MIB->hasAnnotation(Inst, Index: getSlotTag())) |
476 | continue; |
477 | auto Slot = |
478 | BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( |
479 | Inst, Index: getSlotTag()); |
480 | if (!AvailableRegions.count(x: Slot)) |
481 | continue; |
482 | // We need to ensure this access is affected by the deleted push |
483 | if (!(*SAA.getStateBefore(Point: Inst))[SAA.ExprToIdx[Alloc]]) |
484 | continue; |
485 | |
486 | if (BC.MIB->isCFI(Inst)) { |
487 | if (Slot > RegionAddr) |
488 | continue; |
489 | scheduleChange(Inst, Item: WorklistItem(WorklistItem::AdjustCFI, RegionSz)); |
490 | continue; |
491 | } |
492 | ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); |
493 | if (!FIE) { |
494 | if (Slot > RegionAddr) |
495 | continue; |
496 | // SP update based on frame pointer |
497 | scheduleChange( |
498 | Inst, Item: WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); |
499 | continue; |
500 | } |
501 | |
502 | if (Slot == RegionAddr) { |
503 | BC.MIB->addAnnotation(Inst, Name: "AccessesDeletedPos" , Val: 0U, AllocatorId); |
504 | continue; |
505 | } |
506 | if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) |
507 | continue; |
508 | |
509 | if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) |
510 | continue; |
511 | |
512 | if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr) |
513 | continue; |
514 | |
515 | scheduleChange( |
516 | Inst, Item: WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); |
517 | } |
518 | } |
519 | |
520 | CollapsedRegions.insert(V: RegionAddr); |
521 | return true; |
522 | } |
523 | |
524 | void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) { |
525 | for (BinaryBasicBlock &BB : BF) { |
526 | for (MCInst &Inst : BB) { |
527 | if (!BC.MIB->hasAnnotation(Inst, Name: "AccessesDeletedPos" )) |
528 | continue; |
529 | BC.MIB->removeAnnotation(Inst, Name: "AccessesDeletedPos" ); |
530 | scheduleChange( |
531 | Inst, Item: WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset)); |
532 | } |
533 | } |
534 | } |
535 | |
536 | bool StackLayoutModifier::canInsertRegion(ProgramPoint P) { |
537 | if (!IsInitialized) |
538 | initialize(); |
539 | if (!IsSimple) |
540 | return false; |
541 | |
542 | StackPointerTracking &SPT = Info.getStackPointerTracking(); |
543 | int64_t RegionAddr = SPT.getStateBefore(Point: P)->first; |
544 | if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) |
545 | return false; |
546 | |
547 | if (InsertedRegions.count(V: RegionAddr)) |
548 | return true; |
549 | |
550 | // Check if we are going to screw up stack accesses at call sites that |
551 | // pass parameters via stack |
552 | if (!BlacklistedRegions.empty()) |
553 | return false; |
554 | |
555 | return true; |
556 | } |
557 | |
558 | bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) { |
559 | if (!canInsertRegion(P)) |
560 | return false; |
561 | |
562 | assert(IsInitialized); |
563 | StackPointerTracking &SPT = Info.getStackPointerTracking(); |
564 | // This RegionAddr is slightly different from the one seen in collapseRegion |
565 | // This is the value of SP before the allocation the user wants to make. |
566 | int64_t RegionAddr = SPT.getStateBefore(Point: P)->first; |
567 | if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) |
568 | return false; |
569 | |
570 | DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
571 | |
572 | for (BinaryBasicBlock &BB : BF) { |
573 | for (MCInst &Inst : BB) { |
574 | if (!BC.MIB->hasAnnotation(Inst, Index: getSlotTag())) |
575 | continue; |
576 | auto Slot = |
577 | BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( |
578 | Inst, Index: getSlotTag()); |
579 | if (!AvailableRegions.count(x: Slot)) |
580 | continue; |
581 | |
582 | if (!(DA.doesADominateB(A: P, B: Inst))) |
583 | continue; |
584 | |
585 | if (BC.MIB->isCFI(Inst)) { |
586 | if (Slot >= RegionAddr) |
587 | continue; |
588 | scheduleChange(Inst, Item: WorklistItem(WorklistItem::AdjustCFI, -RegionSz)); |
589 | continue; |
590 | } |
591 | ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); |
592 | if (!FIE) { |
593 | if (Slot >= RegionAddr) |
594 | continue; |
595 | scheduleChange( |
596 | Inst, Item: WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); |
597 | continue; |
598 | } |
599 | |
600 | if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) |
601 | continue; |
602 | if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr) |
603 | continue; |
604 | if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) |
605 | continue; |
606 | scheduleChange( |
607 | Inst, Item: WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); |
608 | } |
609 | } |
610 | |
611 | InsertedRegions.insert(V: RegionAddr); |
612 | return true; |
613 | } |
614 | |
615 | void StackLayoutModifier::performChanges() { |
616 | std::set<uint32_t> ModifiedCFIIndices; |
617 | for (BinaryBasicBlock &BB : BF) { |
618 | for (MCInst &Inst : llvm::reverse(C&: BB)) { |
619 | if (BC.MIB->hasAnnotation(Inst, Name: "AccessesDeletedPos" )) { |
620 | assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst)); |
621 | BC.MIB->removeAnnotation(Inst, Name: "AccessesDeletedPos" ); |
622 | } |
623 | if (!BC.MIB->hasAnnotation(Inst, Index: getTodoTag())) |
624 | continue; |
625 | auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>( |
626 | Inst, Index: getTodoTag()); |
627 | int64_t Adjustment = 0; |
628 | WorklistItem::ActionType AdjustmentType = WorklistItem::None; |
629 | for (WorklistItem &WI : WList) { |
630 | if (WI.Action == WorklistItem::None) |
631 | continue; |
632 | assert(WI.Action == WorklistItem::AdjustLoadStoreOffset || |
633 | WI.Action == WorklistItem::AdjustCFI); |
634 | assert((AdjustmentType == WorklistItem::None || |
635 | AdjustmentType == WI.Action) && |
636 | "Conflicting actions requested at the same program point" ); |
637 | AdjustmentType = WI.Action; |
638 | Adjustment += WI.OffsetUpdate; |
639 | } |
640 | if (!Adjustment) |
641 | continue; |
642 | if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) { |
643 | assert(BC.MIB->isCFI(Inst)); |
644 | uint32_t CFINum = Inst.getOperand(i: 0).getImm(); |
645 | if (ModifiedCFIIndices.count(x: CFINum)) |
646 | continue; |
647 | ModifiedCFIIndices.insert(x: CFINum); |
648 | const MCCFIInstruction *CFI = BF.getCFIFor(Instr: Inst); |
649 | const MCCFIInstruction::OpType Operation = CFI->getOperation(); |
650 | if (Operation == MCCFIInstruction::OpDefCfa || |
651 | Operation == MCCFIInstruction::OpDefCfaOffset) |
652 | Adjustment = 0 - Adjustment; |
653 | LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset() |
654 | << " to " << (CFI->getOffset() + Adjustment) << "\n" ); |
655 | BF.mutateCFIOffsetFor(Instr: Inst, NewOffset: CFI->getOffset() + Adjustment); |
656 | continue; |
657 | } |
658 | int32_t SrcImm = 0; |
659 | MCPhysReg Reg = 0; |
660 | MCPhysReg StackPtrReg = 0; |
661 | int64_t StackOffset = 0; |
662 | bool IsIndexed = false; |
663 | bool IsLoad = false; |
664 | bool IsStore = false; |
665 | bool IsSimple = false; |
666 | bool IsStoreFromReg = false; |
667 | uint8_t Size = 0; |
668 | bool Success = false; |
669 | Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, |
670 | Reg, SrcImm, StackPtrReg, StackOffset, |
671 | Size, IsSimple, IsIndexed); |
672 | if (!Success) { |
673 | // SP update based on FP value |
674 | Success = BC.MIB->addToImm(Inst, Amt&: Adjustment, Ctx: &*BC.Ctx); |
675 | assert(Success); |
676 | continue; |
677 | } |
678 | assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg)); |
679 | if (StackPtrReg != BC.MIB->getFramePointer()) |
680 | Adjustment = -Adjustment; |
681 | if (IsLoad) |
682 | BC.MIB->createRestoreFromStack(Inst, StackReg: StackPtrReg, |
683 | Offset: StackOffset + Adjustment, DstReg: Reg, Size); |
684 | else if (IsStore) |
685 | BC.MIB->createSaveToStack(Inst, StackReg: StackPtrReg, Offset: StackOffset + Adjustment, |
686 | SrcReg: Reg, Size); |
687 | LLVM_DEBUG({ |
688 | dbgs() << "Adjusted instruction: " ; |
689 | Inst.dump(); |
690 | }); |
691 | } |
692 | } |
693 | } |
694 | |
695 | void StackLayoutModifier::initialize() { |
696 | classifyStackAccesses(); |
697 | classifyCFIs(); |
698 | IsInitialized = true; |
699 | } |
700 | |
701 | std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0}; |
702 | std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0}; |
703 | std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0}; |
704 | std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0}; |
705 | std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0}; |
706 | std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0}; |
707 | |
708 | using BBIterTy = BinaryBasicBlock::iterator; |
709 | |
710 | void ShrinkWrapping::classifyCSRUses() { |
711 | DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
712 | StackPointerTracking &SPT = Info.getStackPointerTracking(); |
713 | UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(), |
714 | BitVector(DA.NumInstrs, false)); |
715 | |
716 | const BitVector &FPAliases = BC.MIB->getAliases(Reg: BC.MIB->getFramePointer()); |
717 | for (BinaryBasicBlock &BB : BF) { |
718 | for (MCInst &Inst : BB) { |
719 | if (BC.MIB->isCFI(Inst)) |
720 | continue; |
721 | BitVector BV = BitVector(BC.MRI->getNumRegs(), false); |
722 | BC.MIB->getTouchedRegs(Inst, Regs&: BV); |
723 | BV &= CSA.CalleeSaved; |
724 | for (int I : BV.set_bits()) { |
725 | if (I == 0) |
726 | continue; |
727 | if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I) |
728 | UsesByReg[I].set(DA.ExprToIdx[&Inst]); |
729 | } |
730 | if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst)) |
731 | continue; |
732 | BV = CSA.CalleeSaved; |
733 | BV &= FPAliases; |
734 | for (int I : BV.set_bits()) |
735 | UsesByReg[I].set(DA.ExprToIdx[&Inst]); |
736 | } |
737 | } |
738 | } |
739 | |
740 | void ShrinkWrapping::pruneUnwantedCSRs() { |
741 | BitVector ParamRegs = BC.MIB->getRegsUsedAsParams(); |
742 | for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
743 | if (!CSA.CalleeSaved[I]) |
744 | continue; |
745 | if (ParamRegs[I]) { |
746 | CSA.CalleeSaved.reset(Idx: I); |
747 | continue; |
748 | } |
749 | if (UsesByReg[I].empty()) { |
750 | LLVM_DEBUG( |
751 | dbgs() |
752 | << "Dismissing Callee-Saved Reg because we found no uses of it:" << I |
753 | << "\n" ); |
754 | CSA.CalleeSaved.reset(Idx: I); |
755 | continue; |
756 | } |
757 | if (!CSA.HasRestores[I]) { |
758 | LLVM_DEBUG( |
759 | dbgs() << "Dismissing Callee-Saved Reg because it does not have " |
760 | "restores:" |
761 | << I << "\n" ); |
762 | CSA.CalleeSaved.reset(Idx: I); |
763 | } |
764 | } |
765 | } |
766 | |
767 | void ShrinkWrapping::computeSaveLocations() { |
768 | BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs()); |
769 | ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); |
770 | DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
771 | StackPointerTracking &SPT = Info.getStackPointerTracking(); |
772 | |
773 | LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n" ); |
774 | for (BinaryBasicBlock &BB : BF) { |
775 | LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n" ); |
776 | |
777 | MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr; |
778 | if (!First) |
779 | continue; |
780 | |
781 | // Use reaching instructions to detect if we are inside a loop - if we |
782 | // are, do not consider this BB as valid placement for saves. |
783 | if (RI.isInLoop(BB)) |
784 | continue; |
785 | |
786 | const std::pair<int, int> SPFP = *SPT.getStateBefore(Point: *First); |
787 | // If we don't know stack state at this point, bail |
788 | if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && |
789 | (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) |
790 | continue; |
791 | |
792 | for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
793 | if (!CSA.CalleeSaved[I]) |
794 | continue; |
795 | |
796 | BitVector BBDominatedUses = BitVector(DA.NumInstrs, false); |
797 | for (int J : UsesByReg[I].set_bits()) |
798 | if (DA.doesADominateB(A: *First, BIdx: J)) |
799 | BBDominatedUses.set(J); |
800 | LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates " |
801 | << BBDominatedUses.count() << " uses for reg " << I |
802 | << ". Total uses for reg is " << UsesByReg[I].count() |
803 | << "\n" ); |
804 | BBDominatedUses &= UsesByReg[I]; |
805 | if (BBDominatedUses == UsesByReg[I]) { |
806 | LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName() |
807 | << " as a save pos for " << I << "\n" ); |
808 | BestSavePos[I].push_back(x: First); |
809 | LLVM_DEBUG({ |
810 | dbgs() << "Dominated uses are:\n" ; |
811 | for (int J : UsesByReg[I].set_bits()) { |
812 | dbgs() << "Idx " << J << ": " ; |
813 | BC.printInstruction(dbgs(), *DA.Expressions[J]); |
814 | DA.Expressions[J]->dump(); |
815 | } |
816 | }); |
817 | } |
818 | } |
819 | } |
820 | |
821 | BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs()); |
822 | |
823 | auto &InsnToBB = Info.getInsnToBBMap(); |
824 | for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
825 | if (!CSA.CalleeSaved[I]) |
826 | continue; |
827 | |
828 | std::stable_sort(first: BestSavePos[I].begin(), last: BestSavePos[I].end(), |
829 | comp: [&](const MCInst *A, const MCInst *B) { |
830 | const BinaryBasicBlock *BBA = InsnToBB[A]; |
831 | const BinaryBasicBlock *BBB = InsnToBB[B]; |
832 | const uint64_t CountA = BBA->getKnownExecutionCount(); |
833 | const uint64_t CountB = BBB->getKnownExecutionCount(); |
834 | return CountB < CountA; |
835 | }); |
836 | |
837 | for (MCInst *Pos : BestSavePos[I]) { |
838 | const BinaryBasicBlock *BB = InsnToBB[Pos]; |
839 | const uint64_t Count = BB->getKnownExecutionCount(); |
840 | BestSaveCount[I].push_back(x: Count); |
841 | } |
842 | } |
843 | } |
844 | |
/// Rank callee-saved registers by the dominance relation of their best save
/// positions: DomOrder[Reg] is Reg's rank, with registers whose candidate
/// save position dominates others' ranked first and registers without any
/// candidate position ranked last.
void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    Order.push_back(x: I);
  }

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  // Sort registers: ties (same BB, no dominance either way, or neither has a
  // candidate) are broken by register number so the order is deterministic.
  llvm::sort(C&: Order, Comp: [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    // A register with no candidate position sorts after one that has one.
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(A: *BestSavePos[A].back(), B: *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(A: *BestSavePos[B].back(), B: *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  // Invert the permutation: map each register to its position in Order.
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}
875 | |
876 | bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave, |
877 | uint64_t &TotalEstimatedWin) { |
878 | const uint64_t CurSavingCost = CSA.SavingCost[CSR]; |
879 | if (!CSA.CalleeSaved[CSR]) |
880 | return false; |
881 | |
882 | assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() && |
883 | "save position vectors out of sync" ); |
884 | if (BestSaveCount[CSR].empty()) |
885 | return false; |
886 | |
887 | const uint64_t BestCount = BestSaveCount[CSR].back(); |
888 | BestPosSave = BestSavePos[CSR].back(); |
889 | if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) |
890 | return false; |
891 | |
892 | LLVM_DEBUG({ |
893 | auto &InsnToBB = Info.getInsnToBBMap(); |
894 | dbgs() << "Better position for saves found in func " << BF.getPrintName() |
895 | << " count << " << BF.getKnownExecutionCount() << "\n" ; |
896 | dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName() |
897 | << " Freq reduction: " << (CurSavingCost - BestCount) << "\n" ; |
898 | }); |
899 | |
900 | TotalEstimatedWin = CurSavingCost - BestCount; |
901 | return true; |
902 | } |
903 | |
/// Auxiliary function used to create basic blocks for critical edges and update
/// the dominance frontier with these new locations
///
/// \p Frontier is updated in place: the block created for the first split
/// destination of From[I] replaces entry I, and blocks for any further
/// destinations are appended at the end. \p IsCritEdge flags which frontier
/// entries need splitting; \p From and \p To give the source block and the
/// destination blocks of each critical edge.
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating with blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBBs
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(From: FromBB, To: DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates instructions
      // and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(Inst&: NewInst);
        NewBB->addInstruction(Inst: std::move(NewInst));
        // The nop only exists to carry annotations; erase it at commit time.
        scheduleChange(PP: &*NewBB->begin(), Item: WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(BB&: *NewBB);
      if (DI == 0) {
        // Update frontier inplace
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(Elt: NewFrontierPP);
        LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}
956 | |
/// Compute the program points where restores for \p CSR must be inserted if
/// its save is moved to \p BestPosSave. These are the dominance frontier of
/// the save position; edges leaving the dominated region are split so the
/// restore can be hosted on the edge. An empty result signals that placement
/// failed and the caller should try another candidate position.
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // into edges transitioning to the dominance frontier, otherwise we pull these
  // restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(Dom: *BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    // A restore cannot be placed before a terminator that itself uses the
    // CSR; give up on this candidate position.
    if (PP.isInst() && BC.MIB->isTerminator(Inst: *PP.getInst()) &&
        doesInstUsesCSR(Inst: *PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(Args&: FrontierBB);
    CritEdgesTo.emplace_back(Args: 0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which indicates
    // the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(Inst: *PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    // Partition successors: those not dominated by the save position go to
    // Dests (edges that may need splitting); HasCritEdges records that at
    // least one successor is still dominated, i.e. this frontier block does
    // not exit the region through all of its edges.
    doForAllSuccs(BB: *FrontierBB, Task: [&](ProgramPoint P) {
      if (!DA.doesADominateB(A: *BestPosSave, B: P)) {
        Dests.emplace_back(Args: Info.getParentBB(PP: P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(Elt: HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(Inst&: NewInst);
    auto II = BB->addInstruction(Inst: std::move(NewInst));
    scheduleChange(PP: &*II, Item: WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  // Split edges only when at least one frontier entry was flagged critical.
  if (std::accumulate(first: IsCritEdge.begin(), last: IsCritEdge.end(), init: 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << " Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(Func: &BF, Frontier, IsCritEdge, From: CritEdgesFrom,
                           To: CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}
1044 | |
/// Decide whether the moved save/restore of \p CSR can be implemented with
/// push/pop instructions once the save is placed at \p BestPosSave, where the
/// stack pointer offset is \p SaveOffset. Returns false when the stack layout
/// cannot be changed safely, forcing load/store mode instead.
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  // Pushes/pops change the stack layout, which is unsafe when the function
  // has alignment requirements on its frame.
  if (FA.requiresAlignment(Func: BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  // Likewise when the function takes the address of a stack position:
  // moving slots around would invalidate that address.
  if (FA.hasStackArithmetic(Func: BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  // Every old save slot must be removable from the stack layout.
  for (MCInst *Save : CSA.getSavesByReg(Reg: CSR)) {
    if (!SLM.canCollapseRegion(DeletedPush: Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(Reg: CSR)) {
    if (!BC.MIB->isPop(Inst: *Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(P: BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}
1091 | |
/// For push/pop mode, move each restore point to a location where the stack
/// pointer is back at \p SaveOffset (so the pop hits the right slot) and from
/// which no use of \p CSR is still reachable. Returns an empty vector when no
/// such location exists for some point, signalling fallback to load/store
/// mode.
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    // First try the very end of the block.
    if (SPT.getStateAt(Point: ProgramPoint::getLastPointAt(BB&: *BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(Point: ProgramPoint::getLastPointAt(BB&: *BB));
      BV &= UsesByReg[CSR];
      // No instruction using CSR is backward-reachable from here, so a pop
      // at the end of the block is safe.
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    // Otherwise scan instructions bottom-up for a point with the right SP
    // offset and no backward-reachable use of CSR.
    for (MCInst &Inst : llvm::reverse(C&: *BB)) {
      if (SPT.getStateBefore(Point: Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Point: Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}
1134 | |
/// Walk the function and schedule removal of the original saves/restores of
/// \p CSR (or, when \p UsePushPops is false, their conversion into plain
/// stack-pointer adjustments), together with the CFI instructions attached to
/// them.
void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    // CFIs collected since the last non-CFI instruction while scanning the
    // block backwards; these immediately follow that instruction in layout
    // order and are treated as associated with it.
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(C&: *BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(PP: &Inst, Item: WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(x: &Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      // Not a save/restore of this CSR: the pending CFIs belong to this
      // instruction, which we are not touching.
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      // In push/pop mode the old spill disappears entirely; in load/store
      // mode it becomes a stack-pointer adjustment to keep offsets intact.
      scheduleChange(PP: &Inst, Item: WorklistItem(UsePushPops
                                        ? WorklistItem::Erase
                                        : WorklistItem::ChangeToAdjustment,
                                    CSR));

      // Delete associated CFIs
      // Record the deleted CFIs only the first time we see a save/restore of
      // this CSR, so they can be rebuilt later.
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(Instr: *CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(PP: CFI, Item: WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(x: CFI->getOperand(i: 0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(x: CFI->getOperand(i: 0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}
1193 | |
1194 | bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) { |
1195 | if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR || |
1196 | CSA.getRestoredReg(Inst) == CSR) |
1197 | return false; |
1198 | BitVector BV = BitVector(BC.MRI->getNumRegs(), false); |
1199 | BC.MIB->getTouchedRegs(Inst, Regs&: BV); |
1200 | return BV[CSR]; |
1201 | } |
1202 | |
/// Schedule insertion of the new save for \p CSR at \p BestPosSave and of a
/// restore at each program point in \p RestorePoints. \p UsePushPops selects
/// between push/pop and store/load implementations.
void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(PP: BestPosSave,
                 Item: UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 Item: *FIESave, Item&: CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    // If a terminator precedes this point, anchor the insertion on it so the
    // restore is not placed after control leaves the block.
    MCInst *Term =
        FrontierBB->getTerminatorBefore(Pos: PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    // Check whether the insertion point sits right after an instruction
    // prefix, which must not be separated from its instruction.
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(Inst: PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(Inst: *Iter);
      }
    }
    // If the restore cannot go right before this instruction (it uses CSR
    // itself, or a prefix precedes it), schedule it at the block level (end
    // of BB) instead.
    if (PP.isInst() &&
        (doesInstUsesCSR(Inst: *PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(PP: InsnToBB[PP.getInst()],
                     Item: UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     Item: *FIELoad, Item&: CSR);
      continue;
    }
    scheduleChange(PP,
                   Item: UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   Item: *FIELoad, Item&: CSR);
  }
}
1258 | |
/// Driver for the spill-moving transformation. For each callee-saved register
/// with a profitable colder save position, schedules removal of the old
/// save/restore pair and insertion of the new ones, preferring push/pop mode
/// when validation succeeds. If push/pop mode fails for any register, it is
/// reverted for the entire function.
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    // Keep trying candidate save positions (coldest first) until restore
    // placement succeeds or we run out of candidates.
    while (RestorePoints.empty() &&
           isBestSavePosCold(CSR: I, BestPosSave, TotalEstimatedWin&: EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, CSR: I, TotalEstimatedWin: EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        // Discard this candidate and recompute the dominance order without it.
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(Point: *BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(CSR: I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      // Pops must land where SP matches the save offset; fall back to
      // load/store mode if no such points exist.
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, CSR: I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(CSR: I, UsePushPops);
    scheduleSaveRestoreInsertions(CSR: I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(args: std::make_tuple(args&: I, args&: BestPosSave, args&: SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      // Downgrade block-level scheduled insertions to load/store.
      auto WRI = Todo.find(Val: &BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      // Downgrade instruction-level items: insertions become load/store, and
      // spills scheduled for erasure become stack adjustments instead (CFIs
      // stay erased).
      for (MCInst &Inst : llvm::reverse(C&: BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, Index: getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(args&: RegNdx, args&: SavePos, args&: SaveSize) = I;
    // Remove the old slot(s) of this register and open a new region at the
    // moved save position.
    for (MCInst *Save : CSA.getSavesByReg(Reg: RegNdx))
      SLM.collapseRegion(DeletedPush: Save);
    SLM.insertRegion(P: SavePos, RegionSz: SaveSize);
  }
}
1379 | |
1380 | namespace { |
1381 | /// Helper function to identify whether two basic blocks created by splitting |
1382 | /// a critical edge have the same contents. |
1383 | bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A, |
1384 | const BinaryBasicBlock &B) { |
1385 | if (A.succ_size() != B.succ_size()) |
1386 | return false; |
1387 | if (A.succ_size() != 1) |
1388 | return false; |
1389 | |
1390 | if (*A.succ_begin() != *B.succ_begin()) |
1391 | return false; |
1392 | |
1393 | if (A.size() != B.size()) |
1394 | return false; |
1395 | |
1396 | // Compare instructions |
1397 | auto I = A.begin(), E = A.end(); |
1398 | auto OtherI = B.begin(), OtherE = B.end(); |
1399 | while (I != E && OtherI != OtherE) { |
1400 | if (I->getOpcode() != OtherI->getOpcode()) |
1401 | return false; |
1402 | if (!BC.MIB->equals(A: *I, B: *OtherI, Comp: [](const MCSymbol *A, const MCSymbol *B) { |
1403 | return true; |
1404 | })) |
1405 | return false; |
1406 | ++I; |
1407 | ++OtherI; |
1408 | } |
1409 | return true; |
1410 | } |
1411 | } // namespace |
1412 | |
/// Fold identical basic blocks created by critical-edge splitting: when two
/// ".LSplitEdge" blocks have the same single successor and identical
/// contents, redirect the duplicate's predecessor to the first block and mark
/// the duplicate invalid so it is dropped from the CFG.
/// \returns true if any block was folded.
bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    // Only blocks created by edge splitting are candidates.
    if (!BB.getName().starts_with(Prefix: ".LSplitEdge"))
      continue;
    // Scan the function backwards, stopping once we reach BB itself, looking
    // for duplicates of BB.
    for (BinaryBasicBlock &RBB : llvm::reverse(C&: BF)) {
      if (&RBB == &BB)
        break;
      if (!RBB.getName().starts_with(Prefix: ".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, A: *Iter, B: RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      // Preserve the profile data of the redirected edge.
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(BB: Pred, OldDest: &RBB, NewDest: &BB);
      Pred->replaceSuccessor(Succ: &RBB, NewSucc: &BB, Count: OrigCount, MispredictedCount: OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(Valid: false);
    }
  }

  return Changed;
}
1439 | |
namespace {

// A special StackPointerTracking that compensates for our future plans
// in removing/adding insn.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  // Pending shrink-wrapping edits, keyed by basic block (block-level items).
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  // Adjust the tracked SP offset (Res.first) as if the scheduled worklist
  // edits at \p Point had already been applied: an erased push/pop will no
  // longer move SP, while an insertion to be made here will. Offsets grow
  // more negative as the stack grows, hence the sign of each adjustment.
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      // Erasing a push: undo its SP decrement.
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Inst: Point)) {
        Res.first += BC.MIB->getPushSize(Inst: Point);
        continue;
      }
      // Erasing a pop: undo its SP increment.
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Inst: Point)) {
        Res.first -= BC.MIB->getPopSize(Inst: Point);
        continue;
      }
      // Inserting a push (store): SP will move down by the spill size.
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      // Inserting a pop (load): SP will move up by the spill size.
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    // Start from the regular stack-pointer-tracking transfer function.
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    // Apply edits annotated directly on this instruction...
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Inst: Point, Name: ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, TodoItems: *TodoItems, Res);
    // ...and, only when Point is the last instruction of its block, the
    // block-level edits scheduled for that block as well.
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(Val: InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, TodoItems: WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace
1523 | |
/// Insert updated CFI for \p CSR after its spill has been moved. \p SPValPush
/// and \p SPValPop are the stack-pointer offsets recorded at the new push and
/// pop sites, used when rebuilding offset CFIs at zone boundaries.
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  // Locate the (new) save point: the first simple stack store of CSR found.
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(C&: BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  // Walk blocks in layout order, tracking whether each point lies in the
  // "affected zone" (points whose dominator-analysis state contains the save
  // point, i.e. the save dominates them) and emitting CFIs wherever the zone
  // is entered or left.
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(Point: *BB->rbegin(), Expr: *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(Point: *BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(Point: *InstIter, Expr: *SavePoint);
      if (InAffectedZone != CurZone) {
        // Zone boundary inside the block: insert CFIs right after InstIter.
        // NOTE(review): entering the zone passes SPValPop while leaving
        // passes SPValPush; verify against insertCFIsForPushOrPop's contract.
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(BB&: *BB, Pos: InsertionIter, Reg: CSR, IsPush: true, Sz: 0,
                                            NewOffset: SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(BB&: *BB, Pos: InsertionIter, Reg: CSR, IsPush: false, Sz: 0,
                                            NewOffset: SPValPush);
        // Step back so the outer loop resumes at the insertion point.
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(BB&: *BB, Pos: BB->begin(), Reg: CSR, IsPush: true, Sz: 0, NewOffset: SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      // Zone boundary between layout-adjacent blocks: patch the end of the
      // previous block.
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(BB&: *PrevBB, Pos: PrevBB->end(), Reg: CSR, IsPush: true, Sz: 0, NewOffset: SPValPush);
      else
        insertCFIsForPushOrPop(BB&: *PrevBB, Pos: PrevBB->end(), Reg: CSR, IsPush: false, Sz: 0, NewOffset: SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}
1595 | |
/// Rebuild every CFA-offset CFI in the function so it agrees with the stack
/// pointer values recomputed after the transformation: existing
/// OpDefCfaOffset CFIs are deleted and fresh ones are inserted wherever the
/// SP offset changes.
void ShrinkWrapping::rebuildCFIForSP() {
  // Phase 1: tag all existing CFA-offset CFIs for deletion. They are erased
  // only at the very end so iterators stay stable while we insert new CFIs.
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Instr: Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, Name: "DeleteMe", Val: 0U, AllocatorId);
    }
  }

  // Phase 2: walk blocks in layout order and emit a new CFI after every
  // instruction that changes the SP offset. -8 is the offset at function
  // entry (see the "entry BB (offset -8)" convention used elsewhere in this
  // pass).
  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(Point: *BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(Point: *BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(Point: *Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        // Tracked SP offsets are negative; DWARF CFA offsets are positive.
        Iter = BF.addCFIInstruction(
            BB, Pos: InsertionIter,
            Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -CurVal));
        SPVal = CurVal;
      }
    }
    // Restate the offset at the hot-cold boundary, or fix up the transition
    // when this block's entry offset differs from the previous block's exit.
    // NOTE(review): on the first block PrevBB is still null here; presumably
    // the entry block always begins at offset -8 so the else-branch is never
    // taken then -- confirm.
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, Pos: BB->begin(),
          Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          BB: PrevBB, Pos: PrevBB->end(),
          Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  // Phase 3: erase the CFIs tagged in phase 1.
  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(Inst: *I, Name: "DeleteMe"))
        I = BB.eraseInstruction(II: I);
      else
        ++I;
}
1646 | |
1647 | Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal, |
1648 | const FrameIndexEntry &FIE, |
1649 | bool CreatePushOrPop) { |
1650 | MCInst NewInst; |
1651 | if (SPVal != StackPointerTracking::SUPERPOSITION && |
1652 | SPVal != StackPointerTracking::EMPTY) { |
1653 | if (FIE.IsLoad) { |
1654 | BC.MIB->createRestoreFromStack(Inst&: NewInst, StackReg: BC.MIB->getStackPointer(), |
1655 | Offset: FIE.StackOffset - SPVal, DstReg: FIE.RegOrImm, |
1656 | Size: FIE.Size); |
1657 | } else { |
1658 | BC.MIB->createSaveToStack(Inst&: NewInst, StackReg: BC.MIB->getStackPointer(), |
1659 | Offset: FIE.StackOffset - SPVal, SrcReg: FIE.RegOrImm, |
1660 | Size: FIE.Size); |
1661 | } |
1662 | if (CreatePushOrPop) |
1663 | BC.MIB->changeToPushOrPop(Inst&: NewInst); |
1664 | return NewInst; |
1665 | } |
1666 | assert(FPVal != StackPointerTracking::SUPERPOSITION && |
1667 | FPVal != StackPointerTracking::EMPTY); |
1668 | |
1669 | if (FIE.IsLoad) { |
1670 | BC.MIB->createRestoreFromStack(Inst&: NewInst, StackReg: BC.MIB->getFramePointer(), |
1671 | Offset: FIE.StackOffset - FPVal, DstReg: FIE.RegOrImm, |
1672 | Size: FIE.Size); |
1673 | } else { |
1674 | BC.MIB->createSaveToStack(Inst&: NewInst, StackReg: BC.MIB->getFramePointer(), |
1675 | Offset: FIE.StackOffset - FPVal, SrcReg: FIE.RegOrImm, Size: FIE.Size); |
1676 | } |
1677 | return NewInst; |
1678 | } |
1679 | |
1680 | void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) { |
1681 | const MCCFIInstruction *CFI = BF.getCFIFor(Instr: Inst); |
1682 | if (UpdatedCFIs.count(Ptr: CFI)) |
1683 | return; |
1684 | |
1685 | switch (CFI->getOperation()) { |
1686 | case MCCFIInstruction::OpDefCfa: |
1687 | case MCCFIInstruction::OpDefCfaRegister: |
1688 | case MCCFIInstruction::OpDefCfaOffset: |
1689 | CFI = BF.mutateCFIOffsetFor(Instr: Inst, NewOffset: -NewOffset); |
1690 | break; |
1691 | case MCCFIInstruction::OpOffset: |
1692 | default: |
1693 | break; |
1694 | } |
1695 | |
1696 | UpdatedCFIs.insert(Ptr: CFI); |
1697 | } |
1698 | |
1699 | BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB, |
1700 | BBIterTy Pos, unsigned Reg, |
1701 | bool isPush, int Sz, |
1702 | int64_t NewOffset) { |
1703 | if (isPush) { |
1704 | for (uint32_t Idx : DeletedPushCFIs[Reg]) { |
1705 | Pos = BF.addCFIPseudo(BB: &BB, Pos, Offset: Idx); |
1706 | updateCFIInstOffset(Inst&: *Pos++, NewOffset); |
1707 | } |
1708 | if (HasDeletedOffsetCFIs[Reg]) { |
1709 | Pos = BF.addCFIInstruction( |
1710 | BB: &BB, Pos, |
1711 | Inst: MCCFIInstruction::createOffset( |
1712 | L: nullptr, Register: BC.MRI->getDwarfRegNum(RegNum: Reg, isEH: false), Offset: NewOffset)); |
1713 | ++Pos; |
1714 | } |
1715 | } else { |
1716 | for (uint32_t Idx : DeletedPopCFIs[Reg]) { |
1717 | Pos = BF.addCFIPseudo(BB: &BB, Pos, Offset: Idx); |
1718 | updateCFIInstOffset(Inst&: *Pos++, NewOffset); |
1719 | } |
1720 | if (HasDeletedOffsetCFIs[Reg]) { |
1721 | Pos = BF.addCFIInstruction( |
1722 | BB: &BB, Pos, |
1723 | Inst: MCCFIInstruction::createSameValue( |
1724 | L: nullptr, Register: BC.MRI->getDwarfRegNum(RegNum: Reg, isEH: false))); |
1725 | ++Pos; |
1726 | } |
1727 | } |
1728 | return Pos; |
1729 | } |
1730 | |
// Insert a single spill/restore described by \p Item before \p InsertionPoint
// in \p CurBB, given the stack- and frame-pointer values at that location.
// Returns the iterator past the newly inserted instruction, or an Error if
// the instruction could not be created.
Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                                    BinaryBasicBlock *CurBB,
                                                    const WorklistItem &Item,
                                                    int64_t SPVal,
                                                    int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      // Push writes below current SP; pop reads at current SP.
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      // Plain load/store: the slot is the one recorded in the FIE.
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n" ;
  });
  Expected<MCInst> NewInstOrErr =
      createStackAccess(SPVal, FPVal, FIE: Item.FIEToInsert,
                        CreatePushOrPop: Item.Action == WorklistItem::InsertPushOrPop);
  if (auto E = NewInstOrErr.takeError())
    return Error(std::move(E));
  MCInst &NewInst = *NewInstOrErr;
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: " ;
      InsertionPoint->dump();
      dbgs() << "the following inst: " ;
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(At: InsertionPoint, NewInst: std::move(NewInst));
    return ++Iter;
  }
  // Insertion point is at the end of the block: append instead.
  CurBB->addInstruction(Inst: std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n" );
  return CurBB->end();
}
1782 | |
// Materialize all insertion-type work items from \p TodoList before
// \p InsertionPoint in \p CurBB. \p SPVal / \p FPVal describe the stack state
// at that location as computed by PredictiveStackPointerTracking (PSPT).
// Returns the updated insertion point.
Expected<BBIterTy> ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(Range&: TodoList, P: [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here" );

  // Revert the effect of PSPT for this location, we want SP Value before
  // insertions
  // (PSPT already accounted for the push/pops this list will insert at the
  // end of the block, so undo their SP effect here.)
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
  // Non-pop items compare "equal" (stable sort keeps their relative order and
  // places them before pops); pops are ordered by decreasing DomOrder of the
  // affected register.
  llvm::stable_sort(Range&: TodoList, C: [&](const WorklistItem &A,
                                   const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    auto InsertionPointOrErr =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (auto E = InsertionPointOrErr.takeError())
      return Error(std::move(E));
    InsertionPoint = *InsertionPointOrErr;
    // Track the SP effect of the push/pop just inserted so the next item in
    // this list uses the correct stack value.
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}
1846 | |
// Walk the function and perform every insertion recorded either as an
// instruction annotation (insert-before) or in the Todo map (insert at block
// end), using predictive stack-pointer tracking for the SP/FP values at each
// insertion point. Returns true if anything was inserted.
Expected<bool> ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, Index: getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      // Work on a copy: processing inserts instructions around Inst.
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: " ;
        Inst.dump();
      });
      auto Iter = I;
      // State before Inst: the state at the previous instruction, or at the
      // block entry when Inst is first in the block.
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Point: Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      auto IterOrErr =
          processInsertionsList(InsertionPoint: I, CurBB: &BB, TodoList&: List, SPVal: SPTState.first, FPVal: SPTState.second);
      if (auto E = IterOrErr.takeError())
        return Error(std::move(E));
      // Resume iteration past the instructions just inserted.
      I = *IterOrErr;
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(Val: &BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(Point: *BB.rbegin());
      if (auto E = processInsertionsList(InsertionPoint: BB.end(), CurBB: &BB, TodoList&: WRI->second,
                                         SPVal: SPTState.first, FPVal: SPTState.second)
                       .takeError())
        return Error(std::move(E));
      Changes = true;
    }
  }
  return Changes;
}
1889 | |
// Apply all deletion-type work items: erase spill instructions that were
// moved elsewhere, or rewrite a push/pop into an equivalent pure SP
// adjustment when only its stack-space effect must be preserved.
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, Index: getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is flag reg alive across this func?
          // If so, request the flag-preserving form of the adjustment.
          bool DontClobberFlags = LA.isAlive(PP: &Inst, Reg: BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Size: Sz, NoFlagsClobber: DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Size: Sz, NoFlagsClobber: DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: " ;
          BC.printInstruction(dbgs(), Inst);
        });
        // Step back so the outer loop's ++II lands on the instruction that
        // followed the erased one.
        II = std::prev(x: BB.eraseInstruction(II));
        break;
      }
    }
  }
}
1928 | |
1929 | void ShrinkWrapping::rebuildCFI() { |
1930 | const bool FP = Info.getStackPointerTracking().HasFramePointer; |
1931 | Info.invalidateAll(); |
1932 | if (!FP) { |
1933 | rebuildCFIForSP(); |
1934 | Info.invalidateAll(); |
1935 | } |
1936 | for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
1937 | if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0) |
1938 | continue; |
1939 | const int64_t SPValPush = PushOffsetByReg[I]; |
1940 | const int64_t SPValPop = PopOffsetByReg[I]; |
1941 | insertUpdatedCFI(CSR: I, SPValPush, SPValPop); |
1942 | Info.invalidateAll(); |
1943 | } |
1944 | } |
1945 | |
// Top-level driver: attempt to shrink-wrap callee-saved register
// save/restore pairs in BF. When \p HotOnly is set, skip functions whose
// execution count is below the hot threshold. Returns true iff the function
// was modified.
Expected<bool> ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  // (dynamic instruction/store counts weighted by block execution counts).
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Inst: Instr))
        continue;
      if (BC.MIB->mayStore(Inst: Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  // Without frame information we cannot reason about spill slots.
  if (!FA.hasFrameInfo(Func: BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n" );
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could regress
    // dcache misses). Moreover, ambiguous JTs are rare and coming from code
    // written in assembly language. Just bail.
    return false;
  }
  // Analysis pipeline: identify CSR saves, classify their uses, and compute
  // the best (colder) locations to move them to.
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n" ;
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  auto ModifiedOrErr = processInsertions();
  if (auto E = ModifiedOrErr.takeError())
    return Error(std::move(E));
  const bool Modified = *ModifiedOrErr;
  if (!Modified)
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n" );
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n" ;
    BF.dump();
  });
  return true;
}
2022 | |
2023 | void ShrinkWrapping::printStats(BinaryContext &BC) { |
2024 | BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode |
2025 | << " spills inserting load/stores and " << SpillsMovedPushPopMode |
2026 | << " spills inserting push/pops\n" ; |
2027 | if (!InstrDynamicCount || !StoreDynamicCount) |
2028 | return; |
2029 | BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount |
2030 | << " store executions (" |
2031 | << format(Fmt: "%.1lf%%" , |
2032 | Vals: (100.0 * SpillsMovedDynamicCount / InstrDynamicCount)) |
2033 | << " total instructions executed, " |
2034 | << format(Fmt: "%.1lf%%" , |
2035 | Vals: (100.0 * SpillsMovedDynamicCount / StoreDynamicCount)) |
2036 | << " store instructions)\n" ; |
2037 | BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing " |
2038 | << SpillsFailedDynamicCount << " store executions (" |
2039 | << format(Fmt: "%.1lf%%" , |
2040 | Vals: (100.0 * SpillsFailedDynamicCount / InstrDynamicCount)) |
2041 | << " total instructions executed, " |
2042 | << format(Fmt: "%.1lf%%" , |
2043 | Vals: (100.0 * SpillsFailedDynamicCount / StoreDynamicCount)) |
2044 | << " store instructions)\n" ; |
2045 | } |
2046 | |
2047 | // Operators necessary as a result of using MCAnnotation |
2048 | raw_ostream &operator<<(raw_ostream &OS, |
2049 | const std::vector<ShrinkWrapping::WorklistItem> &Vec) { |
2050 | OS << "SWTodo[" ; |
2051 | const char *Sep = "" ; |
2052 | for (const ShrinkWrapping::WorklistItem &Item : Vec) { |
2053 | OS << Sep; |
2054 | switch (Item.Action) { |
2055 | case ShrinkWrapping::WorklistItem::Erase: |
2056 | OS << "Erase" ; |
2057 | break; |
2058 | case ShrinkWrapping::WorklistItem::ChangeToAdjustment: |
2059 | OS << "ChangeToAdjustment" ; |
2060 | break; |
2061 | case ShrinkWrapping::WorklistItem::InsertLoadOrStore: |
2062 | OS << "InsertLoadOrStore" ; |
2063 | break; |
2064 | case ShrinkWrapping::WorklistItem::InsertPushOrPop: |
2065 | OS << "InsertPushOrPop" ; |
2066 | break; |
2067 | } |
2068 | Sep = ", " ; |
2069 | } |
2070 | OS << "]" ; |
2071 | return OS; |
2072 | } |
2073 | |
2074 | raw_ostream & |
2075 | operator<<(raw_ostream &OS, |
2076 | const std::vector<StackLayoutModifier::WorklistItem> &Vec) { |
2077 | OS << "SLMTodo[" ; |
2078 | const char *Sep = "" ; |
2079 | for (const StackLayoutModifier::WorklistItem &Item : Vec) { |
2080 | OS << Sep; |
2081 | switch (Item.Action) { |
2082 | case StackLayoutModifier::WorklistItem::None: |
2083 | OS << "None" ; |
2084 | break; |
2085 | case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset: |
2086 | OS << "AdjustLoadStoreOffset" ; |
2087 | break; |
2088 | case StackLayoutModifier::WorklistItem::AdjustCFI: |
2089 | OS << "AdjustCFI" ; |
2090 | break; |
2091 | } |
2092 | Sep = ", " ; |
2093 | } |
2094 | OS << "]" ; |
2095 | return OS; |
2096 | } |
2097 | |
2098 | bool operator==(const ShrinkWrapping::WorklistItem &A, |
2099 | const ShrinkWrapping::WorklistItem &B) { |
2100 | return (A.Action == B.Action && A.AffectedReg == B.AffectedReg && |
2101 | A.Adjustment == B.Adjustment && |
2102 | A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad && |
2103 | A.FIEToInsert.IsStore == B.FIEToInsert.IsStore && |
2104 | A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm && |
2105 | A.FIEToInsert.Size == B.FIEToInsert.Size && |
2106 | A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple && |
2107 | A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset); |
2108 | } |
2109 | |
2110 | bool operator==(const StackLayoutModifier::WorklistItem &A, |
2111 | const StackLayoutModifier::WorklistItem &B) { |
2112 | return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate); |
2113 | } |
2114 | |
2115 | } // end namespace bolt |
2116 | } // end namespace llvm |
2117 | |