//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <numeric>
#include <optional>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

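/// Identify stores that look like callee-saved register saves: simple stores
/// to the local stack area whose source register has no locally reaching
/// definition, whose slot is not visible to other functions, and which happen
/// exactly once. Anything that fails these checks blacklists the register.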
void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed on the stack -- do not
        // mess with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}

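/// Walk each block backwards looking for loads that restore the candidate
/// callee-saved registers found by analyzeSaves(), dismissing a register
/// whenever a restore is used locally, is not simple, or disagrees with the
/// save's stack offset.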
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If the stack offsets of the save and this restore don't agree, we
        // don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}

std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}

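/// Stack regions the pass must not touch are tracked as [offset, size)
/// intervals in BlacklistedRegions; candidate regions we know how to move
/// live in AvailableRegions. The helpers below keep the two maps consistent:
/// any available region overlapping a new conflict is evicted and
/// blacklisted.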
void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}

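/// Detect a frame pointer initialization (e.g. "mov %rsp, %rbp" on x86) and,
/// if FP is set to anything other than the current SP value, conservatively
/// abort all layout changes for this function by blacklisting region (0, 0):
/// a non-empty blacklist disables both collapsing and insertion.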
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}

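/// Inspect instructions that write SP based on FP (e.g. epilogue sequences).
/// A restore of an older SP value is annotated as a stack access for later
/// offset fixup; anything that allocates through FP aborts the analysis.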
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) {
    return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer();
  });
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same as FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}

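/// Classify every stack access in the function as either available for
/// relocation or blacklisted, recording slot annotations for the accesses we
/// know how to rewrite.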
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed on the stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}

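/// Track the CFA (canonical frame address) rule through the function's CFI
/// stream so CFI-described slots get the same slot annotations as real
/// memory accesses. The initial state models the x86-64 entry point: DWARF
/// register 7 is RSP and the CFA starts at RSP+8, stored here in BOLT's
/// negated-offset convention as -8.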
void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        [[fallthrough]];
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}

void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}

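/// Collapsing a region removes a stack allocation (a deleted push) and
/// schedules compensating offset adjustments on every load, store and CFI
/// reached by that allocation. It is only legal when nothing in the frame is
/// blacklisted.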
bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}

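/// Inserting a region is the dual operation: a new allocation (a push) is
/// placed at program point P, and every stack access dominated by P gets the
/// opposite offset adjustment.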
bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in collapseRegion.
  // This is the value of SP before the allocation the user wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}

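/// Replay all scheduled WorklistItems: rewrite CFI offsets, regenerate
/// load/store instructions with adjusted offsets, and patch SP arithmetic
/// recorded by the collapse/insert steps.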
void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
                                       StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
                                  Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
    }
  }
}

void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}

std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

using BBIterTy = BinaryBasicBlock::iterator;

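/// Record, for every callee-saved register, the bitvector of instructions
/// that use it, indexed by the dominator analysis' expression numbering.
/// When the function sets up a frame pointer, calls are conservatively
/// counted as uses of any CSR aliasing FP, keeping FP live across them.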
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}

void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
          << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores:"
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}

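/// For each CSR, collect candidate save positions: blocks outside loops
/// whose first instruction dominates every recorded use of the register.
/// Candidates are then sorted by descending execution count, so the coldest
/// (most profitable) position sits at the back of the vector.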
void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    llvm::stable_sort(BestSavePos[I], [&](const MCInst *A, const MCInst *B) {
      const BinaryBasicBlock *BBA = InsnToBB[A];
      const BinaryBasicBlock *BBB = InsnToBB[B];
      const uint64_t CountA = BBA->getKnownExecutionCount();
      const uint64_t CountB = BBB->getKnownExecutionCount();
      return CountB < CountA;
    });

    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}

void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    Order.push_back(I);
  }

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}

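/// Decide whether the coldest candidate save position for CSR beats the
/// current prologue save: the move is profitable only if the candidate's
/// execution count is below ShrinkWrappingThreshold percent of the original
/// saving cost.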
bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
         "save position vectors out of sync");
  if (BestSaveCount[CSR].empty())
    return false;

  const uint64_t BestCount = BestSaveCount[CSR].back();
  BestPosSave = BestSavePos[CSR].back();
  if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
    return false;

  LLVM_DEBUG({
    auto &InsnToBB = Info.getInsnToBBMap();
    dbgs() << "Better position for saves found in func " << BF.getPrintName()
           << " count: " << BF.getKnownExecutionCount() << "\n";
    dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
           << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
  });

  TotalEstimatedWin = CurSavingCost - BestCount;
  return true;
}

/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating from blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBB
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates
      // instructions and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier in place
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}

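/// Compute where restores must be placed for a prospective save position:
/// the dominance frontier of BestPosSave. Placement is abandoned (an empty
/// vector is returned) if a frontier point is a terminator that uses the CSR
/// or an invoke, since the invoke's landing pad is not dominated.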
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // on edges transitioning to the dominance frontier, otherwise we pull these
  // restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which
    // indicates the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect empty BBs now and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}

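/// Validate that a CSR can be moved using push/pop instructions rather than
/// explicit load/store pairs. Push/pop mode changes the stack layout, so it
/// is rejected when the frame requires alignment or takes the address of a
/// stack slot, when a save cannot be collapsed or a restore is not a pop, or
/// when the push would land in the entry block of a function that sets up a
/// frame pointer.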
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  if (FA.hasStackArithmetic(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}

SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (SPT.getStateBefore(Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}

void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst,
                     WorklistItem(UsePushPops
                                      ? WorklistItem::Erase
                                      : WorklistItem::ChangeToAdjustment,
                                  CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}

bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}

void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}

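/// Driver for the transformation: for each CSR, pick the best cold save
/// position, compute restore placements, choose between push/pop and
/// load/store mode, then schedule removal of the old save/restore pairs and
/// insertion of the new ones. If push/pop mode fails for any register, it is
/// reverted for the whole function.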
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/stores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (MCInst &Inst : llvm::reverse(BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}

namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace

bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().starts_with(".LSplitEdge"))
      continue;
    for (BinaryBasicBlock &RBB : llvm::reverse(BF)) {
      if (&RBB == &BB)
        break;
      if (!RBB.getName().starts_with(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}

namespace {

// A specialized StackPointerTracking that accounts for the instructions we
// plan to remove or insert.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace

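/// Re-emit the CFI for callee-saved register \p CSR after its save and
/// restore instructions have been moved. The region dominated by the new
/// save instruction is treated as the zone where the register is spilled;
/// push CFI (at offset \p SPValPush) and pop CFI (at offset \p SPValPop)
/// are inserted wherever the layout enters or leaves that zone.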
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}

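/// Rebuild all OpDefCfaOffset CFI for this function from scratch: mark the
/// existing ones for deletion, then walk the layout and emit a new CFA
/// offset update wherever StackPointerTracking reports a change in the SP
/// value, including at hot-cold split points.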
void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}

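/// Create a load or store instruction for \p FIE. The slot is addressed
/// relative to the stack pointer whenever its value is known (optionally
/// folding the access into a push/pop), falling back to frame-pointer
/// addressing otherwise.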
Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                                   const FrameIndexEntry &FIE,
                                                   bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size);
    } else {
      BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                FIE.StackOffset - SPVal, FIE.RegOrImm,
                                FIE.Size);
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size);
  } else {
    BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                              FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
  }
  return NewInst;
}

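/// Rewrite the offset of the CFA-definition CFI behind \p Inst according to
/// \p NewOffset, at most once per underlying MCCFIInstruction (tracked via
/// UpdatedCFIs).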
void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}

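/// Re-emit at \p Pos the CFI that was deleted together with the original
/// push (\p isPush == true) or pop of \p Reg, updating CFA offsets to
/// \p NewOffset. If an offset CFI for the register was deleted, an OpOffset
/// (after a push) or OpSameValue (after a pop) is emitted in its place.
/// Returns an iterator past the inserted CFI.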
BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}

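/// Materialize a single scheduled save or restore (\p Item) at
/// \p InsertionPoint in \p CurBB, given the stack- and frame-pointer values
/// at that point, and record the offsets needed for later CFI
/// reconstruction. Returns an iterator past the newly inserted instruction.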
Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                                    BinaryBasicBlock *CurBB,
                                                    const WorklistItem &Item,
                                                    int64_t SPVal,
                                                    int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  Expected<MCInst> NewInstOrErr =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (auto E = NewInstOrErr.takeError())
    return Error(std::move(E));
  MCInst &NewInst = *NewInstOrErr;
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

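/// Materialize all insertions scheduled in \p TodoList at \p InsertionPoint,
/// keeping SPVal up to date as pushes and pops are emitted and sorting pops
/// so that restores respect the dominance order of their corresponding
/// saves.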
Expected<BBIterTy> ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location; we want the SP value
  // before the insertions.
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    auto InsertionPointOrErr =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (auto E = InsertionPointOrErr.takeError())
      return Error(std::move(E));
    InsertionPoint = *InsertionPointOrErr;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

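/// Walk the function and materialize every scheduled insertion, using
/// PredictiveStackPointerTracking to obtain the SP/FP values the code will
/// have once all pending edits are applied. Returns true if any worklist
/// items were processed.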
Expected<bool> ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      auto IterOrErr =
          processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
      if (auto E = IterOrErr.takeError())
        return Error(std::move(E));
      I = *IterOrErr;
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      if (auto E = processInsertionsList(BB.end(), &BB, WRI->second,
                                         SPTState.first, SPTState.second)
                       .takeError())
        return Error(std::move(E));
      Changes = true;
    }
  }
  return Changes;
}

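/// Execute the scheduled deletions: erase dead saves and restores or, when
/// the stack space must remain allocated, rewrite push/pop instructions as
/// plain stack-pointer adjustments.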
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is the flags reg alive at this point?
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

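/// Rebuild the CFI invalidated by shrink wrapping: SP-based CFA offsets
/// first (when the function has no frame pointer), then the save/restore
/// CFI of every callee-saved register whose push and pop offsets were
/// recorded during insertion.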
void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

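/// Top-level driver for the pass on one function: collect statistics, bail
/// out on functions without frame info or with ambiguous jump tables,
/// compute the best save/restore placement, and apply the scheduled
/// insertions, deletions, and CFI updates. Returns true if the function was
/// modified.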
Expected<bool> ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      if (BC.MIB->mayStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not
    // worth the cost of duplicating potentially large jump tables, which
    // could regress dcache performance. Moreover, ambiguous JTs are rare and
    // typically come from hand-written assembly. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  auto ModifiedOrErr = processInsertions();
  if (auto E = ModifiedOrErr.takeError())
    return Error(std::move(E));
  const bool Modified = *ModifiedOrErr;
  if (!Modified)
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

void ShrinkWrapping::printStats(BinaryContext &BC) {
  BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
            << " spills inserting load/stores and " << SpillsMovedPushPopMode
            << " spills inserting push/pops\n";
  if (!InstrDynamicCount || !StoreDynamicCount)
    return;
  BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
            << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
  BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
            << SpillsFailedDynamicCount << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
}

// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm